7.4 Linear Methods

We’ll use our xrf and icp datasets to create linear models of all the variables of interest. They must be combined using pivot_longer(). The plot indicates that some, but not all variables are suitable for calibration.

full_join(
  icp %>%
    select(any_of(c(elementsList, "SampleID"))) %>%
    pivot_longer(any_of(elementsList),
                 values_to = "icp",
                 names_to = "element"),
  
  xrf %>% 
    select(any_of(c(elementsList, "SampleID"))) %>%
    pivot_longer(any_of(elementsList),
                 values_to = "xrf",
                 names_to = "element"),
  
  by = c("SampleID", "element")
  ) %>%

  filter(element %in% myElements) %>%
  drop_na() %>%
  
  ggplot(aes(x = icp, y = xrf)) +
  geom_point() +
  ggpmisc::stat_poly_line() +
  ggpmisc::stat_poly_eq() +
  facet_wrap(vars(element), 
             scales = "free") +
  xlab("ICP-OES [ppm]") + 
  ylab("Itrax ED-XRF [peak area]")

To apply these calibration models to our existing data, we first need to save the models created using lm(). We don’t force the intercept through zero (e.g. icp ~ 0 + xrf) because of background (high baseline) conditions that may be present.

calibration <- full_join(
  icp %>%
    select(any_of(c(elementsList, "SampleID"))) %>%
    pivot_longer(any_of(elementsList),
                 values_to = "icp",
                 names_to = "element"),
  
  xrf %>% 
    select(any_of(c(elementsList, "SampleID"))) %>%
    pivot_longer(any_of(elementsList),
                 values_to = "xrf",
                 names_to = "element"),
  
  by = c("SampleID", "element")
  ) %>%
  mutate(element = as_factor(element)) %>%
  drop_na()

calibration <- calibration %>%
  group_by(element) %>%
  group_split() %>%
  lapply(function(x){lm(data = x, icp~xrf)}) %>%
  `names<-`(calibration %>%
              group_by(element) %>%
              group_keys() %>%
              pull(element))

We can see the performance of our model using summary(), for example:

summary(calibration$Ca)
## 
## Call:
## lm(formula = icp ~ xrf, data = x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57765 -11255   1072  11437  71439 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.343e+04  9.352e+03   3.575 0.000714 ***
## xrf         5.701e-01  4.466e-02  12.764  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24780 on 58 degrees of freedom
## Multiple R-squared:  0.7375, Adjusted R-squared:  0.7329 
## F-statistic: 162.9 on 1 and 58 DF,  p-value: < 2.2e-16

Now we can use predict() to apply those models to the data.

CD166_19_xrf %>%
mutate(Ca_ppm =
         predict(calibration$Ca, 
                 newdata = 
                   CD166_19_xrf %>%
                   select(Ca) %>% 
                   rename(xrf = Ca))
       ) %>%
  ggplot(aes(x = Ca_ppm, y = depth)) +
  geom_lineh() +
  scale_y_reverse() +
  scale_x_continuous(labels = function(x){x/10000}) +
  ylab("Depth [mm]") +
  xlab("Ca [%]")
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_lineh()`).

We can also extract the confidence intervals, for example:

predict(calibration$Ca, 
        newdata = 
          CD166_19_xrf %>%
          select(Ca) %>% 
          rename(xrf = Ca),
        interval = "confidence",
        level = 0.95,
        type = "response") %>%
  as_tibble() %>%
  mutate(depth = CD166_19_xrf$depth) %>%
  
  ggplot(aes(y = depth, x = fit)) +
  geom_errorbar(aes(xmin = lwr, xmax = upr), col = "grey") + 
  geom_lineh() + 
  scale_x_continuous(labels = function(x){x/10000},
                     name = "Ca [%]") +
  scale_y_reverse(name = "Depth [mm]") + 
  theme_paleo()