Data import and model fitting

Thermal tolerance is assessed for each genotype by analyzing the decrease in Fv/Fm as the heat stress temperature increases. For each genotype, a logistic regression is fit for Fv/Fm as a function of temperature, and a tolerance value is calculated as the temperature at which Fv/Fm is reduced by 50% (analagous to an LD50 value).

#### Import data
df <- #read_csv("data/raw/20200628_Genoswap_Acer/CBASS_Master_data_clean.csv") %>%
  read_csv("data/raw/20200628_Genoswap_Acer/CBASSGenoSwap2020_exactnames.csv") %>%
  mutate(Temp = parse_number(Tank)) %>%
  select(-X1, -Notes.Corrections) %>%
  mutate(Genotype = gsub(" ", "", Genotype)) %>%
  mutate(Genotype = gsub("-", "", Genotype))

# filter out outliers and initial time point
df <- df %>%
  drop_na() %>%
  filter(Y < 1, Timepoint == "Final") %>%
  droplevels()

# df %>%
#   filter(Temp == 33) %>%
#   ggplot(aes(x = factor(Picture), y = Y)) +
#   geom_boxplot()
#The above  figure shows that two PAM images  from one of the 33° tanks are abnormally low -- remove these values below

df <- df %>%
  # Remove the outlier values from bad PAM images
  filter(!(Temp == 33 & Picture %in% c(3, 22))) %>%
  select(nursery = Source, geno = Genotype, tileID, max_temp = Temp, fvfm = Y) %>%
  # Rename some variables
  mutate(nursery = fct_recode(nursery, crf = "CRF", nsu = "Broward", um = "UM",
                              mote = "Mote", fwc = "FWC"))

# filter out corals with not enough data
df <- df %>%
  group_by(nursery, geno) %>%
  filter(n() >= 10) %>%  # remove corals with fewer than 10 data points
  filter(n_distinct(max_temp) >= 5) %>%
  ungroup()

Initial data filtering and QC

df <- df %>%
  # Save raw fvfm data in new column
  mutate(fvfmraw = fvfm) %>%          
  # Identify problematic data points
  mutate(problem = case_when(
    fvfm > 0.750 ~ "abnormally high",
    TRUE ~ "none")) %>%
  mutate(fvfm = case_when(
    problem == "abnormally high" ~ NA_real_,      # Change abnormally high values to NA
    problem == "none" ~ fvfm))

Initial model fitting and outlier detection

# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = data, 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(120, 0.72, 40),
      lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)

# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
  nest(data = c(tileID, max_temp, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
ed50 <- initmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(ed50) %>%
  full_join(df) %>%
  rename(ed50 = estimate)

# Identify problematic data points based on cook's distance and residuals
counts <- vals %>% 
  group_by(nursery, geno) %>% 
  summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
  left_join(counts) %>%
  group_by(nursery, geno) %>%
  mutate(resid.thresh = 2*sd(.resid, na.rm = TRUE)) %>%  # Calculate residual threshold as 3 standard deviations
  mutate(cooksd.thresh = 4/n) %>%   # Calculate cook's distance threshold as 4/n
  mutate(max_to_remove = floor(n * 0.09)) %>%
  ungroup() %>%
  mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
                             abs(.resid) > resid.thresh ~ "high residual", 
                             TRUE ~ problem)) %>%
  group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
  mutate(n.outliers = n(),
         rank.out = order(.cooksd, decreasing = TRUE)) %>%
  ungroup() %>%
  mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_, 
                          TRUE ~ fvfm)) 

# Refit models without problematic points
fmods <- dff %>%
  select(nursery, geno, tileID, max_temp, fvfmraw, problem, fvfm) %>%
  nest(data = c(tileID, max_temp, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
fed50 <- fmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(fed50) %>%
  full_join(select(dff, nursery, geno, tileID, max_temp, fvfmraw, problem, fvfm)) %>%
  rename(ed50 = estimate)
# Define function to plot raw data, fitted values, and ed50 for each genotype
plotfits <- function(data) {
  ggplot(data = data, aes(x = max_temp)) +
    geom_point(pch = 4, size = 1.25,
               aes(y = fvfmraw, color = factor(problem, levels = c("none", "abnormally high", 
                                              "abnormally high w/ low Ft", 
                                              "high residual", "high cook's distance")))) +
    geom_point(aes(y = fvfm), pch = 1, size = 2) +
    geom_line(data = drop_na(data, .fitted),
              aes(y = .fitted)) +
    geom_vline(data = distinct(data, nursery, geno, ed50), 
               aes(xintercept = ed50), 
               lwd = 0.2, lty = 2) +
    geom_text(data = distinct(data, nursery, geno, ed50),
              aes(x = ed50, y = 0.05, label = round(ed50, 2)), 
              hjust = 1, nudge_x = -0.2, size = 3) +
    facet_wrap(~ nursery + geno, drop = TRUE) +
    scale_color_manual(name = "problem", drop = FALSE,
                       values = c("black", "red", "orange", "blue", "turquoise"))
}

# Plot fits
plotfits(data = fvals)

Save processed data to file

# Write tidy data to file
fvals %>%
  select(nursery, geno, max_temp, fvfm) %>%
  drop_na() %>%
  mutate(date = "2020-06") %>%
  write_csv("data/processed/Jun2020_fvfm_clean.csv")