Data import and model fitting

Thermal tolerance is assessed for each genotype by analyzing the decrease in Fv/Fm as the heat stress temperature increases. For each genotype, a logistic regression is fit for Fv/Fm as a function of temperature, and a tolerance value is calculated as the temperature at which Fv/Fm is reduced by 50% (analagous to an LD50 value).

#### Import data
df <- #read_csv("data/raw/20200628_Genoswap_Acer/CBASS_Master_data_clean.csv") %>%
  read_csv("data/raw/20200628_Genoswap_Acer/CBASSGenoSwap2020_exactnames.csv") %>%
  mutate(Temp = parse_number(Tank)) %>%
  select(-X1, -Notes.Corrections) %>%
  mutate(Genotype = gsub(" ", "", Genotype)) %>%
  mutate(Genotype = gsub("-", "", Genotype))

# filter out outliers and initial time point
df <- df %>%
  drop_na() %>%
  filter(Y < 1, Timepoint == "Final") %>%
  droplevels()

# df %>%
#   filter(Temp == 33) %>%
#   ggplot(aes(x = factor(Picture), y = Y)) +
#   geom_boxplot()
#The above  figure shows that two PAM images  from one of the 33° tanks are abnormally low -- remove these values below

df <- df %>%
  # Remove the outlier values from bad PAM images
  filter(!(Temp == 33 & Picture %in% c(3, 22))) %>%
  select(nursery = Source, geno = Genotype, tileID, max_temp = Temp, fvfm = Y) %>%
  # Rename some variables
  mutate(nursery = fct_recode(nursery, crf = "CRF", nsu = "Broward", um = "UM",
                              mote = "Mote", fwc = "FWC"))

# filter out corals with not enough data
df <- df %>%
  group_by(nursery, geno) %>%
  filter(n() >= 10) %>%  # remove corals with fewer than 10 data points
  filter(n_distinct(max_temp) >= 5) %>%
  ungroup()

Initial data filtering and QC

df <- df %>%
  # Save raw fvfm data in new column
  mutate(fvfmraw = fvfm) %>%          
  # Identify problematic data points
  mutate(problem = case_when(
    fvfm > 0.750 ~ "abnormally high",
    TRUE ~ "none")) %>%
  mutate(fvfm = case_when(
    problem == "abnormally high" ~ NA_real_,      # Change abnormally high values to NA
    problem == "none" ~ fvfm))

Initial model fitting and outlier detection

# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = data, 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(120, 0.72, 40),
      lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)

# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
  nest(data = c(tileID, max_temp, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
ed50 <- initmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(ed50) %>%
  full_join(df) %>%
  rename(ed50 = estimate)

# Identify problematic data points based on cook's distance and residuals
counts <- vals %>% 
  group_by(nursery, geno) %>% 
  summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
  left_join(counts) %>%
  group_by(nursery, geno) %>%
  mutate(resid.thresh = 2*sd(.resid, na.rm = TRUE)) %>%  # Calculate residual threshold as 3 standard deviations
  mutate(cooksd.thresh = 4/n) %>%   # Calculate cook's distance threshold as 4/n
  mutate(max_to_remove = floor(n * 0.09)) %>%
  ungroup() %>%
  mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
                             abs(.resid) > resid.thresh ~ "high residual", 
                             TRUE ~ problem)) %>%
  group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
  mutate(n.outliers = n(),
         rank.out = order(.cooksd, decreasing = TRUE)) %>%
  ungroup() %>%
  mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_, 
                          TRUE ~ fvfm)) 

# Refit models without problematic points
fmods <- dff %>%
  select(nursery, geno, tileID, max_temp, fvfmraw, problem, fvfm) %>%
  nest(data = c(tileID, max_temp, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
fed50 <- fmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(fed50) %>%
  full_join(select(dff, nursery, geno, tileID, max_temp, fvfmraw, problem, fvfm)) %>%
  rename(ed50 = estimate)

# Define function to plot raw data, fitted values, and ed50 for each genotype
plotfits <- function(data) {
  ggplot(data = data, aes(x = max_temp)) +
    geom_point(pch = 4, size = 1.25,
               aes(y = fvfmraw, color = factor(problem, levels = c("none", "abnormally high", 
                                              "abnormally high w/ low Ft", 
                                              "high residual", "high cook's distance")))) +
    geom_point(aes(y = fvfm), pch = 1, size = 2) +
    geom_line(data = drop_na(data, .fitted),
              aes(y = .fitted)) +
    geom_vline(data = distinct(data, nursery, geno, ed50), 
               aes(xintercept = ed50), 
               lwd = 0.2, lty = 2) +
    geom_text(data = distinct(data, nursery, geno, ed50),
              aes(x = ed50, y = 0.05, label = round(ed50, 2)), 
              hjust = 1, nudge_x = -0.2, size = 3) +
    facet_wrap(~ nursery + geno, drop = TRUE) +
    scale_color_manual(name = "problem", drop = FALSE,
                       values = c("black", "red", "orange", "blue", "turquoise"))
}

# Plot fits
plotfits(data = fvals)

Save processed data to file

# Write tidy data to file
fvals %>%
  select(nursery, geno, max_temp, fvfm) %>%
  drop_na() %>%
  mutate(date = "2020-06") %>%
  write_csv("data/processed/Jun2020_fvfm_clean.csv")

Dose-response curves for PAM data from FL genotype swap CBASS runs

Data import and model fitting

Initial data filtering and QC

Initial model fitting and outlier detection

Save processed data to file