knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(janitor)
library(lubridate)
library(scales)
library(readxl)
library(ggthemes)
library(drc)
library(broom)
library(tidyverse)
library(emmeans)
library(ggpubr)
library(joeyr)

CBASS temperature logs

Import CBASS log files

b1log <- read_csv("data/raw/20200820_KJSNSU_Acer/Log_Files/B1_LOG_0821.TXT")

b1log <- b1log %>%
  filter(PrintDate != "PrintDate") %>%
  mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
  unite(time, Th, Tm, Ts, sep = ":") %>%
  unite(dttm, date, time) %>%
  mutate(dttm = ymd_hms(dttm),
         hm = format(dttm, "%H:%M")) %>%
  select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
  pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
  filter(temp > 0, temp < 50) %>%
  mutate(tank = str_extract(key, "T[0-9]"),
         tank = paste0("B1", tank),
         key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
                         TRUE ~ str_sub(key, 1, 4))) %>%
  pivot_wider(names_from = key, values_from = temp)

b2log <- read_csv("data/raw/20200820_KJSNSU_Acer/Log_Files/B2_LOG_0821.TXT")

b2log <- b2log %>%
  filter(PrintDate != "PrintDate") %>%
  mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
  unite(time, Th, Tm, Ts, sep = ":") %>%
  unite(dttm, date, time) %>%
  mutate(dttm = ymd_hms(dttm),
         hm = format(dttm, "%H:%M")) %>%
  select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
  pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
  filter(temp > 0, temp < 50) %>%
  mutate(tank = str_extract(key, "T[0-9]"),
         tank = paste0("B2", tank),
         key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
                         TRUE ~ str_sub(key, 1, 4))) %>%
  pivot_wider(names_from = key, values_from = temp)

log <- bind_rows(b1log, b2log) %>%
  mutate(tank = factor(tank),
         SP = as.numeric(SP),
         Temp = as.numeric(Temp))

Import CBASS set points (used for settings file)

sp <- read_xlsx("data/raw/20200820_KJSNSU_Acer/Temp_Profiles/CRII_ACERV_2020.xlsx") %>%
  mutate(Time = format(Time, "%H:%M")) %>%
  filter(Time >= "13:00", Time <= "22:00") %>%
  crossing(date = as_date(c("2020-08-20", "2020-08-21"))) %>%
  mutate(dttm = ymd_hm(paste(date, Time))) %>%
  select(dttm, matches("B")) %>%
  pivot_longer(-dttm, names_to = "tank", values_to = "Temp") %>%
  arrange(dttm)

Plot CBASS temperature profiles with set points

ggplot(mapping = aes(x = dttm, y = Temp, group = tank)) +
  geom_line(data = filter(log, hm > "12:50", hm < "21:50"),
            aes(color = tank), lwd = 0.2) +
  geom_step(data = sp, lwd = 0.2) +
  facet_wrap(~date(dttm), scales = "free") +
  scale_y_continuous(breaks = 30:37) +
  scale_x_datetime(breaks = "hours", labels = label_date("%H:%M")) +
  theme_hc() +
  theme(legend.position = "none") +
  labs(x = "Time of day", y = "Temperature (°C)")

#  Calculate actual hold temperatures
actual_temps <- log %>%
  filter(hm > "16:15" & hm < "19:05") %>%
  group_by(tank_id = tank) %>%
  summarise(max_temp = mean(Temp, na.rm = TRUE)) %>%
  arrange(max_temp)

actual_temps
## # A tibble: 8 x 2
##   tank_id max_temp
##   <fct>      <dbl>
## 1 B1T1        30.1
## 2 B1T2        31.0
## 3 B1T3        32.0
## 4 B1T4        33.0
## 5 B2T1        34.0
## 6 B2T2        34.9
## 7 B2T3        35.9
## 8 B2T4        36.9

PAM data

Turn each filtering and adjustment step on or off

# Initial pre-filtering (abnormally high, abnormally high w/ low Ft)
pre.filter <- T

# Post-model-fit filtering (high cook's distance outliers)
mod.filter <- T

# Adjust for positional effects
pos.adjust <- T

# Use actual max temperatures instead of target temperatures
temp.adjust <- T

Data pre-filtering and QC

dpii <- read_csv("data/raw/20200820_KJSNSU_Acer/PAM/KJS_pamdata_tidy.csv")

dpii <- dpii %>%
  mutate(nursery = "nsu",
         geno = paste0("kjs", Genotype)) %>%
  select(nursery, geno, f, fm, fvfm, target_temp = Final_Temp, tank_id = Tank) %>%
  drop_na(fvfm)

# Replace max_temp with actual max_temp calculated above
dpii <- dpii %>%
  left_join(actual_temps)

# Identify points at high temperatures where low background fluorescence (f) resulted in abnormally high values of fvfm
out <- dpii %>%
  filter(target_temp >= 36) %>%
  drop_na(fvfm) %>%
  mutate(mahal = tidy_mahalanobis(f, fvfm),
         is_out = mahal > quantile(mahal, 0.75) & f < median(f) & fvfm > median(fvfm, na.rm = T))
# Plot showing those outliers
ggplot(out, aes(x = f, y = fvfm)) +
  geom_point(aes(shape = is_out, color = factor(target_temp))) +
  scale_shape_manual(values = c(19, 25))

df <- dpii %>%
  # Save raw fvfm data in new column
  mutate(fvfmraw = fvfm) %>% 
  # Merge with mahalanobis outlier info from above
  left_join(out) %>%
  # Identify problematic data points
  mutate(problem = case_when(
    fvfm > 0.75 ~ "abnormally high",
    is_out ~ "abnormally high w/ low Ft",
    TRUE ~ "none")) %>%
  mutate(fvfm = case_when(
    problem == "abnormally high" ~ NA_real_,    # Change abnormally high values to NA
    problem == "abnormally high w/ low Ft" ~ NA_real_,
    problem == "none" ~ fvfm))

# Select pre-filtered or raw data to use moving forward based on set option
df <- if (pre.filter) { 
  df 
} else { 
  dpii %>% 
    mutate(fvfmraw = fvfm, 
           problem = case_when(is.na(fvfm) ~ "no signal",
                               TRUE ~ "none") )
}

Adjust for positional effects

# Import maps of the position of each genotype in each tank
map1 <- read_csv("data/raw/20200820_KJSNSU_Acer/20200820_map.csv") %>%
  mutate(across(.fns = as.character)) %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)
map2 <- read_csv("data/raw/20200820_KJSNSU_Acer/20200821_map.csv") %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  mutate(across(.fns = as.character)) %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)

df_pos <- bind_rows(map1, map2) %>%
  mutate(geno = paste0("kjs", geno)) %>%
  full_join(df) %>%
  # Create new cooler_side column
  mutate(cooler_side = case_when(tank_id %in% c("B2T2", "B2T4", "B1T2", "B1T4") ~ "left",
                                 tank_id %in% c("B2T1", "B2T3", "B1T1", "B1T3") ~ "right")) %>%
  # Recode cooler column position as number of columns from center of cooler
  mutate(cols_from_ctr = case_when(
    cooler_side == "right" ~ recode(Col, "4" = 4, "3" = 3, "2" = 2, "1" = 1),
    cooler_side == "left" ~ recode(Col, "4" = 1, "3" = 2, "2" = 3, "1" = 4))) %>%
  # Recode cooler row position as number of rows from center of cooler
  mutate(rows_from_ctr = case_when(Row == "A" ~ 3, Row == "B" ~ 2, Row == "C" ~ 1, 
                                   Row == "D" ~ 1, Row == "E" ~ 2, Row == "F" ~ 3))

# Visualize positional effects within each tank
ggplot(df_pos, aes(x = cols_from_ctr, y = fvfm, group = rows_from_ctr, 
                   color = factor(rows_from_ctr), shape = factor(rows_from_ctr))) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(~target_temp)

# Fit model to adjust for positional effects
mod <- lm(fvfm ~ tank_id * cols_from_ctr * rows_from_ctr, data = df_pos, na.action = na.exclude)
anova(mod) %>% tidy() %>% knitr::kable()
term df sumsq meansq statistic p.value
tank_id 7 12.9302741 1.8471820 343.2976813 0.0000000
cols_from_ctr 1 0.0022620 0.0022620 0.4204005 0.5170124
rows_from_ctr 1 0.0038104 0.0038104 0.7081568 0.4004282
tank_id:cols_from_ctr 7 0.0604010 0.0086287 1.6036406 0.1317157
tank_id:rows_from_ctr 7 0.0568088 0.0081155 1.5082692 0.1617473
cols_from_ctr:rows_from_ctr 1 0.0038235 0.0038235 0.7106001 0.3996172
tank_id:cols_from_ctr:rows_from_ctr 7 0.0143711 0.0020530 0.3815518 0.9132242
Residuals 541 2.9109590 0.0053807 NA NA
# Calculate estimated marginal means for the "center" of each tank
emms <- emmeans(mod, specs  = c("tank_id", "cols_from_ctr", "rows_from_ctr")) %>%
  as_tibble() %>%
  select(tank_id, emmean)

# Adjust FvFm using partial residuals to account for positional effects within each tank
df_pos <- augment(mod, df_pos) %>%
  mutate(resid = .resid) %>%
  select(!starts_with("."))

df_pos <- left_join(emms, df_pos) %>%
  mutate(fvfm.adj = emmean + resid) %>%
  select(geno, f, fm, fvfm, emmean, resid, fvfm.adj, target_temp, max_temp, tank_id, cols_from_ctr, rows_from_ctr)

# Select whether to use adjusted or unadjusted fvfm data downstream
df <- if (pos.adjust) {
  # If position adjustment turned on, join with adjusted fvfm values
  left_join(df, df_pos) %>%
    # Select adjusted fvfm to use in modeling below
    select(nursery, geno, f, fm, fvfmraw, fvfm = fvfm.adj, target_temp, max_temp, problem)
} else { 
  # If position adjustment turned off, stick with original data
  df %>% select(nursery, geno, f, fm, fvfmraw, fvfm, target_temp, max_temp, problem)
}

Dose-response curve model fitting

# Select whether to use target temp or actual recorded max temp for model fitting
df <- if (temp.adjust) {
  df %>% select(-target_temp)
} else {
  df %>% mutate(max_temp = target_temp) %>% select(-target_temp)
}

# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = data, 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(120, 0.72, 40),
      lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)

# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
ed50 <- initmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(ed50) %>%
  full_join(df) %>%
  rename(ed50 = estimate)

# Identify problematic data points based on cook's distance and residuals
counts <- vals %>% 
  group_by(nursery, geno) %>% 
  summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
  left_join(counts) %>%
  group_by(nursery, geno) %>%
  mutate(resid.thresh = 2*sd(.resid, na.rm = T)) %>%  # Calculate residual threshold as 2 standard deviations
  mutate(cooksd.thresh = 4/n) %>%   # Calculate cook's distance threshold as 4/n
  mutate(max_to_remove = floor(n * 0.2)) %>%
  ungroup() %>%
  mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
                             .resid > resid.thresh ~ "high residual", 
                             TRUE ~ problem)) %>%
  group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
  mutate(n.outliers = n(),
         rank.out = order(.cooksd, decreasing = TRUE)) %>%
  ungroup() %>%
  mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_, 
                          TRUE ~ fvfm)) 


# Refit models without problematic points
fmods <- dff %>%
  select(nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm) %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
fed50 <- fmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(fed50) %>%
  full_join(select(dff, nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm)) %>%
  rename(ed50 = estimate)

Plot fits

# Select whether to use model-based filtering or not
final <- if (mod.filter) { fvals } else { vals }

# Plot fits
plotfits(data = final)

Summarize data filtering

final %>%
  count(problem) %>%
  mutate(freq = n / sum(n)) %>%
  knitr::kable()
problem n freq
abnormally high 8 0.0135823
abnormally high w/ low Ft 8 0.0135823
high cook’s distance 36 0.0611205
high residual 1 0.0016978
none 536 0.9100170
# Calculate pseudo R2 across dataset of the fitted models with given filtering options
finalmods <- if (mod.filter) { fmods } else { initmods }
calc.ss <- finalmods %>%
  mutate(aov = map(data, ~ aov(fvfm ~ factor(max_temp), data = .)),
         tss = map_dbl(aov, ~ sum(tidy(.)$sumsq)),
         drc.rss = map_dbl(ll3, ~ tidy(modelFit(.))$rss[2]),
         pseudo.R2 = 1 - (drc.rss / tss))

tibble(pseudo_R2 = 1 - (sum(calc.ss$drc.rss) / sum(calc.ss$tss))) %>%
  knitr::kable()
pseudo_R2
0.9156172

Save processed data to file

# Write tidy data to file
final %>%
  select(nursery, geno, max_temp, fvfm) %>%
  drop_na() %>%
  mutate(date = "2020-08") %>%
  write_csv("data/processed/KJS_fvfm_adj_clean.csv")