knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

library(janitor)
library(lubridate)
library(scales)
library(readxl)
library(ggthemes)
library(drc)
library(broom)
library(tidyverse)
library(emmeans)
library(ggpubr)
library(joeyr)

CBASS temperature logs

Import CBASS log files

b1log <- read_csv("data/raw/20200820_KJSNSU_Acer/Log_Files/B1_LOG_0821.TXT")

b1log <- b1log %>%
  filter(PrintDate != "PrintDate") %>%
  mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
  unite(time, Th, Tm, Ts, sep = ":") %>%
  unite(dttm, date, time) %>%
  mutate(dttm = ymd_hms(dttm),
         hm = format(dttm, "%H:%M")) %>%
  select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
  pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
  filter(temp > 0, temp < 50) %>%
  mutate(tank = str_extract(key, "T[0-9]"),
         tank = paste0("B1", tank),
         key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
                         TRUE ~ str_sub(key, 1, 4))) %>%
  pivot_wider(names_from = key, values_from = temp)

b2log <- read_csv("data/raw/20200820_KJSNSU_Acer/Log_Files/B2_LOG_0821.TXT")

b2log <- b2log %>%
  filter(PrintDate != "PrintDate") %>%
  mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
  unite(time, Th, Tm, Ts, sep = ":") %>%
  unite(dttm, date, time) %>%
  mutate(dttm = ymd_hms(dttm),
         hm = format(dttm, "%H:%M")) %>%
  select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
  pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
  filter(temp > 0, temp < 50) %>%
  mutate(tank = str_extract(key, "T[0-9]"),
         tank = paste0("B2", tank),
         key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
                         TRUE ~ str_sub(key, 1, 4))) %>%
  pivot_wider(names_from = key, values_from = temp)

log <- bind_rows(b1log, b2log) %>%
  mutate(tank = factor(tank),
         SP = as.numeric(SP),
         Temp = as.numeric(Temp))

Import CBASS set points (used for settings file)

sp <- read_xlsx("data/raw/20200820_KJSNSU_Acer/Temp_Profiles/CRII_ACERV_2020.xlsx") %>%
  mutate(Time = format(Time, "%H:%M")) %>%
  filter(Time >= "13:00", Time <= "22:00") %>%
  crossing(date = as_date(c("2020-08-20", "2020-08-21"))) %>%
  mutate(dttm = ymd_hm(paste(date, Time))) %>%
  select(dttm, matches("B")) %>%
  pivot_longer(-dttm, names_to = "tank", values_to = "Temp") %>%
  arrange(dttm)

Plot CBASS temperature profiles with set points

ggplot(mapping = aes(x = dttm, y = Temp, group = tank)) +
  geom_line(data = filter(log, hm > "12:50", hm < "21:50"),
            aes(color = tank), lwd = 0.2) +
  geom_step(data = sp, lwd = 0.2) +
  facet_wrap(~date(dttm), scales = "free") +
  scale_y_continuous(breaks = 30:37) +
  scale_x_datetime(breaks = "hours", labels = label_date("%H:%M")) +
  theme_hc() +
  theme(legend.position = "none") +
  labs(x = "Time of day", y = "Temperature (°C)")

#  Calculate actual hold temperatures
actual_temps <- log %>%
  filter(hm > "16:15" & hm < "19:05") %>%
  group_by(tank_id = tank) %>%
  summarise(max_temp = mean(Temp, na.rm = TRUE)) %>%
  arrange(max_temp)

actual_temps

## # A tibble: 8 x 2
##   tank_id max_temp
##   <fct>      <dbl>
## 1 B1T1        30.1
## 2 B1T2        31.0
## 3 B1T3        32.0
## 4 B1T4        33.0
## 5 B2T1        34.0
## 6 B2T2        34.9
## 7 B2T3        35.9
## 8 B2T4        36.9

PAM data

Turn each filtering and adjustment step on or off

# Initial pre-filtering (abnormally high, abnormally high w/ low Ft)
pre.filter <- T

# Post-model-fit filtering (high cook's distance outliers)
mod.filter <- T

# Adjust for positional effects
pos.adjust <- T

# Use actual max temperatures instead of target temperatures
temp.adjust <- T

Data pre-filtering and QC

dpii <- read_csv("data/raw/20200820_KJSNSU_Acer/PAM/KJS_pamdata_tidy.csv")

dpii <- dpii %>%
  mutate(nursery = "nsu",
         geno = paste0("kjs", Genotype)) %>%
  select(nursery, geno, f, fm, fvfm, target_temp = Final_Temp, tank_id = Tank) %>%
  drop_na(fvfm)

# Replace max_temp with actual max_temp calculated above
dpii <- dpii %>%
  left_join(actual_temps)

# Identify points at high temperatures where low background fluorescence (f) resulted in abnormally high values of fvfm
out <- dpii %>%
  filter(target_temp >= 36) %>%
  drop_na(fvfm) %>%
  mutate(mahal = tidy_mahalanobis(f, fvfm),
         is_out = mahal > quantile(mahal, 0.75) & f < median(f) & fvfm > median(fvfm, na.rm = T))
# Plot showing those outliers
ggplot(out, aes(x = f, y = fvfm)) +
  geom_point(aes(shape = is_out, color = factor(target_temp))) +
  scale_shape_manual(values = c(19, 25))

df <- dpii %>%
  # Save raw fvfm data in new column
  mutate(fvfmraw = fvfm) %>% 
  # Merge with mahalanobis outlier info from above
  left_join(out) %>%
  # Identify problematic data points
  mutate(problem = case_when(
    fvfm > 0.75 ~ "abnormally high",
    is_out ~ "abnormally high w/ low Ft",
    TRUE ~ "none")) %>%
  mutate(fvfm = case_when(
    problem == "abnormally high" ~ NA_real_,    # Change abnormally high values to NA
    problem == "abnormally high w/ low Ft" ~ NA_real_,
    problem == "none" ~ fvfm))

# Select pre-filtered or raw data to use moving forward based on set option
df <- if (pre.filter) { 
  df 
} else { 
  dpii %>% 
    mutate(fvfmraw = fvfm, 
           problem = case_when(is.na(fvfm) ~ "no signal",
                               TRUE ~ "none") )
}

Adjust for positional effects

# Import maps of the position of each genotype in each tank
map1 <- read_csv("data/raw/20200820_KJSNSU_Acer/20200820_map.csv") %>%
  mutate(across(.fns = as.character)) %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)
map2 <- read_csv("data/raw/20200820_KJSNSU_Acer/20200821_map.csv") %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  mutate(across(.fns = as.character)) %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)

df_pos <- bind_rows(map1, map2) %>%
  mutate(geno = paste0("kjs", geno)) %>%
  full_join(df) %>%
  # Create new cooler_side column
  mutate(cooler_side = case_when(tank_id %in% c("B2T2", "B2T4", "B1T2", "B1T4") ~ "left",
                                 tank_id %in% c("B2T1", "B2T3", "B1T1", "B1T3") ~ "right")) %>%
  # Recode cooler column position as number of columns from center of cooler
  mutate(cols_from_ctr = case_when(
    cooler_side == "right" ~ recode(Col, "4" = 4, "3" = 3, "2" = 2, "1" = 1),
    cooler_side == "left" ~ recode(Col, "4" = 1, "3" = 2, "2" = 3, "1" = 4))) %>%
  # Recode cooler row position as number of rows from center of cooler
  mutate(rows_from_ctr = case_when(Row == "A" ~ 3, Row == "B" ~ 2, Row == "C" ~ 1, 
                                   Row == "D" ~ 1, Row == "E" ~ 2, Row == "F" ~ 3))

# Visualize positional effects within each tank
ggplot(df_pos, aes(x = cols_from_ctr, y = fvfm, group = rows_from_ctr, 
                   color = factor(rows_from_ctr), shape = factor(rows_from_ctr))) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(~target_temp)

# Fit model to adjust for positional effects
mod <- lm(fvfm ~ tank_id * cols_from_ctr * rows_from_ctr, data = df_pos, na.action = na.exclude)
anova(mod) %>% tidy() %>% knitr::kable()

term	df	sumsq	meansq	statistic	p.value
tank_id	7	12.9302741	1.8471820	343.2976813	0.0000000
cols_from_ctr	1	0.0022620	0.0022620	0.4204005	0.5170124
rows_from_ctr	1	0.0038104	0.0038104	0.7081568	0.4004282
tank_id:cols_from_ctr	7	0.0604010	0.0086287	1.6036406	0.1317157
tank_id:rows_from_ctr	7	0.0568088	0.0081155	1.5082692	0.1617473
cols_from_ctr:rows_from_ctr	1	0.0038235	0.0038235	0.7106001	0.3996172
tank_id:cols_from_ctr:rows_from_ctr	7	0.0143711	0.0020530	0.3815518	0.9132242
Residuals	541	2.9109590	0.0053807	NA	NA

# Calculate estimated marginal means for the "center" of each tank
emms <- emmeans(mod, specs  = c("tank_id", "cols_from_ctr", "rows_from_ctr")) %>%
  as_tibble() %>%
  select(tank_id, emmean)

# Adjust FvFm using partial residuals to account for positional effects within each tank
df_pos <- augment(mod, df_pos) %>%
  mutate(resid = .resid) %>%
  select(!starts_with("."))

df_pos <- left_join(emms, df_pos) %>%
  mutate(fvfm.adj = emmean + resid) %>%
  select(geno, f, fm, fvfm, emmean, resid, fvfm.adj, target_temp, max_temp, tank_id, cols_from_ctr, rows_from_ctr)

# Select whether to use adjusted or unadjusted fvfm data downstream
df <- if (pos.adjust) {
  # If position adjustment turned on, join with adjusted fvfm values
  left_join(df, df_pos) %>%
    # Select adjusted fvfm to use in modeling below
    select(nursery, geno, f, fm, fvfmraw, fvfm = fvfm.adj, target_temp, max_temp, problem)
} else { 
  # If position adjustment turned off, stick with original data
  df %>% select(nursery, geno, f, fm, fvfmraw, fvfm, target_temp, max_temp, problem)
}

Dose-response curve model fitting

# Select whether to use target temp or actual recorded max temp for model fitting
df <- if (temp.adjust) {
  df %>% select(-target_temp)
} else {
  df %>% mutate(max_temp = target_temp) %>% select(-target_temp)
}

# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = data, 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(120, 0.72, 40),
      lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)

# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
ed50 <- initmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(ed50) %>%
  full_join(df) %>%
  rename(ed50 = estimate)

# Identify problematic data points based on cook's distance and residuals
counts <- vals %>% 
  group_by(nursery, geno) %>% 
  summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
  left_join(counts) %>%
  group_by(nursery, geno) %>%
  mutate(resid.thresh = 2*sd(.resid, na.rm = T)) %>%  # Calculate residual threshold as 2 standard deviations
  mutate(cooksd.thresh = 4/n) %>%   # Calculate cook's distance threshold as 4/n
  mutate(max_to_remove = floor(n * 0.2)) %>%
  ungroup() %>%
  mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
                             .resid > resid.thresh ~ "high residual", 
                             TRUE ~ problem)) %>%
  group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
  mutate(n.outliers = n(),
         rank.out = order(.cooksd, decreasing = TRUE)) %>%
  ungroup() %>%
  mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_, 
                          TRUE ~ fvfm)) 


# Refit models without problematic points
fmods <- dff %>%
  select(nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm) %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
fed50 <- fmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(fed50) %>%
  full_join(select(dff, nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm)) %>%
  rename(ed50 = estimate)

Plot fits

# Select whether to use model-based filtering or not
final <- if (mod.filter) { fvals } else { vals }

# Plot fits
plotfits(data = final)

Summarize data filtering

final %>%
  count(problem) %>%
  mutate(freq = n / sum(n)) %>%
  knitr::kable()

problem	n	freq
abnormally high	8	0.0135823
abnormally high w/ low Ft	8	0.0135823
high cook’s distance	36	0.0611205
high residual	1	0.0016978
none	536	0.9100170

# Calculate pseudo R2 across dataset of the fitted models with given filtering options
finalmods <- if (mod.filter) { fmods } else { initmods }
calc.ss <- finalmods %>%
  mutate(aov = map(data, ~ aov(fvfm ~ factor(max_temp), data = .)),
         tss = map_dbl(aov, ~ sum(tidy(.)$sumsq)),
         drc.rss = map_dbl(ll3, ~ tidy(modelFit(.))$rss[2]),
         pseudo.R2 = 1 - (drc.rss / tss))

tibble(pseudo_R2 = 1 - (sum(calc.ss$drc.rss) / sum(calc.ss$tss))) %>%
  knitr::kable()

pseudo_R2
0.9156172

Save processed data to file

# Write tidy data to file
final %>%
  select(nursery, geno, max_temp, fvfm) %>%
  drop_na() %>%
  mutate(date = "2020-08") %>%
  write_csv("data/processed/KJS_fvfm_adj_clean.csv")

KJS/NSU A. cervicornis CBASS run

R/V Coral Reef II, August 20-21, 2020

CBASS temperature logs

Import CBASS log files

Import CBASS set points (used for settings file)

Plot CBASS temperature profiles with set points

PAM data

Turn each filtering and adjustment step on or off

Data pre-filtering and QC

Adjust for positional effects

Dose-response curve model fitting

Plot fits

Summarize data filtering

Save processed data to file