knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(janitor)
library(lubridate)
library(scales)
library(readxl)
library(ggthemes)
library(drc)
library(broom)
library(tidyverse)
library(joeyr)
library(emmeans)
library(ggpubr)

CBASS temperature logs

Import temperature logs for Arduino-controlled tanks

readlog <- function(logfile, prefix) {
  read_csv(logfile) %>%
    # Remove internal header rows
    filter(PrintDate != "PrintDate") %>%
    # Format date and time
    mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
    unite(time, Th, Tm, Ts, sep = ":") %>%
    unite(dttm, date, time) %>%
    mutate(dttm = ymd_hms(dttm), hm = format(dttm, "%H:%M")) %>%
    select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
    # Pivot to long format
    pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
    # Remove rows where temp sensors gave error codes
    filter(temp > 0, temp < 50) %>%
    # Link arduino box and sensor positions (e.g., B1T2)
    mutate(tank = str_extract(key, "T[0-9]"),
           tank = paste0(prefix, tank),
           key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
                           TRUE ~ str_sub(key, 1, 4)),
           key = tolower(key)) %>%
    # Drop rows that did not parse a date/time
    drop_na(dttm, temp) %>%
    # Create columns for set point and actual temperature
    pivot_wider(names_from = key, values_from = temp) %>%
    # Tidy column types
    mutate(tank = factor(tank), sp = as.numeric(sp), temp = as.numeric(temp)) %>%
    # Add maximum setpoint temperature as max_temp column
    group_by(tank) %>%
    mutate(max_temp = max(sp, na.rm = TRUE)) %>%
    ungroup()
}

# Read in arduino log files
b1log <- readlog("data/raw/20201007_RRT_Acer/temperature/sdlog/20201007_BOX1_LOG.TXT", prefix = "B1")
b2log <- readlog("data/raw/20201007_RRT_Acer/temperature/sdlog/20201007_BOX2_LOG.TXT", prefix = "B2")
b3log <- readlog("data/raw/20201007_RRT_Acer/temperature/sdlog/20201007_BOX3_LOG.TXT", prefix = "B3")

# Tidy arduino log files for this run
arduinolog <- bind_rows(b1log, b2log, b3log) %>%
  # Filter to just the date of this run
  filter(date(dttm) == "2020-10-07") %>%
  # Specify which arduino boxes controlled port vs. starboard tank sets
  mutate(tank_set = if_else(str_detect(tank, "B1"), "port", "starboard")) %>%
  # Add tank_id column from tank design
  left_join(read_csv("data/tank_setup.csv")) %>%
  select(dttm, hm, sp, temp, max_temp, tank_set, tank_id)

Import HOBO temperature logs for Inkbird-controlled tanks

# Read in HOBO temperature logs
hobologfiles <- list.files("data/raw/20201007_RRT_Acer/temperature/hobo", 
                           pattern = ".xlsx", full.names = TRUE)
hobolog <- tibble(filename = hobologfiles) %>%
  mutate(log = map(filename, read_xlsx, skip = 1)) %>%
  unnest()

# Tidy HOBO temperature logs
hobolog <- hobolog %>%
  clean_names() %>%
  # Get tank_id from filename
  mutate(tank_id = str_sub(basename(filename), 1, 2)) %>%
  # Convert temps from F to C
  mutate(temp = (temp_f - 32) * 5/9) %>%
  # Select columns to keep
  select(dttm = date_time_gmt_0400, temp, tank_id) %>%
  mutate(hm = format(dttm, "%H:%M")) %>%
  # Join with tank_id information
  left_join(read_csv("data/tank_setup.csv"))

# Import inkbird setpoints
inkbird_sp <- read_csv("data/program_files/inkbird_settings.csv") %>%
  pivot_longer(2:5, names_to = "max_temp", values_to = "sp") %>%
  mutate(max_temp = as.numeric(str_extract(max_temp, "[0-9]+")),
         dttm = ymd_hms(paste("2020-10-07", time)), hm = format(dttm, "%H:%M"),
         temp = NA) %>%
  left_join(read_csv("data/tank_setup.csv")) %>%
  select(dttm, hm, sp, temp, max_temp, tank_set, tank_id)

hobolog <- bind_rows(inkbird_sp, hobolog)

Plot CBASS temperature profiles and set points

# Combine arduino and hobo temperature log data
templog <- bind_rows(arduinolog, hobolog)

# Import hobo offset data
hobocal <- read_csv("data/processed/hobo_calibrations.csv")
templog <- left_join(templog, hobocal) %>%
  mutate(temp.adj = case_when(!is.na(hobo) ~ temp - mean_offset, TRUE ~  temp)) %>%
  select(dttm, hm, sp, temp = temp.adj, max_temp, tank_set, tank_id)

# Plot
ggplot(templog, mapping = aes(x = dttm, y = temp, group = tank_id)) +
  geom_line(aes(color = tank_id), lwd = 0.2) +
  geom_step(aes(y = sp, color = tank_id), lwd = 0.5) +
  facet_wrap(~ tank_set, scales = "free") +
  scale_y_continuous(breaks = 30:38, limits = c(29, 39)) +
  scale_x_datetime(breaks = "hours", labels = label_date("%H:%M"),
                   limits = as.POSIXct(c("2020-10-07 12:00:00", "2020-10-07 21:00:00"), tz = "UTC")) +
  theme_hc() +
  theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Time of day", y = "Temperature (°C)")

#  Calculate actual hold temperatures
actual_temps <- templog %>%
  filter(hm > "16:15" & hm < "19:05") %>%
  group_by(tank_id) %>%
  summarise(max_temp = mean(temp, na.rm = TRUE)) %>%
  arrange(max_temp)

PAM data

Turn each filtering and adjustment step on or off

# Initial pre-filtering (abnormally high, abnormally high w/ low Ft)
pre.filter <- T

# Post-model-fit filtering (high cook's distance outliers)
mod.filter <- T

# Adjust for positional effects
pos.adjust <- T

# Use actual max temperatures instead of target temperatures
temp.adjust <- T

Import data from DIVING PAM II

# Read manually entered data from Diving PAM II
path <- "data/raw/20201007_RRT_Acer/PAM/20201007dii_Acer.xlsx"
dpii <- path %>%
  excel_sheets() %>%
  set_names() %>%
  map_df(~ read_excel(path = path, sheet = .x, range = "A1:H43", col_types = "text"), .id = "tank_id") %>%
  mutate(f = as.numeric(`F`), fm = as.numeric(`Fm'`), fvfm = as.numeric(`Y(II)`)) %>%
  select(tank_id, Geno, f, fm, fvfm) %>%
  clean_names() %>%
  mutate(geno = str_replace(geno, "-", ""),
         geno = str_replace(geno, " ", ""))

# Combine with tank_id metadata
tanks <- read_csv("data/tank_setup.csv")
dpii <- full_join(dpii, tanks) %>%
  select(geno, f, fm, fvfm, target_temp = max_temp, tank_id, tank_set, cooler_side)

# Add 'RR' to end of 'AcerXXX' genotype names since these are distinct from CRF genotypes with same names
dpii <- dpii %>%
  mutate(geno = case_when(str_detect(geno, "Acer") ~ paste0(geno, "RR"),
                          TRUE ~ geno))

# Replace max_temp with actual max_temp calculated above
dpii <- dpii %>%
  left_join(actual_temps)

Data pre-filtering and QC

dpii <- dpii %>%
  mutate(nursery = "rrt") %>%
  select(nursery, geno, f, fm, fvfm, target_temp, max_temp, tank_id, tank_set, cooler_side)

# Identify points at high temperatures where low background fluorescence (f) resulted in abnormally high values of fvfm
out <- dpii %>%
  filter(target_temp >= 36) %>%
  drop_na(fvfm) %>%
  mutate(mahal = tidy_mahalanobis(f, fvfm),
         is_out = mahal > median(mahal) & f < median(f) & fvfm > median(fvfm))
out1 <- out %>%
  filter(!is_out) %>%
  mutate(mahal = tidy_mahalanobis(f, fvfm),
         is_out = mahal > median(mahal) & f < median(f) & fvfm > median(fvfm))
out2 <- full_join(out, out1, 
                  by = c("nursery", "geno", "f", "fm", "fvfm", "target_temp", "max_temp")) %>%
  mutate(is_out = is_out.x | is_out.y) %>%
  select(nursery, geno, f, fm, fvfm, target_temp, max_temp, is_out)
# Plot showing those outliers
ggplot(out2, aes(x = f, y = fvfm)) +
  geom_point(aes(shape = is_out, color = factor(target_temp))) +
  scale_shape_manual(values = c(19, 25))

# Filter fvfm values based on various outlier classifications
df <- dpii %>%
  # Save raw fvfm in new column
  mutate(fvfmraw = fvfm) %>%          
  # Merge with mahalanobis outlier info from above
  left_join(out2) %>%
  # Identify problematic data points
  mutate(problem = case_when(
    is.na(fvfm) ~ "no signal",
    fvfm > 0.750 ~ "abnormally high",
    is_out ~ "abnormally high w/ low Ft",
    TRUE ~ "none")) %>%
  mutate(fvfm = case_when(
    problem == "abnormally high" ~ NA_real_,    # Change abnormally high values to NA
    problem == "abnormally high w/ low Ft" ~ NA_real_,   
    problem == "none" ~ fvfm))

# Select pre-filtered or raw data to use moving forward based on set option
df <- if (pre.filter) { 
  df 
} else { 
  dpii %>% 
    mutate(fvfmraw = fvfm, 
           problem = case_when(is.na(fvfm) ~ "no signal",
                               TRUE ~ "none") )
}

Positional effects

# Import maps of the position of each genotype in each tank
Pmap <- read_csv("data/raw/20201007_RRT_Acer/P_map.csv") %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)
Smap <- read_csv("data/raw/20201007_RRT_Acer/S_map.csv") %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)

df_pos <- bind_rows(Pmap, Smap) %>%
  full_join(df) %>%
  # Create new cooler_id column
  mutate(cooler = case_when(tank_id %in% c("P1", "P2") ~ "p_cooler_1",
                            tank_id %in% c("P3", "P4") ~ "p_cooler_2",
                            tank_id %in% c("P5", "P6") ~ "p_cooler_3",
                            tank_id %in% c("P7", "P8") ~ "p_cooler_4",
                            tank_id %in% c("S1", "S2") ~ "s_cooler_1",
                            tank_id %in% c("S3", "S4") ~ "s_cooler_2",
                            tank_id %in% c("S5", "S6") ~ "s_cooler_3",
                            tank_id %in% c("S7", "S8") ~ "s_cooler_4")) %>%
  # Recode cooler column position as number of columns from center of cooler
  mutate(cols_from_ctr = case_when(
    cooler_side == "right" ~ recode(Col, "5" = 4, "4" = 4, "3" = 3, "2" = 2, "1" = 1),
    cooler_side == "left"  ~ recode(Col, "5" = 1, "4" = 1, "3" = 2, "2" = 3, "1" = 4))) %>%
  # Recode cooler row position as number of rows from center of cooler
  mutate(rows_from_ctr = case_when(Row == "A" ~ 3, Row == "B" ~ 2, Row == "C" ~ 1, 
                                   Row == "D" ~ 1, Row == "E" ~ 2, Row == "F" ~ 3))

# Visualize positional effects within each tank
ggplot(df_pos, aes(x = cols_from_ctr, y = fvfm, group = rows_from_ctr, 
                   color = factor(rows_from_ctr), shape = factor(rows_from_ctr))) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(tank_set~target_temp)

# Fit model to adjust for positional effects
mod <- lm(fvfm ~ tank_id * cols_from_ctr * rows_from_ctr, data = df_pos, na.action = na.exclude)
anova(mod) %>% tidy() %>% knitr::kable()
term df sumsq meansq statistic p.value
tank_id 15 13.5105941 0.9007063 144.975327 0.0000000
cols_from_ctr 1 0.0316618 0.0316618 5.096202 0.0243829
rows_from_ctr 1 0.0118465 0.0118465 1.906777 0.1679016
tank_id:cols_from_ctr 15 0.1130987 0.0075399 1.213605 0.2565619
tank_id:rows_from_ctr 15 0.1582969 0.0105531 1.698603 0.0474516
cols_from_ctr:rows_from_ctr 1 0.0415765 0.0415765 6.692043 0.0099487
tank_id:cols_from_ctr:rows_from_ctr 15 0.2101299 0.0140087 2.254797 0.0044209
Residuals 532 3.3052227 0.0062128 NA NA
# Calculate estimated marginal means for the "center" of each tank
emms <- emmeans(mod, specs  = c("tank_id", "cols_from_ctr", "rows_from_ctr")) %>%
  as_tibble() %>%
  select(tank_id, emmean)

# Adjust FvFm using partial residuals to account for positional effects within each tank
df_pos <- augment(mod, df_pos) %>%
  mutate(resid = .resid) %>%
  select(!starts_with("."))

df_pos <- left_join(emms, df_pos) %>%
  mutate(fvfm.adj = emmean + resid) %>%
  select(geno, f, fm, fvfm, emmean, resid, fvfm.adj, target_temp, max_temp, tank_id, tank_set, cooler, cols_from_ctr, rows_from_ctr)

# Select whether to use adjusted or unadjusted fvfm data downstream
df <- if (pos.adjust) {
  # If position adjustment turned on, join with adjusted fvfm values
  left_join(df, df_pos) %>%
    # Select adjusted fvfm to use in modeling below
    select(nursery, geno, f, fm, fvfmraw, fvfm = fvfm.adj, target_temp, max_temp, problem)
} else { 
  # If position adjustment turned off, stick with original data
  df %>% select(nursery, geno, f, fm, fvfmraw, fvfm, target_temp, max_temp, problem)
}

Dose-response curve model fitting

# Select whether to use target temp or actual recorded max temp for model fitting
df <- if (temp.adjust) {
  df %>% select(-target_temp)
} else {
  df %>% mutate(max_temp = target_temp) %>% select(-target_temp)
}

# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = data, 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(120, 0.72, 40),
      lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)

# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
ed50 <- initmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(ed50) %>%
  full_join(df) %>%
  rename(ed50 = estimate)

# Identify problematic data points based on cook's distance and residuals
counts <- vals %>% 
  group_by(nursery, geno) %>% 
  summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
  left_join(counts) %>%
  group_by(nursery, geno) %>%
  mutate(resid.thresh = 2*sd(.resid, na.rm = T)) %>%  # Calculate residual threshold as 2 standard deviations
  mutate(cooksd.thresh = 4/n) %>%   # Calculate cook's distance threshold as 4/n
  mutate(max_to_remove = floor(n * 0.2)) %>%
  ungroup() %>%
  mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
                             .resid > resid.thresh ~ "high residual", 
                             TRUE ~ problem)) %>%
  group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
  mutate(n.outliers = n(),
         rank.out = order(.cooksd, decreasing = TRUE)) %>%
  ungroup() %>%
  mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_, 
                          TRUE ~ fvfm)) 


# Refit models without problematic points
fmods <- dff %>%
  select(nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm) %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
fed50 <- fmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(fed50) %>%
  full_join(select(dff, nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm)) %>%
  rename(ed50 = estimate)

Plot fits

# Select whether to use model-based filtering or not
final <- if (mod.filter) { fvals } else { vals }

# Plot fits
plotfits(data = final)

Summarize data filtering

final %>%
  count(problem) %>%
  mutate(freq = n / sum(n)) %>%
  knitr::kable()
problem n freq
abnormally high 11 0.0163205
abnormally high w/ low Ft 59 0.0875371
high cook’s distance 39 0.0578635
high residual 2 0.0029674
no signal 8 0.0118694
none 555 0.8234421
# Calculate pseudo R2 across dataset of the fitted models with given filtering options
finalmods <- if (mod.filter) { fmods } else { initmods }
calc.ss <- finalmods %>%
  mutate(aov = map(data, ~ aov(fvfm ~ factor(max_temp), data = .)),
         tss = map_dbl(aov, ~ sum(tidy(.)$sumsq)),
         drc.rss = map_dbl(ll3, ~ tidy(modelFit(.))$rss[2]),
         pseudo.R2 = 1 - (drc.rss / tss))

tibble(pseudo_R2 = 1 - (sum(calc.ss$drc.rss) / sum(calc.ss$tss))) %>%
  knitr::kable()
pseudo_R2
0.8923435

Save processed data to file

# Write tidy data to file
final %>%
  select(nursery, geno, max_temp, fvfm) %>%
  drop_na() %>%
  mutate(date = "2020-10") %>%
  write_csv("data/processed/rrt_fvfm_adj_clean.csv")