knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

CBASS temperature logs

Import temperature logs for Arduino-controlled tanks

readlog <- function(logfile, prefix) {
  read_csv(logfile) %>%
    # Remove internal header rows
    filter(PrintDate != "PrintDate") %>%
    # Format date and time
    mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
    unite(time, Th, Tm, Ts, sep = ":") %>%
    unite(dttm, date, time) %>%
    mutate(dttm = ymd_hms(dttm), hm = format(dttm, "%H:%M")) %>%
    select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
    # Pivot to long format
    pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
    # Remove rows where temp sensors gave error codes
    filter(temp > 0, temp < 50) %>%
    # Link arduino box and sensor positions (e.g., B1T2)
    mutate(tank = str_extract(key, "T[0-9]"),
           tank = paste0(prefix, tank),
           key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
                           TRUE ~ str_sub(key, 1, 4)),
           key = tolower(key)) %>%
    # Drop rows that did not parse a date/time
    drop_na(dttm, temp) %>%
    # Create columns for set point and actual temperature
    pivot_wider(names_from = key, values_from = temp) %>%
    # Tidy column types
    mutate(tank = factor(tank), sp = as.numeric(sp), temp = as.numeric(temp)) %>%
    # Add maximum setpoint temperature as max_temp column
    group_by(tank) %>%
    mutate(max_temp = max(sp, na.rm = TRUE)) %>%
    ungroup()
}

# Read in arduino log files
b1log <- readlog("data/raw/20201013_CRF_Acer/temperature/sdlog/20201013_BOX1_LOG.TXT", prefix = "B1")
b3log <- readlog("data/raw/20201013_CRF_Acer/temperature/sdlog/20201013_BOX3_LOG.TXT", prefix = "B3")
### NEED BOX2 DATA FROM 10/13

# Tidy arduino log files for this run
arduinolog <- bind_rows(b1log, b3log) %>%
  # Filter to just the date of this run
  filter(date(dttm) == "2020-10-13") %>%
  # Specify which arduino boxes controlled port vs. starboard tank sets
  mutate(tank_set = if_else(str_detect(tank, "B1"), "starboard", "port")) %>%
  # Add tank_id column from tank design
  left_join(read_csv("data/tank_setup.csv")) %>%
  select(dttm, hm, sp, temp, max_temp, tank_set, tank_id)

Import HOBO temperature logs for Inkbird-controlled tanks

# Read in HOBO temperature logs
hobologfiles <- list.files("data/raw/20201013_CRF_Acer/temperature/hobo", pattern = ".xlsx", full.names = TRUE)
hobolog <- tibble(filename = hobologfiles) %>%
  mutate(log = map(filename, read_xlsx, skip = 1)) %>%
  unnest()

# Tidy HOBO temperature logs
hobolog <- hobolog %>%
  clean_names() %>%
  # Get tank_id from filename
  mutate(tank_id = str_sub(basename(filename), 1, 2)) %>%
  # Convert temps from F to C
  mutate(temp = (temp_f - 32) * 5/9) %>%
  # Select columns to keep
  select(dttm = date_time_gmt_0400, temp, tank_id) %>%
  mutate(hm = format(dttm, "%H:%M")) %>%
  # Join with tank_id information
  left_join(read_csv("data/tank_setup.csv")) %>%
  # Filter only target date
  filter(date(dttm) == "2020-10-13")

# Import inkbird setpoints
inkbird_sp <- read_csv("data/program_files/inkbird_settings.csv") %>%
  pivot_longer(2:5, names_to = "max_temp", values_to = "sp") %>%
  mutate(max_temp = as.numeric(str_extract(max_temp, "[0-9]+")),
         dttm = ymd_hms(paste("2020-10-13", time)), hm = format(dttm, "%H:%M"),
         temp = NA) %>%
  left_join(read_csv("data/tank_setup.csv")) %>%
  select(dttm, hm, sp, temp, max_temp, tank_set, tank_id)

hobolog <- bind_rows(inkbird_sp, hobolog)

Plot CBASS temperature profiles and set points

# Combine arduino and hobo temperature log data
templog <- bind_rows(arduinolog, hobolog)

# Import hobo offset data
hobocal <- read_csv("data/processed/hobo_calibrations.csv")
templog <- left_join(templog, hobocal) %>%
  mutate(temp.adj = case_when(!is.na(hobo) ~ temp - mean_offset, TRUE ~  temp)) %>%
  select(dttm, hm, sp, temp = temp.adj, max_temp, tank_set, tank_id)

# Plot
ggplot(templog, mapping = aes(x = dttm, y = temp, group = tank_id)) +
  geom_line(aes(color = tank_id), lwd = 0.2) +
  geom_step(aes(y = sp, color = tank_id), lwd = 0.5) +
  facet_wrap(~ tank_set, scales = "free") +
  scale_y_continuous(breaks = 30:38, limits = c(29, 39)) +
  scale_x_datetime(breaks = "hours", labels = label_date("%H:%M"),
                   limits = as.POSIXct(c("2020-10-13 12:00:00", "2020-10-13 21:00:00"), tz = "UTC")) +
  theme_hc() +
  theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Time of day", y = "Temperature (°C)")

#  Calculate actual hold temperatures
actual_temps <- templog %>%
  filter(hm > "16:15" & hm < "19:05") %>%
  group_by(tank_id) %>%
  summarise(max_temp = mean(temp, na.rm = TRUE)) %>%
  arrange(max_temp)

# Combine with tank metadata
tanks <- read_csv("data/tank_setup.csv")
actual_temps %>%
  full_join(tanks, by = "tank_id") %>%
  select(target_temp = max_temp.y, max_temp = max_temp.x, tank_set) %>%
  pivot_wider(names_from = target_temp, values_from = max_temp) %>%
  knitr::kable(digits = 2)
tank_set 30 32 33 34 35 36 37 38
port 30.36 32.48 33.52 34.26 34.99 35.97 36.97 37.80
starboard 30.49 32.51 33.16 34.52 34.95 35.97 36.88 37.91

PAM data

Turn each filtering and adjustment step on or off

# Initial pre-filtering (abnormally high, abnormally high w/ low Ft)
pre.filter <- TRUE

# Post-model-fit filtering (high cook's distance outliers)
mod.filter <- TRUE

# Adjust for positional effects
pos.adjust <- TRUE

# Use actual max temperatures instead of target temperatures
temp.adjust <- TRUE
#### Import data from DIVING PAM II
# Read manually entered data from Diving PAM II
path <- "data/raw/20201013_CRF_Acer/PAM/20201013dii_Acer.xlsx"
dpii <- path %>%
  excel_sheets() %>%
  set_names() %>%
  discard(.p = ~str_detect(.x, "Acer")) %>%
  map_df(~ read_excel(path = path, sheet = .x, range = "A1:H45", col_types = "text"), .id = "tank_id") %>%
  mutate(f = as.numeric(`F`), fm = as.numeric(`Fm'`), fvfm = as.numeric(`Y (II)`)) %>%
  select(tank_id, Geno, f, fm, fvfm) %>%
  clean_names() %>%
  mutate(geno = str_replace(geno, "-", ""),
         geno = str_replace(geno, " ", ""))

# Combine with tank_id metadata
dpii <- full_join(dpii, tanks) %>%
  select(geno, f, fm, fvfm, tank_id, tank_set, target_temp = max_temp, cooler_side)

# Replace max_temp with actual max_temp calculated above
dpii <- dpii %>%
  left_join(actual_temps)

Data pre-filtering and QC

dpii <- dpii %>%
  mutate(nursery = "crf") %>%
  select(nursery, geno, f, fm, fvfm, target_temp, max_temp, tank_id, tank_set, cooler_side)

# Identify points at high temperatures where low background fluorescence (f) resulted in abnormally high values of fvfm
out <- dpii %>%
  filter(target_temp >= 36) %>%
  drop_na(fvfm) %>%
  mutate(mahal = tidy_mahalanobis(f, fvfm),
         is_out = mahal > median(mahal) & f < median(f) & fvfm > median(fvfm, na.rm = T))
# Plot showing those outliers
ggplot(out, aes(x = f, y = fvfm)) +
  geom_point(aes(shape = is_out, color = factor(target_temp))) +
  scale_shape_manual(values = c(19, 25))

# Filter fvfm values based on various outlier classifications
df <- dpii %>%
  # Save raw fvfm in new column
  mutate(fvfmraw = fvfm) %>%          
  # Merge with mahalanobis outlier info from above
  left_join(out) %>%
  # Identify problematic data points
  mutate(problem = case_when(
    is.na(fvfm) ~ "no signal",
    fvfm > 0.750 ~ "abnormally high",
    is_out ~ "abnormally high w/ low Ft",
    TRUE ~ "none")) %>%
  mutate(fvfm = case_when(
    problem == "abnormally high" ~ NA_real_,    # Change abnormally high values to NA
    problem == "abnormally high w/ low Ft" ~ NA_real_,   
    problem == "none" ~ fvfm))# %>%
  #select(nursery, geno, f, fm, fvfm, target_temp, max_temp, tank_id, tank_set, cooler_side, fvfmraw, problem)

# Select pre-filtered or raw data to use moving forward based on set option
df <- if (pre.filter) { 
  df 
} else { 
  dpii %>% 
    mutate(fvfmraw = fvfm, 
           problem = case_when(is.na(fvfm) ~ "no signal",
                               TRUE ~ "none") )
}

Positional effects

# Import maps of the position of each genotype in each tank
Pmap <- read_csv("data/raw/20201013_CRF_Acer/P_map.csv") %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)
Smap <- read_csv("data/raw/20201013_CRF_Acer/S_map.csv") %>%
  pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
  drop_na() %>%
  unite(pos, Row, Col, sep = "", remove = FALSE)

df_pos <- bind_rows(Pmap, Smap) %>%
  full_join(df) %>%
  # Create new cooler_id column
  mutate(cooler = case_when(tank_id %in% c("P1", "P2") ~ "p_cooler_1",
                            tank_id %in% c("P3", "P4") ~ "p_cooler_2",
                            tank_id %in% c("P5", "P6") ~ "p_cooler_3",
                            tank_id %in% c("P7", "P8") ~ "p_cooler_4",
                            tank_id %in% c("S1", "S2") ~ "s_cooler_1",
                            tank_id %in% c("S3", "S4") ~ "s_cooler_2",
                            tank_id %in% c("S5", "S6") ~ "s_cooler_3",
                            tank_id %in% c("S7", "S8") ~ "s_cooler_4")) %>%
  # Recode cooler column position as number of columns from center of cooler
  mutate(cols_from_ctr = case_when(
    cooler_side == "right" ~ recode(Col, "5" = 4, "4" = 4, "3" = 3, "2" = 2, "1" = 1),
    cooler_side == "left" ~ recode(Col, "5" = 1, "4" = 1, "3" = 2, "2" = 3, "1" = 4))) %>%
  # Recode cooler row position as number of rows from center of cooler
  mutate(rows_from_ctr = case_when(Row == "A" ~ 3, Row == "B" ~ 2, Row == "C" ~ 1, 
                                   Row == "D" ~ 1, Row == "E" ~ 2, Row == "F" ~ 3))

# Visualize positional effects within each tank
ggplot(df_pos, aes(x = cols_from_ctr, y = fvfm, group = rows_from_ctr, 
                   color = factor(rows_from_ctr), shape = factor(rows_from_ctr))) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(tank_set~target_temp)

# Fit model to adjust for positional effects
mod <- lm(fvfm ~ tank_id * cols_from_ctr * rows_from_ctr, data = df_pos, na.action = na.exclude)
anova(mod) %>% tidy() %>% knitr::kable()
term df sumsq meansq statistic p.value
tank_id 15 15.2085797 1.0139053 258.9246355 0.0000000
cols_from_ctr 1 0.0707014 0.0707014 18.0552655 0.0000250
rows_from_ctr 1 0.0321874 0.0321874 8.2198050 0.0042959
tank_id:cols_from_ctr 15 0.0762186 0.0050812 1.2976142 0.1980132
tank_id:rows_from_ctr 15 0.0472791 0.0031519 0.8049217 0.6727092
cols_from_ctr:rows_from_ctr 1 0.0000829 0.0000829 0.0211652 0.8843809
tank_id:cols_from_ctr:rows_from_ctr 15 0.0454020 0.0030268 0.7729650 0.7083951
Residuals 573 2.2437716 0.0039158 NA NA
# Calculate estimated marginal means for the "center" of each tank
emms <- emmeans(mod, specs  = c("tank_id", "cols_from_ctr", "rows_from_ctr")) %>%
  as_tibble() %>%
  select(tank_id, emmean)

# Adjust FvFm using partial residuals to account for positional effects within each tank
df_pos <- augment(mod, df_pos) %>%
  mutate(resid = .resid) %>%
  select(!starts_with("."))

df_pos <- left_join(emms, df_pos) %>%
  mutate(fvfm.adj = emmean + resid) %>%
  select(geno, f, fm, fvfm, emmean, resid, fvfm.adj, target_temp, max_temp, tank_id, tank_set, cooler, cols_from_ctr, rows_from_ctr)

# Select whether to use adjusted or unadjusted fvfm data downstream
df <- if (pos.adjust) {
  # If position adjustment turned on, join with adjusted fvfm values
  left_join(df, df_pos) %>%
    # Select adjusted fvfm to use in modeling below
    select(nursery, geno, f, fm, fvfmraw, fvfm = fvfm.adj, target_temp, max_temp, problem)
} else { 
  # If position adjustment turned off, stick with original data
  df %>% select(nursery, geno, f, fm, fvfmraw, fvfm, target_temp, max_temp, problem)
}

Dose-response curve model fitting

# Select whether to use target temp or actual recorded max temp for model fitting
df <- if (temp.adjust) {
  df %>% select(-target_temp)
} else {
  df %>% mutate(max_temp = target_temp) %>% select(-target_temp)
}

# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = data, 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(120, 0.72, 40),
      lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)

# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
ed50 <- initmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(ed50) %>%
  full_join(df) %>%
  rename(ed50 = estimate)

# Identify problematic data points based on cook's distance and residuals
counts <- vals %>% 
  group_by(nursery, geno) %>% 
  summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
  left_join(counts) %>%
  group_by(nursery, geno) %>%
  mutate(resid.thresh = 2*sd(.resid, na.rm = T)) %>%  # Calculate residual threshold as 2 standard deviations
  mutate(cooksd.thresh = 4/n) %>%   # Calculate cook's distance threshold as 4/n
  mutate(max_to_remove = floor(n * 0.2)) %>%
  ungroup() %>%
  mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
                             .resid > resid.thresh ~ "high residual", 
                             TRUE ~ problem)) %>%
  group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
  mutate(n.outliers = n(),
         rank.out = order(.cooksd, decreasing = TRUE)) %>%
  ungroup() %>%
  mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_, 
                          TRUE ~ fvfm)) 


# Refit models without problematic points
fmods <- dff %>%
  select(nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm) %>%
  nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract ed50 parameter values from model fits
fed50 <- fmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(fed50) %>%
  full_join(select(dff, nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm)) %>%
  rename(ed50 = estimate)

Plot fits

# Select whether to use model-based filtering or not
final <- if (mod.filter) { fvals } else { vals }

# Plot fits
plotfits(data = final)

Summarize data filtering

final %>%
  count(problem) %>%
  mutate(freq = n / sum(n)) %>%
  knitr::kable()
problem n freq
abnormally high w/ low Ft 31 0.0440341
high cook’s distance 36 0.0511364
high residual 1 0.0014205
no signal 36 0.0511364
none 600 0.8522727
# Calculate pseudo R2 across dataset of the fitted models with given filtering options
finalmods <- if (mod.filter) { fmods } else { initmods }
calc.ss <- finalmods %>%
  mutate(aov = map(data, ~ aov(fvfm ~ factor(max_temp), data = .)),
         tss = map_dbl(aov, ~ sum(tidy(.)$sumsq)),
         drc.rss = map_dbl(ll3, ~ tidy(modelFit(.))$rss[2]),
         pseudo.R2 = 1 - (drc.rss / tss))

tibble(pseudo_R2 = 1 - (sum(calc.ss$drc.rss) / sum(calc.ss$tss))) %>%
  knitr::kable()
pseudo_R2
0.9055564

Save processed data to file

# Write tidy data to file
final %>%
  select(nursery, geno, max_temp, fvfm) %>%
  drop_na() %>%
  mutate(date = "2020-10") %>%
  write_csv("data/processed/crf_fvfm_adj_clean.csv")