knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
CBASS temperature logs
Import temperature logs for Arduino-controlled tanks
readlog <- function(logfile, prefix) {
read_csv(logfile) %>%
# Remove internal header rows
filter(PrintDate != "PrintDate") %>%
# Format date and time
mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
unite(time, Th, Tm, Ts, sep = ":") %>%
unite(dttm, date, time) %>%
mutate(dttm = ymd_hms(dttm), hm = format(dttm, "%H:%M")) %>%
select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
# Pivot to long format
pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
# Remove rows where temp sensors gave error codes
filter(temp > 0, temp < 50) %>%
# Link arduino box and sensor positions (e.g., B1T2)
mutate(tank = str_extract(key, "T[0-9]"),
tank = paste0(prefix, tank),
key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
TRUE ~ str_sub(key, 1, 4)),
key = tolower(key)) %>%
# Drop rows that did not parse a date/time
drop_na(dttm, temp) %>%
# Create columns for set point and actual temperature
pivot_wider(names_from = key, values_from = temp) %>%
# Tidy column types
mutate(tank = factor(tank), sp = as.numeric(sp), temp = as.numeric(temp)) %>%
# Add maximum setpoint temperature as max_temp column
group_by(tank) %>%
mutate(max_temp = max(sp, na.rm = TRUE)) %>%
ungroup()
}
# Read in arduino log files
b1log <- readlog("data/raw/20201013_CRF_Acer/temperature/sdlog/20201013_BOX1_LOG.TXT", prefix = "B1")
b3log <- readlog("data/raw/20201013_CRF_Acer/temperature/sdlog/20201013_BOX3_LOG.TXT", prefix = "B3")
### NEED BOX2 DATA FROM 10/13
# Tidy arduino log files for this run
arduinolog <- bind_rows(b1log, b3log) %>%
# Filter to just the date of this run
filter(date(dttm) == "2020-10-13") %>%
# Specify which arduino boxes controlled port vs. starboard tank sets
mutate(tank_set = if_else(str_detect(tank, "B1"), "starboard", "port")) %>%
# Add tank_id column from tank design
left_join(read_csv("data/tank_setup.csv")) %>%
select(dttm, hm, sp, temp, max_temp, tank_set, tank_id)
Import HOBO temperature logs for Inkbird-controlled tanks
# Read in HOBO temperature logs
hobologfiles <- list.files("data/raw/20201013_CRF_Acer/temperature/hobo", pattern = ".xlsx", full.names = TRUE)
hobolog <- tibble(filename = hobologfiles) %>%
mutate(log = map(filename, read_xlsx, skip = 1)) %>%
unnest()
# Tidy HOBO temperature logs
hobolog <- hobolog %>%
clean_names() %>%
# Get tank_id from filename
mutate(tank_id = str_sub(basename(filename), 1, 2)) %>%
# Convert temps from F to C
mutate(temp = (temp_f - 32) * 5/9) %>%
# Select columns to keep
select(dttm = date_time_gmt_0400, temp, tank_id) %>%
mutate(hm = format(dttm, "%H:%M")) %>%
# Join with tank_id information
left_join(read_csv("data/tank_setup.csv")) %>%
# Filter only target date
filter(date(dttm) == "2020-10-13")
# Import inkbird setpoints
inkbird_sp <- read_csv("data/program_files/inkbird_settings.csv") %>%
pivot_longer(2:5, names_to = "max_temp", values_to = "sp") %>%
mutate(max_temp = as.numeric(str_extract(max_temp, "[0-9]+")),
dttm = ymd_hms(paste("2020-10-13", time)), hm = format(dttm, "%H:%M"),
temp = NA) %>%
left_join(read_csv("data/tank_setup.csv")) %>%
select(dttm, hm, sp, temp, max_temp, tank_set, tank_id)
hobolog <- bind_rows(inkbird_sp, hobolog)
Plot CBASS temperature profiles and set points
# Combine arduino and hobo temperature log data
templog <- bind_rows(arduinolog, hobolog)
# Import hobo offset data
hobocal <- read_csv("data/processed/hobo_calibrations.csv")
templog <- left_join(templog, hobocal) %>%
mutate(temp.adj = case_when(!is.na(hobo) ~ temp - mean_offset, TRUE ~ temp)) %>%
select(dttm, hm, sp, temp = temp.adj, max_temp, tank_set, tank_id)
# Plot
ggplot(templog, mapping = aes(x = dttm, y = temp, group = tank_id)) +
geom_line(aes(color = tank_id), lwd = 0.2) +
geom_step(aes(y = sp, color = tank_id), lwd = 0.5) +
facet_wrap(~ tank_set, scales = "free") +
scale_y_continuous(breaks = 30:38, limits = c(29, 39)) +
scale_x_datetime(breaks = "hours", labels = label_date("%H:%M"),
limits = as.POSIXct(c("2020-10-13 12:00:00", "2020-10-13 21:00:00"), tz = "UTC")) +
theme_hc() +
theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "Time of day", y = "Temperature (°C)")

# Calculate actual hold temperatures
actual_temps <- templog %>%
filter(hm > "16:15" & hm < "19:05") %>%
group_by(tank_id) %>%
summarise(max_temp = mean(temp, na.rm = TRUE)) %>%
arrange(max_temp)
# Combine with tank metadata
tanks <- read_csv("data/tank_setup.csv")
actual_temps %>%
full_join(tanks, by = "tank_id") %>%
select(target_temp = max_temp.y, max_temp = max_temp.x, tank_set) %>%
pivot_wider(names_from = target_temp, values_from = max_temp) %>%
knitr::kable(digits = 2)
port |
30.36 |
32.48 |
33.52 |
34.26 |
34.99 |
35.97 |
36.97 |
37.80 |
starboard |
30.49 |
32.51 |
33.16 |
34.52 |
34.95 |
35.97 |
36.88 |
37.91 |
PAM data
Turn each filtering and adjustment step on or off
# Initial pre-filtering (abnormally high, abnormally high w/ low Ft)
pre.filter <- TRUE
# Post-model-fit filtering (high cook's distance outliers)
mod.filter <- TRUE
# Adjust for positional effects
pos.adjust <- TRUE
# Use actual max temperatures instead of target temperatures
temp.adjust <- TRUE
#### Import data from DIVING PAM II
# Read manually entered data from Diving PAM II
path <- "data/raw/20201013_CRF_Acer/PAM/20201013dii_Acer.xlsx"
dpii <- path %>%
excel_sheets() %>%
set_names() %>%
discard(.p = ~str_detect(.x, "Acer")) %>%
map_df(~ read_excel(path = path, sheet = .x, range = "A1:H45", col_types = "text"), .id = "tank_id") %>%
mutate(f = as.numeric(`F`), fm = as.numeric(`Fm'`), fvfm = as.numeric(`Y (II)`)) %>%
select(tank_id, Geno, f, fm, fvfm) %>%
clean_names() %>%
mutate(geno = str_replace(geno, "-", ""),
geno = str_replace(geno, " ", ""))
# Combine with tank_id metadata
dpii <- full_join(dpii, tanks) %>%
select(geno, f, fm, fvfm, tank_id, tank_set, target_temp = max_temp, cooler_side)
# Replace max_temp with actual max_temp calculated above
dpii <- dpii %>%
left_join(actual_temps)
Data pre-filtering and QC
dpii <- dpii %>%
mutate(nursery = "crf") %>%
select(nursery, geno, f, fm, fvfm, target_temp, max_temp, tank_id, tank_set, cooler_side)
# Identify points at high temperatures where low background fluorescence (f) resulted in abnormally high values of fvfm
out <- dpii %>%
filter(target_temp >= 36) %>%
drop_na(fvfm) %>%
mutate(mahal = tidy_mahalanobis(f, fvfm),
is_out = mahal > median(mahal) & f < median(f) & fvfm > median(fvfm, na.rm = T))
# Plot showing those outliers
ggplot(out, aes(x = f, y = fvfm)) +
geom_point(aes(shape = is_out, color = factor(target_temp))) +
scale_shape_manual(values = c(19, 25))

# Filter fvfm values based on various outlier classifications
df <- dpii %>%
# Save raw fvfm in new column
mutate(fvfmraw = fvfm) %>%
# Merge with mahalanobis outlier info from above
left_join(out) %>%
# Identify problematic data points
mutate(problem = case_when(
is.na(fvfm) ~ "no signal",
fvfm > 0.750 ~ "abnormally high",
is_out ~ "abnormally high w/ low Ft",
TRUE ~ "none")) %>%
mutate(fvfm = case_when(
problem == "abnormally high" ~ NA_real_, # Change abnormally high values to NA
problem == "abnormally high w/ low Ft" ~ NA_real_,
problem == "none" ~ fvfm))# %>%
#select(nursery, geno, f, fm, fvfm, target_temp, max_temp, tank_id, tank_set, cooler_side, fvfmraw, problem)
# Select pre-filtered or raw data to use moving forward based on set option
df <- if (pre.filter) {
df
} else {
dpii %>%
mutate(fvfmraw = fvfm,
problem = case_when(is.na(fvfm) ~ "no signal",
TRUE ~ "none") )
}
Positional effects
# Import maps of the position of each genotype in each tank
Pmap <- read_csv("data/raw/20201013_CRF_Acer/P_map.csv") %>%
pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
drop_na() %>%
unite(pos, Row, Col, sep = "", remove = FALSE)
Smap <- read_csv("data/raw/20201013_CRF_Acer/S_map.csv") %>%
pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
drop_na() %>%
unite(pos, Row, Col, sep = "", remove = FALSE)
df_pos <- bind_rows(Pmap, Smap) %>%
full_join(df) %>%
# Create new cooler_id column
mutate(cooler = case_when(tank_id %in% c("P1", "P2") ~ "p_cooler_1",
tank_id %in% c("P3", "P4") ~ "p_cooler_2",
tank_id %in% c("P5", "P6") ~ "p_cooler_3",
tank_id %in% c("P7", "P8") ~ "p_cooler_4",
tank_id %in% c("S1", "S2") ~ "s_cooler_1",
tank_id %in% c("S3", "S4") ~ "s_cooler_2",
tank_id %in% c("S5", "S6") ~ "s_cooler_3",
tank_id %in% c("S7", "S8") ~ "s_cooler_4")) %>%
# Recode cooler column position as number of columns from center of cooler
mutate(cols_from_ctr = case_when(
cooler_side == "right" ~ recode(Col, "5" = 4, "4" = 4, "3" = 3, "2" = 2, "1" = 1),
cooler_side == "left" ~ recode(Col, "5" = 1, "4" = 1, "3" = 2, "2" = 3, "1" = 4))) %>%
# Recode cooler row position as number of rows from center of cooler
mutate(rows_from_ctr = case_when(Row == "A" ~ 3, Row == "B" ~ 2, Row == "C" ~ 1,
Row == "D" ~ 1, Row == "E" ~ 2, Row == "F" ~ 3))
# Visualize positional effects within each tank
ggplot(df_pos, aes(x = cols_from_ctr, y = fvfm, group = rows_from_ctr,
color = factor(rows_from_ctr), shape = factor(rows_from_ctr))) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(tank_set~target_temp)

# Fit model to adjust for positional effects
mod <- lm(fvfm ~ tank_id * cols_from_ctr * rows_from_ctr, data = df_pos, na.action = na.exclude)
anova(mod) %>% tidy() %>% knitr::kable()
tank_id |
15 |
15.2085797 |
1.0139053 |
258.9246355 |
0.0000000 |
cols_from_ctr |
1 |
0.0707014 |
0.0707014 |
18.0552655 |
0.0000250 |
rows_from_ctr |
1 |
0.0321874 |
0.0321874 |
8.2198050 |
0.0042959 |
tank_id:cols_from_ctr |
15 |
0.0762186 |
0.0050812 |
1.2976142 |
0.1980132 |
tank_id:rows_from_ctr |
15 |
0.0472791 |
0.0031519 |
0.8049217 |
0.6727092 |
cols_from_ctr:rows_from_ctr |
1 |
0.0000829 |
0.0000829 |
0.0211652 |
0.8843809 |
tank_id:cols_from_ctr:rows_from_ctr |
15 |
0.0454020 |
0.0030268 |
0.7729650 |
0.7083951 |
Residuals |
573 |
2.2437716 |
0.0039158 |
NA |
NA |
# Calculate estimated marginal means for the "center" of each tank
emms <- emmeans(mod, specs = c("tank_id", "cols_from_ctr", "rows_from_ctr")) %>%
as_tibble() %>%
select(tank_id, emmean)
# Adjust FvFm using partial residuals to account for positional effects within each tank
df_pos <- augment(mod, df_pos) %>%
mutate(resid = .resid) %>%
select(!starts_with("."))
df_pos <- left_join(emms, df_pos) %>%
mutate(fvfm.adj = emmean + resid) %>%
select(geno, f, fm, fvfm, emmean, resid, fvfm.adj, target_temp, max_temp, tank_id, tank_set, cooler, cols_from_ctr, rows_from_ctr)
# Select whether to use adjusted or unadjusted fvfm data downstream
df <- if (pos.adjust) {
# If position adjustment turned on, join with adjusted fvfm values
left_join(df, df_pos) %>%
# Select adjusted fvfm to use in modeling below
select(nursery, geno, f, fm, fvfmraw, fvfm = fvfm.adj, target_temp, max_temp, problem)
} else {
# If position adjustment turned off, stick with original data
df %>% select(nursery, geno, f, fm, fvfmraw, fvfm, target_temp, max_temp, problem)
}
Dose-response curve model fitting
# Select whether to use target temp or actual recorded max temp for model fitting
df <- if (temp.adjust) {
df %>% select(-target_temp)
} else {
df %>% mutate(max_temp = target_temp) %>% select(-target_temp)
}
# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
drm(fvfm ~ max_temp, data = data,
fct = LL.3(names = c("hill", "max", "ED50")),
upperl = c(120, 0.72, 40),
lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)
# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
# Fit the model to each coral
mutate(ll3 = map(data, tryll3)) %>%
# Get model parameters and fitted values/residuals
mutate(pars = map(ll3, tidy),
pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
# Extract ed50 parameter values from model fits
ed50 <- initmods %>%
select(nursery, geno, pars) %>%
unnest(pars) %>%
filter(term == "ED50")
# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
select(nursery, geno, pred) %>%
unnest(pred) %>%
full_join(ed50) %>%
full_join(df) %>%
rename(ed50 = estimate)
# Identify problematic data points based on cook's distance and residuals
counts <- vals %>%
group_by(nursery, geno) %>%
summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
left_join(counts) %>%
group_by(nursery, geno) %>%
mutate(resid.thresh = 2*sd(.resid, na.rm = T)) %>% # Calculate residual threshold as 2 standard deviations
mutate(cooksd.thresh = 4/n) %>% # Calculate cook's distance threshold as 4/n
mutate(max_to_remove = floor(n * 0.2)) %>%
ungroup() %>%
mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
.resid > resid.thresh ~ "high residual",
TRUE ~ problem)) %>%
group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
mutate(n.outliers = n(),
rank.out = order(.cooksd, decreasing = TRUE)) %>%
ungroup() %>%
mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_,
TRUE ~ fvfm))
# Refit models without problematic points
fmods <- dff %>%
select(nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm) %>%
nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
# Fit the model to each coral
mutate(ll3 = map(data, tryll3)) %>%
# Get model parameters and fitted values/residuals
mutate(pars = map(ll3, tidy),
pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
# Extract ed50 parameter values from model fits
fed50 <- fmods %>%
select(nursery, geno, pars) %>%
unnest(pars) %>%
filter(term == "ED50")
# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
select(nursery, geno, pred) %>%
unnest(pred) %>%
full_join(fed50) %>%
full_join(select(dff, nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm)) %>%
rename(ed50 = estimate)
Plot fits
# Select whether to use model-based filtering or not
final <- if (mod.filter) { fvals } else { vals }
# Plot fits
plotfits(data = final)

Summarize data filtering
final %>%
count(problem) %>%
mutate(freq = n / sum(n)) %>%
knitr::kable()
abnormally high w/ low Ft |
31 |
0.0440341 |
high cook’s distance |
36 |
0.0511364 |
high residual |
1 |
0.0014205 |
no signal |
36 |
0.0511364 |
none |
600 |
0.8522727 |
# Calculate pseudo R2 across dataset of the fitted models with given filtering options
finalmods <- if (mod.filter) { fmods } else { initmods }
calc.ss <- finalmods %>%
mutate(aov = map(data, ~ aov(fvfm ~ factor(max_temp), data = .)),
tss = map_dbl(aov, ~ sum(tidy(.)$sumsq)),
drc.rss = map_dbl(ll3, ~ tidy(modelFit(.))$rss[2]),
pseudo.R2 = 1 - (drc.rss / tss))
tibble(pseudo_R2 = 1 - (sum(calc.ss$drc.rss) / sum(calc.ss$tss))) %>%
knitr::kable()
Save processed data to file
# Write tidy data to file
final %>%
select(nursery, geno, max_temp, fvfm) %>%
drop_na() %>%
mutate(date = "2020-10") %>%
write_csv("data/processed/crf_fvfm_adj_clean.csv")