knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(janitor)
library(lubridate)
library(scales)
library(readxl)
library(ggthemes)
library(drc)
library(broom)
library(tidyverse)
library(emmeans)
library(ggpubr)
library(joeyr)
CBASS temperature logs
Import CBASS log files
b1log <- read_csv("data/raw/20200820_KJSNSU_Acer/Log_Files/B1_LOG_0821.TXT")
b1log <- b1log %>%
filter(PrintDate != "PrintDate") %>%
mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
unite(time, Th, Tm, Ts, sep = ":") %>%
unite(dttm, date, time) %>%
mutate(dttm = ymd_hms(dttm),
hm = format(dttm, "%H:%M")) %>%
select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
filter(temp > 0, temp < 50) %>%
mutate(tank = str_extract(key, "T[0-9]"),
tank = paste0("B1", tank),
key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
TRUE ~ str_sub(key, 1, 4))) %>%
pivot_wider(names_from = key, values_from = temp)
b2log <- read_csv("data/raw/20200820_KJSNSU_Acer/Log_Files/B2_LOG_0821.TXT")
b2log <- b2log %>%
filter(PrintDate != "PrintDate") %>%
mutate(date = as_date(Date, format = "%Y_%B_%d")) %>%
unite(time, Th, Tm, Ts, sep = ":") %>%
unite(dttm, date, time) %>%
mutate(dttm = ymd_hms(dttm),
hm = format(dttm, "%H:%M")) %>%
select(dttm, hm, T1SP, TempT1, T2SP, TempT2, T3SP, TempT3, T4SP, TempT4) %>%
pivot_longer(starts_with("T"), names_to = "key", values_to = "temp") %>%
filter(temp > 0, temp < 50) %>%
mutate(tank = str_extract(key, "T[0-9]"),
tank = paste0("B2", tank),
key = case_when(grepl("SP", key) ~ str_sub(key, 3, 4),
TRUE ~ str_sub(key, 1, 4))) %>%
pivot_wider(names_from = key, values_from = temp)
log <- bind_rows(b1log, b2log) %>%
mutate(tank = factor(tank),
SP = as.numeric(SP),
Temp = as.numeric(Temp))
Import CBASS set points (used for settings file)
sp <- read_xlsx("data/raw/20200820_KJSNSU_Acer/Temp_Profiles/CRII_ACERV_2020.xlsx") %>%
mutate(Time = format(Time, "%H:%M")) %>%
filter(Time >= "13:00", Time <= "22:00") %>%
crossing(date = as_date(c("2020-08-20", "2020-08-21"))) %>%
mutate(dttm = ymd_hm(paste(date, Time))) %>%
select(dttm, matches("B")) %>%
pivot_longer(-dttm, names_to = "tank", values_to = "Temp") %>%
arrange(dttm)
Plot CBASS temperature profiles with set points
ggplot(mapping = aes(x = dttm, y = Temp, group = tank)) +
geom_line(data = filter(log, hm > "12:50", hm < "21:50"),
aes(color = tank), lwd = 0.2) +
geom_step(data = sp, lwd = 0.2) +
facet_wrap(~date(dttm), scales = "free") +
scale_y_continuous(breaks = 30:37) +
scale_x_datetime(breaks = "hours", labels = label_date("%H:%M")) +
theme_hc() +
theme(legend.position = "none") +
labs(x = "Time of day", y = "Temperature (°C)")

# Calculate actual hold temperatures
actual_temps <- log %>%
filter(hm > "16:15" & hm < "19:05") %>%
group_by(tank_id = tank) %>%
summarise(max_temp = mean(Temp, na.rm = TRUE)) %>%
arrange(max_temp)
actual_temps
## # A tibble: 8 x 2
## tank_id max_temp
## <fct> <dbl>
## 1 B1T1 30.1
## 2 B1T2 31.0
## 3 B1T3 32.0
## 4 B1T4 33.0
## 5 B2T1 34.0
## 6 B2T2 34.9
## 7 B2T3 35.9
## 8 B2T4 36.9
PAM data
Turn each filtering and adjustment step on or off
# Initial pre-filtering (abnormally high, abnormally high w/ low Ft)
pre.filter <- T
# Post-model-fit filtering (high cook's distance outliers)
mod.filter <- T
# Adjust for positional effects
pos.adjust <- T
# Use actual max temperatures instead of target temperatures
temp.adjust <- T
Data pre-filtering and QC
dpii <- read_csv("data/raw/20200820_KJSNSU_Acer/PAM/KJS_pamdata_tidy.csv")
dpii <- dpii %>%
mutate(nursery = "nsu",
geno = paste0("kjs", Genotype)) %>%
select(nursery, geno, f, fm, fvfm, target_temp = Final_Temp, tank_id = Tank) %>%
drop_na(fvfm)
# Replace max_temp with actual max_temp calculated above
dpii <- dpii %>%
left_join(actual_temps)
# Identify points at high temperatures where low background fluorescence (f) resulted in abnormally high values of fvfm
out <- dpii %>%
filter(target_temp >= 36) %>%
drop_na(fvfm) %>%
mutate(mahal = tidy_mahalanobis(f, fvfm),
is_out = mahal > quantile(mahal, 0.75) & f < median(f) & fvfm > median(fvfm, na.rm = T))
# Plot showing those outliers
ggplot(out, aes(x = f, y = fvfm)) +
geom_point(aes(shape = is_out, color = factor(target_temp))) +
scale_shape_manual(values = c(19, 25))

df <- dpii %>%
# Save raw fvfm data in new column
mutate(fvfmraw = fvfm) %>%
# Merge with mahalanobis outlier info from above
left_join(out) %>%
# Identify problematic data points
mutate(problem = case_when(
fvfm > 0.75 ~ "abnormally high",
is_out ~ "abnormally high w/ low Ft",
TRUE ~ "none")) %>%
mutate(fvfm = case_when(
problem == "abnormally high" ~ NA_real_, # Change abnormally high values to NA
problem == "abnormally high w/ low Ft" ~ NA_real_,
problem == "none" ~ fvfm))
# Select pre-filtered or raw data to use moving forward based on set option
df <- if (pre.filter) {
df
} else {
dpii %>%
mutate(fvfmraw = fvfm,
problem = case_when(is.na(fvfm) ~ "no signal",
TRUE ~ "none") )
}
Adjust for positional effects
# Import maps of the position of each genotype in each tank
map1 <- read_csv("data/raw/20200820_KJSNSU_Acer/20200820_map.csv") %>%
mutate(across(.fns = as.character)) %>%
pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
drop_na() %>%
unite(pos, Row, Col, sep = "", remove = FALSE)
map2 <- read_csv("data/raw/20200820_KJSNSU_Acer/20200821_map.csv") %>%
pivot_longer(-Row, values_to = "geno", names_to = "Col") %>%
mutate(across(.fns = as.character)) %>%
drop_na() %>%
unite(pos, Row, Col, sep = "", remove = FALSE)
df_pos <- bind_rows(map1, map2) %>%
mutate(geno = paste0("kjs", geno)) %>%
full_join(df) %>%
# Create new cooler_side column
mutate(cooler_side = case_when(tank_id %in% c("B2T2", "B2T4", "B1T2", "B1T4") ~ "left",
tank_id %in% c("B2T1", "B2T3", "B1T1", "B1T3") ~ "right")) %>%
# Recode cooler column position as number of columns from center of cooler
mutate(cols_from_ctr = case_when(
cooler_side == "right" ~ recode(Col, "4" = 4, "3" = 3, "2" = 2, "1" = 1),
cooler_side == "left" ~ recode(Col, "4" = 1, "3" = 2, "2" = 3, "1" = 4))) %>%
# Recode cooler row position as number of rows from center of cooler
mutate(rows_from_ctr = case_when(Row == "A" ~ 3, Row == "B" ~ 2, Row == "C" ~ 1,
Row == "D" ~ 1, Row == "E" ~ 2, Row == "F" ~ 3))
# Visualize positional effects within each tank
ggplot(df_pos, aes(x = cols_from_ctr, y = fvfm, group = rows_from_ctr,
color = factor(rows_from_ctr), shape = factor(rows_from_ctr))) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(~target_temp)

# Fit model to adjust for positional effects
mod <- lm(fvfm ~ tank_id * cols_from_ctr * rows_from_ctr, data = df_pos, na.action = na.exclude)
anova(mod) %>% tidy() %>% knitr::kable()
tank_id |
7 |
12.9302741 |
1.8471820 |
343.2976813 |
0.0000000 |
cols_from_ctr |
1 |
0.0022620 |
0.0022620 |
0.4204005 |
0.5170124 |
rows_from_ctr |
1 |
0.0038104 |
0.0038104 |
0.7081568 |
0.4004282 |
tank_id:cols_from_ctr |
7 |
0.0604010 |
0.0086287 |
1.6036406 |
0.1317157 |
tank_id:rows_from_ctr |
7 |
0.0568088 |
0.0081155 |
1.5082692 |
0.1617473 |
cols_from_ctr:rows_from_ctr |
1 |
0.0038235 |
0.0038235 |
0.7106001 |
0.3996172 |
tank_id:cols_from_ctr:rows_from_ctr |
7 |
0.0143711 |
0.0020530 |
0.3815518 |
0.9132242 |
Residuals |
541 |
2.9109590 |
0.0053807 |
NA |
NA |
# Calculate estimated marginal means for the "center" of each tank
emms <- emmeans(mod, specs = c("tank_id", "cols_from_ctr", "rows_from_ctr")) %>%
as_tibble() %>%
select(tank_id, emmean)
# Adjust FvFm using partial residuals to account for positional effects within each tank
df_pos <- augment(mod, df_pos) %>%
mutate(resid = .resid) %>%
select(!starts_with("."))
df_pos <- left_join(emms, df_pos) %>%
mutate(fvfm.adj = emmean + resid) %>%
select(geno, f, fm, fvfm, emmean, resid, fvfm.adj, target_temp, max_temp, tank_id, cols_from_ctr, rows_from_ctr)
# Select whether to use adjusted or unadjusted fvfm data downstream
df <- if (pos.adjust) {
# If position adjustment turned on, join with adjusted fvfm values
left_join(df, df_pos) %>%
# Select adjusted fvfm to use in modeling below
select(nursery, geno, f, fm, fvfmraw, fvfm = fvfm.adj, target_temp, max_temp, problem)
} else {
# If position adjustment turned off, stick with original data
df %>% select(nursery, geno, f, fm, fvfmraw, fvfm, target_temp, max_temp, problem)
}
Dose-response curve model fitting
# Select whether to use target temp or actual recorded max temp for model fitting
df <- if (temp.adjust) {
df %>% select(-target_temp)
} else {
df %>% mutate(max_temp = target_temp) %>% select(-target_temp)
}
# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
drm(fvfm ~ max_temp, data = data,
fct = LL.3(names = c("hill", "max", "ED50")),
upperl = c(120, 0.72, 40),
lowerl = c(10, 0.55, 30))}
tryll3 <- possibly(ll3, otherwise = NULL)
# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- df %>%
nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
# Fit the model to each coral
mutate(ll3 = map(data, tryll3)) %>%
# Get model parameters and fitted values/residuals
mutate(pars = map(ll3, tidy),
pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
# Extract ed50 parameter values from model fits
ed50 <- initmods %>%
select(nursery, geno, pars) %>%
unnest(pars) %>%
filter(term == "ED50")
# Collect raw data, fitted values, and diagnostics
vals <- initmods %>%
select(nursery, geno, pred) %>%
unnest(pred) %>%
full_join(ed50) %>%
full_join(df) %>%
rename(ed50 = estimate)
# Identify problematic data points based on cook's distance and residuals
counts <- vals %>%
group_by(nursery, geno) %>%
summarise(n = sum(!is.na(fvfm)))
dff <- vals %>%
left_join(counts) %>%
group_by(nursery, geno) %>%
mutate(resid.thresh = 2*sd(.resid, na.rm = T)) %>% # Calculate residual threshold as 2 standard deviations
mutate(cooksd.thresh = 4/n) %>% # Calculate cook's distance threshold as 4/n
mutate(max_to_remove = floor(n * 0.2)) %>%
ungroup() %>%
mutate(problem = case_when(.cooksd > cooksd.thresh & .resid > 0 ~ "high cook's distance",
.resid > resid.thresh ~ "high residual",
TRUE ~ problem)) %>%
group_by(nursery, geno, outlier = problem %in% c("high cook's distance", "high residual")) %>%
mutate(n.outliers = n(),
rank.out = order(.cooksd, decreasing = TRUE)) %>%
ungroup() %>%
mutate(fvfm = case_when(outlier & rank.out <= max_to_remove ~ NA_real_,
TRUE ~ fvfm))
# Refit models without problematic points
fmods <- dff %>%
select(nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm) %>%
nest(data = c(max_temp, f, fm, fvfmraw, fvfm, problem)) %>%
# Fit the model to each coral
mutate(ll3 = map(data, tryll3)) %>%
# Get model parameters and fitted values/residuals
mutate(pars = map(ll3, tidy),
pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
# Extract ed50 parameter values from model fits
fed50 <- fmods %>%
select(nursery, geno, pars) %>%
unnest(pars) %>%
filter(term == "ED50")
# Collect raw data, fitted values, and ed50 estimates
fvals <- fmods %>%
select(nursery, geno, pred) %>%
unnest(pred) %>%
full_join(fed50) %>%
full_join(select(dff, nursery, geno, max_temp, f, fm, fvfmraw, problem, fvfm)) %>%
rename(ed50 = estimate)
Plot fits
# Select whether to use model-based filtering or not
final <- if (mod.filter) { fvals } else { vals }
# Plot fits
plotfits(data = final)

Summarize data filtering
final %>%
count(problem) %>%
mutate(freq = n / sum(n)) %>%
knitr::kable()
abnormally high |
8 |
0.0135823 |
abnormally high w/ low Ft |
8 |
0.0135823 |
high cook’s distance |
36 |
0.0611205 |
high residual |
1 |
0.0016978 |
none |
536 |
0.9100170 |
# Calculate pseudo R2 across dataset of the fitted models with given filtering options
finalmods <- if (mod.filter) { fmods } else { initmods }
calc.ss <- finalmods %>%
mutate(aov = map(data, ~ aov(fvfm ~ factor(max_temp), data = .)),
tss = map_dbl(aov, ~ sum(tidy(.)$sumsq)),
drc.rss = map_dbl(ll3, ~ tidy(modelFit(.))$rss[2]),
pseudo.R2 = 1 - (drc.rss / tss))
tibble(pseudo_R2 = 1 - (sum(calc.ss$drc.rss) / sum(calc.ss$tss))) %>%
knitr::kable()
Save processed data to file
# Write tidy data to file
final %>%
select(nursery, geno, max_temp, fvfm) %>%
drop_na() %>%
mutate(date = "2020-08") %>%
write_csv("data/processed/KJS_fvfm_adj_clean.csv")