This document analyzes thermal tolerance metrics (ED50) derived from each coral genotype measured at each nursery on the R/V Coral Reef II in the summer and fall of 2020. ED50 values are analyzed for differences among nurseries, and effects of source colony location and temperature regime. For a subset of corals, ED50 values are compared to values obtained from the same genets at the same nurseries in spring 2020, in a separate, independent set of CBASS assays.

# Import cleaned datasets from each nursery
clean_data_files <- list.files(path = "data/processed", 
                               pattern = "*_clean.csv", 
                               full.names = TRUE)
clean_data <- clean_data_files %>% map_dfr(read_csv)

# Import genotype metadata
gmd <- read_csv("data/genotype_metadata_raw.csv") %>%
  mutate(nursery = tolower(nursery),
         across(everything(), str_squish),
         across(ends_with("lat"), ~signif(parse_lat(.), 6)),             # parse latitude
         across(ends_with("lon"), ~signif(parse_lon(.), 6))) %>%        # parse longitude
  mutate(ugeno = gsub(" ", "", ugeno)) %>%
  select(nursery, name = geno, source_lat, source_lon, MMM_5km)

# Import names key
gkey <- read_csv("data/genotype_name_key.csv") %>%
  mutate(alt0 = geno) %>%
  pivot_longer(starts_with("alt"), names_to = "alt", values_to = "name") %>%
  drop_na() %>%
  select(geno, name)

# Combine cleaned data with genotype metadata
df.all <- clean_data %>%
  rename(name = geno) %>%
  left_join(gkey) %>%
  select(nursery, geno, name, max_temp, fvfm, date) %>%
  left_join(gmd) %>%
  # Order nurseries from south to north
  mutate(nursery = factor(nursery, levels = c("mote", "fwc", "rrt", "crf", "um", "nsu"))) %>%
  select(nursery, name, geno, date, max_temp, fvfm, source_lat, source_lon, MMM_5km)

# Define dose-response curves using same constraints
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = data, 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(120, 0.72, 40),
      lowerl = c(10, 0.55, 30))}

# Fit model to each coral, get parameters, fitted values, and residuals
mods <- df.all %>%
  nest(data = c(max_temp, fvfm)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, ll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))

# Extract parameter values from model fits
res.all <- mods %>%
  unnest(pars) %>%
  pivot_wider(names_from = term, 
              values_from = c(estimate, std.error, statistic, p.value)) %>%
  clean_names() %>%
  select(!where(is.list), -curve)

# Filter to analyze only CBASS runs from Coral Reef II in August and October
res <- res.all %>%
  filter(date %in% c("2020-08", "2020-10"))

# Recode nursery abbreviations
res <- res %>%
  mutate(nursery = recode(nursery, mote = "MML", fwc = "FWC", rrt = "RR", crf = "CRF", um = "UM", nsu = "NSU"))

How many coral colonies and unique genotypes were tested?

res %>%
  summarize(n_colonies = n_distinct(nursery, name),
            n_genotypes = n_distinct(geno)) %>%
  knitr::kable()

n_colonies	n_genotypes
229	175

Number of unique genotypes shared across nurseries

mat <- res %>%
  distinct(nursery, geno) %>%
  mutate(present = TRUE) %>%
  complete(nursery, geno, fill = list(present = FALSE)) %>%         # Complete implicit missing values
  pivot_wider(names_from = "nursery", values_from = "present") %>%
  mutate(n_nurs = rowSums(.[-1]))


library(ggvenn)  # remotes::install_github("yanlinlin82/ggvenn")

figs1 <- ggvenn(
  mat[,2:5], 
  show_percentage = FALSE,
  fill_color = c("#F8766D", "#B79F00", "#00BA38", "#00BFC4", "#619CFF", "#F564E3"),
  stroke_size = 0.5, set_name_size = 4
)
figs1
ggsave("output/figS1.png", width = 90, height = 90, units = "mm")

What is the range in thermal tolerance across all colonies?

# Basic histogram
# ggplot(res, aes(x = estimate_ed50)) + 
#   geom_histogram(binwidth = 0.1) +
#   labs(x = "ED50 Tolerance (°C)", y = "Number of genotypes",
#        title = "Histogram of ED50 values for all corals") +
#   theme_few()

# Histogram colored by nursery with density plot
hist1 <- res %>%
  ggplot(aes(x = estimate_ed50)) + 
  geom_histogram(aes(fill = nursery), binwidth = 0.1, alpha = 0.7, position = position_stack(reverse = TRUE)) +
  geom_density(aes(y = ..count.. * 0.1)) +
  theme_few() +
  theme(text = element_text(size = 10)) +
  theme(legend.position = "none", legend.key.size = unit(0.25, "cm")) +
  labs(x = "ED50 (°C)", y = "Count") +
  guides(fill = guide_legend(reverse = TRUE)) + 
  xlim(qnorm(c(0.005, 0.9999), mean(res$estimate_ed50), sd(res$estimate_ed50)))

# Remove nursery effect
mod <- lm(estimate_ed50 ~ nursery, data = res)
res.adj <- augment(mod, data = res) %>%
  mutate(ed50_adj = mean(res$estimate_ed50) + .resid)

# Get mean and sd from adjusted distribution
meanadj <- mean(res.adj$ed50_adj)
sdadj <- sd(res.adj$ed50_adj)

hist2 <- ggplot(res.adj, aes(x = ed50_adj)) + 
  geom_histogram(aes(y = ..count../sum(..count..)), binwidth = 0.1, alpha = 0.4) +
  stat_function(fun = ~ dnorm(.x, meanadj, sdadj) * 0.1) +
  geom_vline(aes(xintercept = meanadj)) +
  geom_segment(aes(x = meanadj, xend = meanadj + sdadj, 
                   y = dnorm(meanadj + sdadj, meanadj, sdadj) * 0.1,
                   yend = dnorm(meanadj + sdadj, meanadj, sdadj) * 0.1),
               arrow = arrow(length = unit(0.2,"cm"), ends = "both"), lwd = 0.1) +
  xlim(qnorm(c(0.00001, 0.99999), meanadj, sdadj)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  annotate("text", x = meanadj - 0.1, y = 0.125, adj = 1,
           label = paste0("mean = ", round(meanadj, 2)), size = 2) +
  annotate("text", x = meanadj + sdadj + 0.1, 
           y = dnorm(meanadj + sdadj, meanadj, sdadj) * 0.1, 
           adj = 0, label = paste0("s.d. = ", round(sdadj, 2)), size = 2) +
  theme_few() +
  theme(text = element_text(size = 10)) +
  labs(x = "ED50 (°C) adjusted", y = "Percentage")


# Combine histograms into multi-panel figure
fig2 <- cowplot::plot_grid(
  hist1, 
  hist2, labels = "AUTO")
fig2

# Summarize original distribution
tidy(summary(res$estimate_ed50)) %>%
  knitr::kable(caption = "Summary of unadjusted ED50 values")

Summary of unadjusted ED50 values
minimum	q1	median	mean	q3	maximum
35.05274	35.65659	35.91761	35.98276	36.26004	37.61749

# Is adjusted distribution normal?
tidy(shapiro.test(res.adj$ed50_adj)) %>% 
  knitr::kable(caption = "Normality test for adjusted ED50 values")

Normality test for adjusted ED50 values
statistic	p.value	method
0.9871973	0.0381158	Shapiro-Wilk normality test

#ggsave("output/fig2.png", width = 172, height = 75, units = "mm")

After adjusting for variation due to nursery location (i.e., environment), ED50 values are approximately normally distributed, with a mean of 35.98°C and standard deviation of 0.38°C. This is the variance due to genetic effects (and measurement error).

ED50 ± standard error for all colonies

res %>%
  mutate(nursname = interaction(nursery, name),
         nursname = fct_reorder(nursname, estimate_ed50)) %>%
  ggplot(aes(x = nursname, y = estimate_ed50, color = nursery)) +
  geom_errorbar(aes(ymin = estimate_ed50 - std_error_ed50, ymax = estimate_ed50 + std_error_ed50)) +
  geom_point() +
  facet_wrap(~nursery, scales = "free_x") +
  theme_few() +
  theme(legend.position = "none",
        axis.text.x = element_blank()) +
  labs(x = "Colony", y = "ED50 ± SE (°C)") +
  coord_cartesian(ylim = c(34.75, 37.75))

How does thermal tolerance vary across nurseries?

# Test for equal variance across nurseries
car::leveneTest(estimate_ed50 ~ nursery, data = res) %>%
  tidy() %>%
  knitr::kable(caption = "Levene's test for homogeneity of variance")

Levene’s test for homogeneity of variance
term	df	statistic	p.value
group	5	4.153644	0.0012554
	223	NA	NA

## Unequal variance among nurseries -- use Welch ANOVA
welch <- oneway.test(estimate_ed50 ~ nursery, data = res)
tidy(welch) %>% knitr::kable(caption = "Welch ANOVA table")

Welch ANOVA table
num.df	den.df	statistic	p.value	method
5	102.399	28.30402	0	One-way analysis of means (not assuming equal variances)

# Variance explained by nursery
omega.squared <- function(WelchF, df, N) df * (WelchF - 1) / (df * (WelchF - 1) + N)
tibble(omega.squared = omega.squared(
  WelchF = welch$statistic[[1]], df = welch$parameter[[1]], N = nrow(res))) %>%
  knitr::kable(caption = "Variance explained by nursery")

Variance explained by nursery
omega.squared
0.3734955

# Get mean and sd by nursery for plotting
nurs.stats <- res %>%
  group_by(nursery) %>%
  summarise(mean = mean(estimate_ed50), sd = sd(estimate_ed50), n = n()) %>%
  ungroup()

# Median pairwise difference between nurseries
mpwdiff <- median(dist(nurs.stats$mean))
mpwdiff %>%
  knitr::kable(caption = "Median pairwise difference between nurseries (°C)")

Median pairwise difference between nurseries (°C)
x
0.3180534

# Use pairwise wilcox to test for differences in ED50 among nurseries
test <- pairwise.wilcox.test(res$estimate_ed50, res$nursery, p.adjust.method = "bonf")
pvals <- tibble(reshape2::melt(test$p.value)) %>% drop_na() %>%
  mutate(Var1 = factor(Var1, levels = levels(fct_reorder(nurs.stats$nursery, nurs.stats$mean)))) %>%
  arrange(Var1)
stats <- setNames(pull(pvals[,3]), paste(pvals$Var1, pvals$Var2, sep = "-")) %>%
  multcompLetters(threshold = 0.01)
lett <- enframe(stats$monospacedLetters, name = "nursery", value = "group")
nurs.stats <- left_join(nurs.stats, lett)


# Raincloud plot
source("https://raw.githubusercontent.com/datavizpyr/data/master/half_flat_violinplot.R")
nurs.fig <- ggplot(res, aes(x = nursery, y = estimate_ed50, fill = nursery, group = nursery)) +
  geom_hline(aes(yintercept = mean(estimate_ed50)), lty = 2, col = "gray") +
  geom_flat_violin(position = position_nudge(x = 0.2, y = 0), 
                   adjust = 1, width = 2, alpha = 0.7, lwd = 0) +
  geom_boxplot(position = position_nudge(x = 0.2, y = 0), width = 0.1,
               outlier.shape = NA, alpha = 1, lwd = 0.25) +
  geom_point(aes(color = nursery), alpha = 0.7, size = 1,
             position = position_jitter(width = 0.075)) +
  geom_point(data = nurs.stats, aes(x = nursery, y = mean), inherit.aes = FALSE,
             position = position_nudge(x = 0, y = 0), size = 1, pch = 5) +
  geom_errorbar(data = nurs.stats, inherit.aes = FALSE, width = 0, lwd = 0.25,
                aes(x = nursery, y = mean, ymin = mean - sd, ymax = mean + sd),
                position = position_nudge(x = 0, y = 0)) +
  geom_text(data = nurs.stats, aes(y = 37.5, label = group), size = 2) +
  geom_text(data = nurs.stats, aes(y = 37.5, label = paste("n=", n)), nudge_x = 0.2, size = 2) +
  coord_flip() +
  scale_size(range = c(0.5, 3)) +
  scale_shape_manual(values = c(21, 22, 23, 24, 25)) +
  theme_few() +
  theme(text = element_text(size = 10),
        axis.title.y = element_blank()) +
  theme(legend.position = "none") +
  labs(y = "ED50 (°C)", x = NA)
nurs.fig

#ggsave("output/fig3.png", width = 120, height = 100, units = "mm")

There is variation among nurseries in average ED50 values. This could be due to different environmental conditions, and/or different set of genotypes present, and/or confounding experimental effects. However, most of the variation in thermal tolerance is found within nurseries rather than between them. This means that each nursery tends to have genets with a wide range of thermal tolerance phenotypes.

# 3-panel Figure 2   
fig2 <- cowplot::plot_grid(hist1, nurs.fig, hist2, ncol = 3, labels = "AUTO")
ggsave("output/fig2.png", plot = fig2, width = 180, height = 60, units = "mm")

How does thermal tolerance vary for the same genotype grown at different nurseries?

Plot ED50s for each genotype

# Get ED50s (adjusted for nursery effect) for genotypes present at multiple nurseries
multi.adj <- res.adj %>%
  # Filter genotypes present at more than one nursery
  group_by(geno) %>%
  filter(n_distinct(nursery) > 1) %>%
  # If genotype run more than once at a nursery, use mean ed50 within nursery
  group_by(geno, nursery) %>%
  summarize(ed50_adj = mean(ed50_adj)) %>%
  ungroup()
# Reorder genotypes based on mean ed50 across nurseries
multi.adj <- multi.adj %>%
  group_by(geno) %>%
  summarize(geno_mean_ed50 = mean(ed50_adj)) %>%
  right_join(multi.adj) %>%
  mutate(geno = fct_reorder(geno, geno_mean_ed50))
  

# Plot ED50adj by genotype
ggplot(multi.adj, aes(x = geno, y = ed50_adj, color = nursery, shape = nursery, group = geno)) + 
  geom_point(size = 1.5, alpha = 0.75) + 
  geom_point(aes(y = geno_mean_ed50), pch = 1, color = "black", stroke = 0.25, size = 2)+
  stat_summary(fun.data = mean_se, geom = "errorbar", color = "black", lwd = 0.25, width = 0.25) +
  theme_few() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text = element_text(size = 8),
        legend.position = c(0.15, 0.75),
        legend.text = element_text(size = 6),
        legend.title = element_text(size = 8)) +
  labs(x = "Genotype", y = "Adjusted ED50 (°C)")

ggsave(filename = "output/figS3.png", width = 120, height = 87, units = "mm")

tidy(anova(lm(ed50_adj ~ geno, data = multi.adj))) %>%
  knitr::kable(caption = "ANOVA: Effect of genotype on ED50")

ANOVA: Effect of genotype on ED50
term	df	sumsq	meansq	statistic	p.value
geno	30	4.133054	0.1377685	1.026536	0.4605915
Residuals	44	5.905116	0.1342072	NA	NA

Plot correlations across nurseries

gatherpairs <- function(data, ..., 
                        xkey = '.xkey', xvalue = '.xvalue',
                        ykey = '.ykey', yvalue = '.yvalue',
                        na.rm = FALSE, convert = FALSE, factor_key = FALSE) {
  vars <- quos(...)
  xkey <- enquo(xkey)
  xvalue <- enquo(xvalue)
  ykey <- enquo(ykey)
  yvalue <- enquo(yvalue)

  data %>% {
    cbind(gather(., key = !!xkey, value = !!xvalue, !!!vars,
                 na.rm = na.rm, convert = convert, factor_key = factor_key),
          select(., !!!vars)) 
  } %>% gather(., key = !!ykey, value = !!yvalue, !!!vars,
               na.rm = na.rm, convert = convert, factor_key = factor_key)
}

cors <- multi.adj %>%
  select(nursery, geno, ed50_adj) %>%
  pivot_wider(names_from = nursery, values_from = ed50_adj, values_fn = mean) %>%
  gatherpairs(FWC, CRF, RR, MML)# %>%
  #filter(.xvalue < 37, .#yvalue < 37)

corfig <- ggplot(cors, aes(x = .xvalue, y = .yvalue)) +
  geom_point() + 
  geom_abline(a = 0, b = 1, lty = 2) +
  stat_cor(size = 3) +
  facet_grid(.ykey ~ .xkey) +
  theme_few() +
  labs(x = "Adjusted ED50 (°C)", y = "Adjusted ED50 (°C)")
corfig

ggsave(plot = corfig, filename = "output/figS4.png", width = 190, height = 190, units = "mm")

Genotype is not a significant predictor of ED50 among coral genotypes tested at multiple nurseries. This could indicate that: 1) ED50 is plastic and genotype by environment interactions result in genotypes having different ED50 values at different nurseries, and/or 2) small sample sizes for each genotype limit statistical power to detect differences among genotypes. The 31 genotypes in this subset were present at only two (n=18) or three nurseries (n=13), so the low replication for each genotype is definitely a limitation. This study was not really designed to be able to answer this question, but the genotype swap experiment should hopefully be able to!

Is the average ED50 of corals at each nursery related to the nursery’s MMM temperature?

# Relationship between nursery MMM and average nursery ED50?
nursmmm <- res.adj %>%
  mutate(nursmmm = case_when(nursery == "MML" ~ 30.2694,
                             nursery == "UM"  ~ 29.5412,
                             nursery == "NSU" ~ 29.5344,
                             nursery == "CRF" ~ 29.7307,
                             nursery == "RR" ~ 29.7307,
                             nursery == "FWC" ~ 29.9965))

nursmmm %>%
  group_by(nursery) %>%
  summarize(nursmmm = mean(nursmmm), meaned50 = mean(estimate_ed50)) %>%
  ggplot(aes(x = nursmmm, y = meaned50)) +
  geom_label(aes(label = nursery))

There is no relationship between nursery MMM and average ED50.

Is thermal tolerance related to symbiont density?

sh <- read_csv("data/processed/acer_SH_data.csv") %>%
  clean_names() %>%
  filter(species == "Acer") %>%
  rename(name = genotype) %>%
  left_join(res.adj, sh, by = c("nursery", "name")) %>%
  mutate(a_acer = replace_na(a_acer, 0),
         d_acer = replace_na(d_acer, 0),
         totSH = a_acer + d_acer,
         logSH = log10(totSH))

# ggplot(sh, aes(x = logSH, y = estimate_ed50)) + 
#   geom_point(aes(color = nursery)) +
#   geom_smooth(method = "lm", se = FALSE)

ggplot(sh, aes(x = logSH, y = ed50_adj, color = nursery)) + 
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_few() +
  labs(x = "log10 symbiont to host cell ratio", y = "Adjusted ED50 (°C)")

mod <- lm(ed50_adj ~ logSH, data = sh)
anova(mod) %>% knitr::kable()

	Df	Sum Sq	Mean Sq	F value	Pr(>F)
logSH	1	0.0243809	0.0243809	0.1758259	0.6754852
Residuals	180	24.9597585	0.1386653	NA	NA

Is thermal tolerance related to source colony latitude and longitude?

mod <- lm(ed50_adj ~ source_lat + source_lon, data = res.adj)
anova(mod) %>%
  tidy() %>%
  knitr::kable()

term	df	sumsq	meansq	statistic	p.value
source_lat	1	0.0090278	0.0090278	0.0642242	0.8001787
source_lon	1	0.0481394	0.0481394	0.3424663	0.5590143
Residuals	218	30.6435526	0.1405668	NA	NA

lat.plot <- ggplot(res.adj, aes(x = source_lat, y = ed50_adj)) +
  geom_point(size = 0.25) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_few() +
  theme(text = element_text(size = 10)) +
  labs(x = "Latitude", y = expression(paste(ED50[adj], " (°C)")))

lon.plot <- ggplot(res.adj, aes(x = source_lon, y = ed50_adj)) +
  geom_point(size = 0.25) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_few() +
  theme(text = element_text(size = 10)) +
  labs(x = "Longitude", y = expression(paste(ED50[adj], " (°C)")))

cowplot::plot_grid(lat.plot, lon.plot, ncol = 2)

There is no relationship between ED50 and source colony latitude and longitude.

Is thermal tolerance related to source colony temperature regime?

To test this hypothesis, we calculate maximum monthly mean (MMM) temperatures corresponding to each source colony location, using satellite SST data with 5km resolution.

# Test effect of MMM from 5km data
mod5km <- lm(ed50_adj ~ mmm_5km, data = res.adj)
anova(mod5km) %>%
  tidy() %>%
  knitr::kable()

term	df	sumsq	meansq	statistic	p.value
mmm_5km	50	7.066033	0.1413207	1.016494	0.4549149
Residuals	170	23.634687	0.1390276	NA	NA

# Plot
mmm5.plot <- ggplot(res.adj, aes(x = as.numeric(mmm_5km), y = ed50_adj)) +
  geom_point(size = 0.25) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_few() +
  theme(text = element_text(size = 10)) +
  scale_x_continuous(labels = function(x) str_trim(x)) +
  labs(x = "MMM (°C)", y = expression(paste(ED50[adj], " (°C)")))

mmm5.plot

mod <- lm(ed50_adj ~ source_lat * source_lon * mmm_5km * nursmmm, data = nursmmm)
anova(mod) %>%
  tidy() %>%
  knitr::kable()

term	df	sumsq	meansq	statistic	p.value
source_lat	1	0.0090278	0.0090278	0.0701401	0.7916835
source_lon	1	0.0481394	0.0481394	0.3740122	0.5422272
mmm_5km	50	7.3991406	0.1479828	1.1497318	0.2749984
nursmmm	1	0.0001377	0.0001377	0.0010695	0.9739766
source_lat:source_lon	1	0.1843870	0.1843870	1.4325694	0.2342039
source_lat:mmm_5km	28	5.1183405	0.1827979	1.4202226	0.1062601
source_lon:mmm_5km	18	2.7885631	0.1549202	1.2036307	0.2731721
source_lat:nursmmm	1	0.2819423	0.2819423	2.1905112	0.1420386
source_lon:nursmmm	1	0.0299674	0.0299674	0.2328273	0.6305005
mmm_5km:nursmmm	15	1.3810718	0.0920715	0.7153363	0.7637416
source_lat:mmm_5km:nursmmm	3	0.5904492	0.1968164	1.5291375	0.2116873
source_lat:source_lon:mmm_5km:nursmmm	1	0.1271918	0.1271918	0.9881992	0.3226064
Residuals	99	12.7423612	0.1287107	NA	NA

There are no relationships between ED50 and source colony latitude, longitude, or MMM temperature. This suggests that thermally tolerant colonies are not restricted to specific locations, but can be found throughout the Florida reef tract.

Map the source locations of the most thermally tolerant genets

top10 <- res.adj %>%
  filter(ed50_adj > quantile(ed50_adj, 0.9))

library("rnaturalearth")
library("rnaturalearthdata")
library("sf")

world <- ne_countries(scale = "large", returnclass = "sf")
top10plot <- ggplot(data = world) +
  geom_sf(lwd = 0, fill = "gray70") +
  coord_sf(xlim = c(-82.35, -79.75), ylim = c(24.3, 26.4), expand = FALSE) +
  geom_point(aes(x = source_lon, y = source_lat), data = res.adj, size = 0.1, color = "black", alpha = 0.5) +
  geom_point(aes(x = source_lon, y = source_lat), data = top10, color = "red", size = 0.1, alpha = 1) +
  scale_x_continuous(breaks = seq(-82, -80, 1), labels = seq(-82, -80, 1)) +
  scale_y_continuous(breaks = seq(25, 26, 1), labels = seq(25, 26, 1)) +
  theme_few() +
  theme(text = element_text(size = 10),
        plot.margin = margin(6,3,6,6)) +
  labs(y = "Latitude", x = "Longitude") +
  geom_segment(aes(x = -81.475, y = 25.325, xend = source_lon, yend = source_lat), data = top10, lwd = 0.1, col = "red", alpha = 0.5)

insetplot <- tibble(x = seq(-3, 3, length.out = 1001), y = dnorm(x, 0, 1)) %>%
  mutate(area = x >= qnorm(0.9)) %>%
  ggplot(aes(x = x, y = y)) +
  geom_line(lwd = 0.1) +
  geom_ribbon(aes(ymin = 0, ymax = y, fill = area)) +
  scale_fill_manual(values = c(NA, "red")) +
  theme_void() +
  theme(plot.margin = margin(0,0,0,0)) +
  annotate("text", x = -1, y = -0.1, label = expression("ED50" [adj]), size = 2, angle = 0) +   # 
  coord_cartesian(clip = "off") +
  theme(legend.position = "none")



plot.with.inset <- ggdraw() +
  draw_plot(top10plot) +
  draw_plot(insetplot, x = 0.3, y = .55, width = .25, height = .2)

plot.with.inset

newfig3 <- cowplot::plot_grid(lat.plot, lon.plot, mmm5.plot, plot.with.inset, nrow = 2,
                   labels = "AUTO")
ggsave("output/fig3.png", plot = newfig3, width = 90, height = 80, units = "mm")

Map each nursery’s collection sources

nurs <- tribble(
  ~nursery, ~nurs_lat, ~nurs_lon, 
  "MML", 24.56257, -81.40009,
  "NSU", 26.124533, -80.097033,
  "UM", 25.676261, -80.098681,
  "CRF", 24.982398, -80.436548,
  "RR", 24.98065, -80.438833,
  "FWC", 24.667233, -81.025117
)

df <- res.adj %>%
  select(source_lat, source_lon, nursery, name) %>%
  full_join(nurs) %>%
  mutate(nursery = factor(nursery, levels = levels(res.adj$nursery)))


maps <- ggplot(data = world) +
  geom_sf(lwd = 0, fill = "gray70") +
  coord_sf(xlim = c(-82.35, -79.75), ylim = c(24.3, 26.4), expand = FALSE) +
  geom_point(aes(x = source_lon, y = source_lat, color = nursery), 
             data = df, size = 0.5, alpha = 0.5) +
  geom_point(aes(x = nurs_lon, y = nurs_lat, fill = nursery),
             data = df, size = 1, pch = 21) +
  geom_segment(aes(x = nurs_lon, xend = source_lon, y = nurs_lat, yend = source_lat, color = nursery), 
               data = df, lwd = 0.2, alpha = 0.5) +
  scale_x_continuous(breaks = seq(-82, -80, 1), labels = seq(-82, -80, 1)) +
  scale_y_continuous(breaks = seq(25, 26, 1), labels = seq(25, 26, 1)) +
  facet_wrap(~nursery, ncol = 2) +
  theme_few() +
  theme(text = element_text(size = 10),
        legend.position = "none") +
  labs(y = "Latitude", x = "Longitude")
maps

ggsave("output/figS2.png", plot = maps, width = 80, height = 120, units = "mm")

Thermal tolerance phenotype rarefaction curves

# ED50 rarefaction curves from each nursery
sample_ed50s <- function(ed50s, n) {
  s <- sample(ed50s, n)
  rs <- diff(range(s))
  rt <- diff(qnorm(c(0.01, 0.99), mean(ed50s), sd(ed50s)))
  propr <- rs / rt
  return(propr)
}

rc <- res %>%
  select(nursery, estimate_ed50) %>%
  nest(data = estimate_ed50) %>%
  crossing(n = 1:25) %>%
  mutate(prop = map2(data, n, ~ replicate(10000, sample_ed50s(.x$estimate_ed50, .y))),
         mean = map_dbl(prop, mean))

rc1fig <- ggplot(rc, aes(x = n, y = mean, color = nursery)) +
  geom_line(lwd = 0.3) +
  geom_point(size = 0.5) +
  scale_y_continuous(limits = c(0, 1)) +
  theme_few() +
  theme(text = element_text(size = 8),
        legend.position = "none") +
  labs(x = "Genets sampled", y = "Proportion of nursery\n ED50 range captured")



# Prob of any genos in top 25% of population
top25pop <- quantile(rnorm(10000, mean(res.adj$ed50_adj), sd(res.adj$ed50_adj)), prob = 0.75)
sample_ed50s2 <- function(ed50s, n) {
  s <- sample(ed50s, n)
  any_top25 <- any(s >= top25pop)
  return(any_top25)
}

rc2 <- res.adj %>%
  select(nursery, ed50_adj) %>%
  nest(data = ed50_adj) %>%
  crossing(n = 1:25) %>%
  mutate(ntop25 = map2(data, n, ~ replicate(10000, sample_ed50s2(.x$ed50_adj, .y))),
         mean = map_dbl(ntop25, mean))

rc2fig <- ggplot(rc2, aes(x = n, y = mean, color = nursery)) +
  geom_line(lwd = 0.3) +
  geom_point(size = 0.5) +
  scale_y_continuous(limits = c(0, 1)) +
  theme_few() +
  theme(text = element_text(size = 8),
        legend.position = c(0.7, 0.4),
        legend.key.size = unit(3, 'mm'),
        legend.spacing.y = unit(1, "mm")) +
  labs(x = "Genets sampled", 
       y = "Probability top quartile\nof population captured") +
  guides(color = guide_legend(reverse = TRUE))

fig4 <- cowplot::plot_grid(rc1fig, rc2fig, ncol = 2, labels = "AUTO")
fig4

ggsave("output/fig4.png", width = 114, height = 50, units = "mm")

rc %>%
  group_by(nursery) %>%
  filter(mean >= 0.5) %>%
  filter(n == min(n)) %>%
  select(nursery, n_genets = n, mean_range_captured = mean) %>%
  knitr::kable(caption = "Number of genets needed to capture >50% of nursery ED50 range")

Number of genets needed to capture >50% of nursery ED50 range
nursery	n_genets	mean_range_captured
MML	6	0.5417413
FWC	5	0.5141617
RR	5	0.5058364
CRF	6	0.5412042
UM	5	0.5077699
NSU	5	0.5028655

rc2 %>%
  group_by(nursery) %>%
  filter(mean >= 0.9) %>%
  filter(n == min(n)) %>%
  select(nursery, n_genets = n, mean_prob_top25pct = mean) %>%
  knitr::kable(caption = "Number of genets needed for 90% chance of capturing ≥1 in top 25% of population")

Number of genets needed for 90% chance of capturing ≥1 in top 25% of population
nursery	n_genets	mean_prob_top25pct
MML	11	0.9186
FWC	11	0.9222
RR	6	0.9229
CRF	8	0.9218
UM	8	0.9143
NSU	7	0.9190

Compare October to June CBASS results

27 of the genotypes we tested on the October cruise were also tested in June 2020 by Kelsey Johnson-Sapp at RSMAS in an independent set of CBASS assays (this occurred at the time of the genotype swap). We can compare the ED50 values obtained for these 27 genotypes in June and October to see how reproducible the results are.

# Filter data for corals run in October and June
df2 <- df.all %>%
  drop_na(date) %>%
  group_by(nursery, geno) %>%
  filter(n_distinct(date) == 2) %>%
  ungroup() %>%
  select(nursery, name, geno, date, max_temp, fvfm)

# Fit dose-response curves for each genotype on each date
df2$genoN <- factor(paste0("g", group_indices(df2, nursery, geno, date)))
mod2 <- drm(fvfm ~ max_temp, curveid = genoN, data = df2, 
           fct = LL.3(names = c("hill", "max", "ED50")),
           upperl = c(120, 0.72, 40),
           lowerl = c(10, 0.55, 30))

# Extract ED50 estimates and standard error
ed50s <- tidy(mod2) %>%
  filter(term == "ED50") %>%
  select(genoN = curve, estimate, std.error) %>%
  left_join(distinct(df2, nursery, geno, date, genoN))

# Rearrange data to compare ED50 values between dates
ed50s2 <- ed50s %>% 
  mutate(date = recode(date, "2020-10" = "oct", "2020-06" = "jun")) %>%
  select(nursery, geno, date, ed50 = estimate, std.error) %>%
  pivot_wider(names_from = date, values_from = c(ed50, std.error)) %>%
  filter(geno != "FM19")

# Plot figure
fig <- ggplot(ed50s2, aes(y = ed50_oct, x = ed50_jun)) +
  geom_errorbar(aes(xmin = ed50_jun - std.error_jun,
                    xmax = ed50_jun + std.error_jun), lwd = 0.15) +
  geom_errorbar(aes(ymin = ed50_oct - std.error_oct,
                    ymax = ed50_oct + std.error_oct), lwd = 0.15) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  #geom_label(aes(label = geno), alpha = 0.7, size = 2) +
  annotate(geom = "text", x = 34.8, y = 36.5, 
           label = "Pearson's correlation:", adj = 0, size = 2) +
  stat_cor(method = "pearson", label.y = 36.45, label.x = 34.85, size = 2) +
  annotate(geom = "text", x = 34.8, y = 36.375, 
           label = "Spearman's rank correlation:", adj = 0, size = 2) +
  stat_cor(method = "spearman", label.y = 36.325, label.x = 34.85, size = 2) +
  geom_abline(intercept = 0, slope = 1, lty = 2, color = "black") +
  coord_fixed()  +
  labs(x = "ED50 (°C) measured in June", y = "ED50 (°C) measured in Oct.") +
  theme_few()
fig

ggsave("output/fig5.png", width = 87, height = 97, units = "mm")

# Linear regression
mod <- lm(ed50_oct ~ ed50_jun, data = ed50s2)
## Slope of fit
tibble(slope = coef(mod)[2]) %>% knitr::kable()

slope
0.9772041

## Test if slope is different than 1
car::linearHypothesis(mod, "ed50_jun = 1") %>% knitr::kable()

Res.Df	RSS	Df	Sum of Sq	F	Pr(>F)
26	1.044915	NA	NA	NA	NA
25	1.044264	1	0.0006514	0.0155941	0.9016203

## Find mean difference between Oct and Jun values
augment(mod) %>%
  mutate(diff = ed50_oct - ed50_jun) %>%
  summarize("Mean difference Oct. - Jun" = mean(diff)) %>%
  knitr::kable()

Mean difference Oct. - Jun
0.5112125

There is a high correlation between ED50 values obtained from the same coral genets (from the same nurseries) run in both June and October (n = 27 genets). This suggests that ED50 values from independently run CBASS assays are relatively reproducible for a given genotype. The shift in y-intercept could be due to seasonal differences, or experimental differences, in October vs. June. Overall, this high reproducibility is very encouraging for the continued use of the CBASS approach for thermal tolerance phenotyping.

Thermal tolerance of A. cervicornis across 6 Florida coral nurseries

Ross Cunning

23 April, 2021