# Load packages
library(MASS)
library(tidyverse)
library(lubridate)
library(lme4)
library(lsmeans)
library(mgcv)
library(gamm4)
library(furrr)
# Convert logit to probability
logit2prob <- function(logit){
odds <- exp(logit)
prob <- odds / (1 + odds)
return(prob)
}
# Plan for parallel processing
plan(multiprocess)
We will analyze the complete tagged coral dataset.
# Import tidy tagged coral data
load("data/processed/tagged.RData")
tagged_bool <- tagged_bool %>%
filter(!site %in% c("HBN1", "HBN2")) %>% # Omit HBN1 and HBN2
mutate(day = (date - min(date)) / ddays(1))
# Define start and end of dredging (for plotting)
dredge.start.date <- as.Date("2013-11-20") # NOAA sediment report April 2016, page 44
dredge.end.date <- as.Date("2015-03-16") # DCA report August 2015, page 3
Several different condition codes were used to document impacts of sedimentation on corals:
First we will look at all three of these codes as indicators of general sedimentation stress. Then we will look at only the more severe conditions - PBUR and BUR.
# Get proportion of living corals with sedimentation stress for each reef/dir at each week
sedstress.summ1 <- tagged_bool %>%
filter(!DEAD) %>%
group_by(reef, dir, channel, site, date) %>%
summarise(n = n(),
SEDSTRESS = sum(SA|PBUR|BUR),
prop = SEDSTRESS / n)
# Plot proportions
ggplot(sedstress.summ1, aes(x = date, y = prop, color = channel)) +
facet_wrap(site ~ .) + ylim(0, 1) +
geom_point(aes(size = n), alpha = 0.4) +
scale_size(range = c(0.1, 2.5)) +
geom_smooth(method = "loess", se = FALSE, span = 1) +
labs(title = "Sedimentation stress (SA, PBUR, or BUR)",
x = "Date", y = "Proportion of corals") +
geom_vline(xintercept = c(dredge.start.date, dredge.end.date), lwd = 0.25, linetype = 3) +
theme(axis.text.x = element_text(angle = 45))
These plots with local regression smoothing show that sedimentation stress was much greater at channelside sites relative to controls. To generate more robust statistical fits for the dataset and compare channelside vs. controls at each reef, we can fit generalized additive mixed models (GAMMs) and use only the regions in the data without large gaps in time.
# Filter dataset to only fit over regions with less than 20-week gaps between data points
tagged_bool_f <- tagged_bool %>%
filter(!DEAD) %>%
nest(-reef, -dir, -channel) %>% # For each reef
mutate(dates = map(data, ~ sort(unique(.$date))), # Which weeks have data
runs = map(dates, ~ split(.x, cumsum(c(TRUE, diff(.x) > 140)))), # Find runs w/o 140d gaps
longest = map(runs, ~ .x[[which.max(sapply(.x, length))]]), # Get longest run
data.f = map2(data, longest, ~ filter(.x, .$date %in% .y))) %>% # Get data from those days
unnest(data.f)
# Fit GAMMs for each reef
set.seed(55423)
gamms1 <- tagged_bool_f %>%
# For each reef and direction
nest(-reef, -dir) %>%
mutate(
# Fit a binomial GAMM for sediment stress over time at channelside vs. control, site as random
mod = future_map(data, ~ gamm4(SA|PBUR|BUR ~ s(as.numeric(date), by = factor(channel)) + channel,
random = ~ (1|site/transect) + (1|species),
family = "binomial", data = .)),
# Get newdata frame for predict() using the appropriate range of weeks
newdata = map(data, ~ expand.grid(date = as.numeric(min(.$date):max(.$date)),
channel = c("channelside", "control"))),
# Get fitted values from GAMM using newdata frame
fit = map2(mod, newdata, ~ predict(object = .x$gam, newdata = .y, type = "response")),
# Get confidence intervals by simulation
Rbeta = map(mod, ~ mvrnorm(n = 1000, coef(.$gam), vcov(.$gam))),
Xp = map2(mod, newdata, ~ predict(object = .x$gam, newdata = .y, type = "lpmatrix")),
sim = map2(Xp, Rbeta, function(x, y) x %*% t(y)),
lci = map(sim, ~ logit2prob(apply(., 1, quantile, 0.025))),
uci = map(sim, ~ logit2prob(apply(., 1, quantile, 0.975)))
)
gamms1.fitted <- gamms1 %>%
unnest(newdata, fit, lci, uci) %>%
mutate(date = zoo::as.Date.numeric(date))
# Plot
fig1 <- ggplot(gamms1.fitted, aes(x = date, y = fit, color = channel)) +
facet_grid(dir ~ reef) +
geom_point(data = sedstress.summ1, aes(y = prop, size = n), alpha = 0.4) +
scale_size(range = c(0.1, 2.5)) +
geom_ribbon(aes(ymin = lci, ymax = uci, linetype = NA), alpha = 0.2) +
geom_line(lwd = 1) +
geom_vline(xintercept = c(dredge.start.date, dredge.end.date), linetype = 3, lwd = 0.25) +
labs(title = "Sedimentation stress (SA, PBUR, or BUR)",
x = "Date", y = "Proportion of corals")
fig1
Why is there so much sediment stress at control sites, even those >9km away? Either some of these condition codes (e.g., SA) don’t really reflect dredging sediment, and/or the control sites were actually impacted by significant levels of dredging sediment.
If we assume that control sites are far enough from dredging activity that they are not impacted by dredging sediments, then the “SA” condition code, which was observed frequently at control sites (even those >9km away), may not be indicative of dredging sediments. Therefore, PBUR and BUR may be more appropriate indicators of sedimentation stress due to dredging.
# Get proportion of living corals with sedimentation stress for each reef/dir at each week
sedstress.summ2 <- tagged_bool %>%
filter(!DEAD) %>%
group_by(reef, dir, channel, site, date) %>%
summarise(n = n(),
SEDSTRESS = sum(PBUR|BUR),
prop = SEDSTRESS / n)
# Plot proportions
ggplot(sedstress.summ2, aes(x = date, y = prop, color = channel)) +
facet_wrap(site ~ .) + ylim(0, 1) +
geom_point(aes(size = n), alpha = 0.4) +
scale_size(range = c(0.1, 2.5)) +
geom_smooth(method = "loess", se = FALSE, span = 1) +
labs(title = "Severe sedimentation stress (PBUR or BUR)",
x = "Date", y = "Proportion of corals")
# Fit GAMMs for each reef
set.seed(4327)
gamms2 <- tagged_bool_f %>%
# For each reef (R2N, R2S, R3N, R3S)
nest(-reef, -dir) %>%
mutate(
# Fit a binomial GAMM for sediment stress over time at channelside vs. control, site as random
mod = future_map(data, ~ gamm4(PBUR|BUR ~ s(as.numeric(date), by = factor(channel)) + channel, #, k = 8
random = ~ (1|site/transect) + (1|species),
family = "binomial", data = .)),
# Get newdata frame for predict() using the appropriate range of weeks
newdata = map(data, ~ expand.grid(date = as.numeric(min(.$date):max(.$date)),
channel = c("channelside", "control"))),
# Get fitted values from GAMM using newdata frame
fit = map2(mod, newdata, ~ predict(object = .x$gam, newdata = .y, type = "response")),
# Get confidence intervals by simulation
Rbeta = map(mod, ~ mvrnorm(n = 1000, coef(.$gam), vcov(.$gam))),
Xp = map2(mod, newdata, ~ predict(object = .x$gam, newdata = .y, type = "lpmatrix")),
sim = map2(Xp, Rbeta, function(x, y) x %*% t(y)),
lci = map(sim, ~ logit2prob(apply(., 1, quantile, 0.025))),
uci = map(sim, ~ logit2prob(apply(., 1, quantile, 0.975)))
)
gamms2.fitted <- gamms2 %>%
unnest(newdata, fit, lci, uci) %>%
mutate(date = zoo::as.Date.numeric(date))
save(sedstress.summ2, gamms2.fitted, file = "data/processed/sedstress_gamms.RData")
# Plot
fig2 <- ggplot(gamms2.fitted, aes(x = date, y = fit, color = channel)) +
facet_grid(dir ~ reef) +
geom_point(data = sedstress.summ2, aes(y = prop, size = n), alpha = 0.4) +
scale_size(range = c(0.1, 2.5)) +
geom_ribbon(aes(ymin = lci, ymax = uci, linetype = NA), alpha = 0.2) +
geom_line(lwd = 1) +
geom_vline(xintercept = c(dredge.start.date, dredge.end.date), linetype = 3, lwd = 0.25) +
labs(title = "Severe sedimentation stress (PBUR or BUR)",
x = "Date", y = "Proportion of corals") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank(), panel.background = element_blank(),
panel.border = element_rect(color = "black", fill = NA),
legend.position = c(0.8, 0.5), # c(0,0) bottom left, c(1,1) top-right.
legend.background = element_rect(fill = NA, colour = NA),
legend.title = element_blank(),
legend.key.size = unit(0.3, "cm"))
fig2 + xlim(as.Date("2013-10-20"), as.Date("2015-07-01")) +
labs(title = "", y = "Probability of partial or complete burial")
Analysis of PBUR or BUR only shows much lower prevalence of these condition codes at the farthest control sites, yet their prevalence at channelside sites (and some control sites, e.g., HBN) remains very high.
load("data/processed/sedstress_gamms.RData")
# Number of corals PBUR or BUR before dredging began
sedstress.summ2 %>% ungroup() %>%
filter(date < "2013-11-20") %>%
summarise(nobs = sum(n),
nsedstress = sum(SEDSTRESS))
## # A tibble: 1 x 2
## nobs nsedstress
## <int> <int>
## 1 1151 1
# Peak of sed stress
# What was the maximum coral burial observed in each area? Look at upper confidence limit on prediction, and when it occurred.
maxfit2014 <- gamms2.fitted %>%
group_by(reef, dir, channel) %>%
summarise(max_burial = round(max(fit) * 100, 1),
when = date[fit == max(fit)])
knitr::kable(maxfit2014 %>% arrange(-max_burial),
caption = "Maximum predicted prevalence of coral burial")
| reef | dir | channel | max_burial | when |
|---|---|---|---|---|
| R3 | N | channelside | 76.4 | 2014-09-05 |
| R2 | N | channelside | 72.9 | 2014-09-09 |
| HB | N | channelside | 57.2 | 2014-09-04 |
| HB | S | channelside | 52.7 | 2014-08-09 |
| R2 | S | channelside | 44.5 | 2014-10-14 |
| HB | N | control | 31.2 | 2014-10-01 |
| R3 | S | channelside | 18.3 | 2014-09-16 |
| HB | S | control | 5.7 | 2015-01-15 |
| R3 | N | control | 5.6 | 2014-12-30 |
| R2 | N | control | 5.4 | 2015-07-15 |
| R2 | S | control | 2.7 | 2014-11-20 |
| R3 | S | control | 1.7 | 2014-12-30 |