# Import tidy count data
load("data/processed/all_counts.RData")
# Subset Scleractinian count data
scler <- filter(all_counts, category == "scleractinian") %>% # Get scleractinians only
filter(year %in% c(2010, 2013, 2016, 2017)) %>% # Get only before dredging (2010, 2013) and 2y postcon (2016, 2017)
dplyr::select(reef, dir, channel, site, site2, dist, year, date, # Reorder columns
transect, transect_area_m2, genus, species, diameter, count, perm)
# Subset disease resistant species only
resistant.genus <- c("Acropora", "Agaricia", "Madracis", "Mycetophyllia", "PDIV", "Porites", "Siderastrea", "Stephanocoenia")
# Subset only Porited
#resistant.genus <- c("Porites", "PDIV")
# Subset only disease-resistant corals
scler <- filter(scler, genus %in% resistant.genus)
ggplot(scler, aes(x = diameter)) +
geom_histogram(binwidth = 1) +
facet_wrap(~ year) +
scale_x_continuous(limits = c(0, 5))
Small corals (< 3cm) were recorded, but only in 2010, 2016, and 2017. Must filter out corals < 3 cm in order to compare data from all years.
## Many entries also do not include a size (diameter = NA)
## For coral density analysis, assume these are bigger than 3 cm and keep them
scler_3 <- scler %>%
filter(diameter >= 3 | is.na(diameter))
Decreases in coral density could have resulted from mortality due to disease, or mortality due to dredging sedimentation. Disease impacts should not show a relationship to distance from the dredging channel, but dredge impacts should. Therefore, analyzing changes in density as a function of distance from channel is one way to get at impacts due to dredging.
The full coral count dataset includes permanent sites and non-permanent sites, with data from 2010, 2013, 2015, and 2016/2017. Since corals <3 cm were not recorded in 2013 and 2015 surveys, we need to use the dataset excluding <3 cm corals.
# Sum number of large corals counted on each transect
large_counts <- scler_3 %>% # Use dataset that omits small corals
filter(dist < 1000) %>% # Use data within 1000m of the channel
group_by(reef, dir, channel, site, site2, dist, date, year, transect, transect_area_m2, perm) %>%
summarise(n = sum(count)) %>%
mutate(dens = n / transect_area_m2,
timepoint = cut(year, breaks = c(-Inf, 2014, Inf),
labels = c("pre-2014", "post-2014"))) %>%
ungroup()
# Plot density of corals for each transect
ggplot(large_counts, aes(x = dist, y = dens, color = timepoint)) +
facet_grid(dir ~ reef) +
geom_point() +
labs(y = "Corals / m2", x = "Distance from channel")
Reef 2 south post-dredging timepoint has an apparent outlier at 400m from channel. The transect was performed on 2017-03-29
# Subset data for only reef 2 south
r2s <- filter(large_counts, reef == "R2", dir == "S", timepoint == "post-2014")
# Fit a linear model with all points and calculate Cook's distance (cannot calculate Cook's for poisson glm)
mod2 <- lm(n/transect_area_m2 ~ dist, data = r2s)
nd2 <- crossing(dist = r2s$dist, transect_area_m2 = 1)
nd2$fit <- predict(mod2, newdata = nd2, re.form = NA, type = "response")
# Plot fitted lm
ggplot(r2s, aes(x = dist, y = dens, color = timepoint)) +
geom_point() +
geom_line(data = nd2, aes(x = dist, y = fit), inherit.aes = FALSE)
# Plot cook's distance ~ distance
plot(cooks.distance(mod2) ~ r2s$dist, xlab = "Distance from channel", ylab = "Cook's Distance")
text(r2s$dist - 30, cooks.distance(mod2),
labels = ifelse(cooks.distance(mod2) > 0.2, round(cooks.distance(mod2), 2), ""))
# Remove outlier point from R2S (it was the only data point from 2017-03-29)
large_counts <- filter(large_counts, date != "2017-03-29")
# Subset 1: locations with enough data at different distances from channel
large_counts_1 <- large_counts %>%
# only use where sufficient data exist to test dist
filter(reef == "R2" | (reef == "R3" & dir == "N") | (reef == "R3" & dir == "S" & timepoint == "pre-2014"))
# Fit model with distance from channel as predictor
mod_1 <- glmer(n ~ dist * reef:dir * timepoint + # count as response
offset(log(transect_area_m2)) + # offset accounts for diff transect sizes
(1|site/site2/transect) + (1|date), # random effects
data = large_counts_1, family = "poisson") # poisson model
# Get fitted responses
newdata_1 <- large_counts_1 %>%
tidyr::expand(nesting(reef, dir, timepoint, dist),
transect_area_m2 = 1) # transect area of 1 m2 will predict count per m2
newdata_1$fit <- predict(mod_1, newdata_1, type = "response", re.form = NA)
# Get confidence intervals by simulation
set.seed(9870)
bootfit_1 <- bootMer(mod_1, FUN = function(x) predict(x, newdata_1, type = "response", re.form = NA),
nsim = 400, parallel = "multicore", ncpus = 8)
newdata_1$lci <- apply(bootfit_1$t, 2, quantile, 0.025)
newdata_1$uci <- apply(bootfit_1$t, 2, quantile, 0.975)
# Get fitted value of coral density adjacent to channel (i.e., dist = 20 m)
lsm20 <- lsmeans(mod_1, specs = c("reef", "dir", "timepoint"),
at = list(dist = 20, transect_area_m2 = 1), transform = "response")
# Get fitted value of coral density away from channel (i.e., dist = 300 m)
lsm300 <- lsmeans(mod_1, specs = c("reef", "dir", "timepoint"),
at = list(dist = 300, transect_area_m2 = 1), transform = "response")
# Gather fitted values for annotating plot
fit20 <- summary(lsm20) %>%
mutate(dist = 20) %>%
dplyr::select(reef, dir, timepoint, rate, dist) %>%
spread(timepoint, rate)
fit300 <- summary(lsm300) %>%
mutate(dist = 300) %>%
dplyr::select(reef, dir, timepoint, rate, dist) %>%
spread(timepoint, rate)
fits <- as_tibble(bind_rows(fit20, fit300))
# Test for significant differences by timepoint for fitted values
diff20 <- rbind(pairs(lsm20, by = c("reef", "dir")), adjust = "none")
diff300 <- rbind(pairs(lsm300, by = c("reef", "dir")), adjust = "none")
# Gather fitted values for annotating plot
sig20 <- summary(diff20) %>%
mutate(dist = 20) %>%
dplyr::select(reef, dir, estimate, p.value, dist)
sig300 <- summary(diff300) %>%
mutate(dist = 300) %>%
dplyr::select(reef, dir, estimate, p.value, dist)
sigs <- as_tibble(bind_rows(sig20, sig300))
# Generate data frame to use to annotate the plot
anno <- full_join(fits, sigs) %>%
mutate(rel = -(estimate) / `pre-2014` * 100,
rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
sig = gtools::stars.pval(p.value),
sig2 = gsub(" ", "ns", sig))
# Subset 2: areas where data are only available close to channel
large_counts_2 <- large_counts %>%
filter(dist < 50) %>%
filter(reef == "HB" | (reef == "R3" & dir == "S")) %>% # & timepoint == "post-2014")) %>%
group_by(reef, dir) %>%
mutate(med = median(dist))
mod_2 <- glmer(n ~ reef * dir * timepoint + # count as response
offset(log(transect_area_m2)) + # offset accounts for diff transect sizes
(1|site/site2/transect) + (1|date), # random effects
data = large_counts_2, family = "poisson")
lsm <- lsmeans(mod_2, specs = c("reef", "dir", "timepoint"),
at = list(transect_area_m2 = 1), transform = "response")
sig <- rbind(pairs(lsm, by = c("reef", "dir")), adjust = "none")
sig <- summary(sig) %>%
mutate(dist = 25) %>%
dplyr::select(reef, dir, estimate, p.value, dist) %>% drop_na()
lsm <- summary(lsm, level = 0.84) %>%
mutate(dist = 25)
lsm2 <- lsm %>%
dplyr::select(reef, dir, timepoint, rate, dist) %>%
spread(timepoint, rate)
# Don't plot point for R3S pre, because already fitted with line
lsm <- lsm[-4,]
anno_2 <- full_join(lsm2, sig) %>%
mutate(rel = -(estimate) / `pre-2014` * 100,
rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
sig = gtools::stars.pval(p.value),
sig2 = gsub(" ", "ns", sig))
anno <- bind_rows(anno, anno_2)
# Save as RData
save(newdata_1, lsm, anno, file = "data/processed/nonsus.large.dens.mod.RData")
# PLOT ALL RESULTS FOR LARGE CORALS
ggplot(data = newdata_1, aes(x = dist, y = fit, color = timepoint)) +
facet_grid(dir ~ reef, labeller = global_labeller) +
geom_line() +
scale_x_continuous(limits = c(0, 500)) +
#scale_y_continuous(limits = c(0.2, 1.6), expand = c(0,0)) +
geom_ribbon(aes(ymin = lci, ymax = uci), linetype = 0, alpha = 0.2) +
geom_segment(data = anno, inherit.aes = FALSE,
aes(x = dist, y = `pre-2014`, xend = dist, yend = `post-2014`),
arrow = arrow(length = unit(0.03, "npc"))) +
geom_text(data = anno, inherit.aes = FALSE, hjust = "left",
aes(x = dist, y = `post-2014` - 0.1, label = paste0(rel2, sig2))) +
geom_point(data = lsm, aes(x = dist, y = rate), alpha = 0.5) +
geom_errorbar(data = lsm, aes(x = dist, y = rate, ymin = asymp.LCL, ymax = asymp.UCL)) +
labs(x = "Distance from channel (m)", y = "Number of corals ≥ 3cm per m2", color = "") +
scale_color_manual(labels = c("Before dredging", "After dredging"), values = c("#00BFC4", "#F8766D")) +
theme_custom() +
theme(legend.position = c(0.1, 0.9),
legend.key.size = unit(5, "mm"),
legend.text = element_text(size = 7),
axis.text.x = element_text(angle=0, hjust=0.5, vjust = 1))
ggsave(filename = "figures/FigS6.png", device = "png", width = 190, height = 95, units = "mm")
Transects at increasing distances from channel were recorded in 2010 baseline surveys and 2016/2017 cross-site surveys, when small corals were recorded.
# Subset 2010 baseline and 2016/2017 cross-site survey data <1000 m from channel
small_counts <- scler %>% # Select dataset that includes all corals, including small
filter(year %in% c(2010, 2016, 2017) & dist < 1000) %>%
filter(diameter < 3) %>% # ONLY 1-2cm CORALS
group_by(reef, dir, channel, site, site2, dist, date, year, transect, transect_area_m2, perm) %>%
summarise(n = sum(count)) %>%
mutate(dens = n / transect_area_m2,
timepoint = cut(year, breaks = c(-Inf, 2011, Inf),
labels = c("2010 baseline", "2016/2017 cross-site surveys"))) %>%
ungroup()
# Plot density of corals for each transect
ggplot(small_counts, aes(x = dist, y = dens, color = timepoint)) +
facet_grid(dir ~ reef) +
geom_point() +
labs(y = "Small corals / m2", x = "Distance from channel")
small_counts.f <- small_counts
# Fit model
mod_3 <- glmer(n ~ dist * reef * dir * timepoint + # count as response
offset(log(transect_area_m2)) + # offset accounts for diff transect sizes
(1|site/site2/transect) + (1|date), # random effects
data = small_counts.f, family = "poisson") # poisson model
# Get fitted responses
newdata_2 <- small_counts.f %>%
tidyr::expand(nesting(reef, dir, timepoint), dist = 1:500,
transect_area_m2 = 1) # transect area of 1 m2 will predict count per m2
newdata_2$fit <- predict(mod_3, newdata_2, type = "response", re.form = NA)
# Get confidence intervals by simulation
set.seed(9014)
bootfit_2 <- bootMer(mod_3, FUN = function(x) predict(x, newdata_2, type = "response", re.form = NA),
nsim = 400, parallel = "multicore", ncpus = 8)
newdata_2$lci <- apply(bootfit_2$t, 2, quantile, 0.025)
newdata_2$uci <- apply(bootfit_2$t, 2, quantile, 0.975)
# Subset areas where sufficient data exist to model based on distance from channel
newdata_2 <- filter(newdata_2, reef == "R2" | (reef == "R3" & dir == "N") | (reef == "R3" & dir == "S" & timepoint == "2010 baseline"))
# Plot raw data and fitted results
ggplot(newdata_2, aes(x = dist, y = fit, color = timepoint)) +
facet_grid(dir ~ reef) +
geom_ribbon(aes(ymin = lci, ymax = uci, linetype = NA), alpha = 0.2) +
geom_line() +
geom_point(data = small_counts, aes(y = dens)) +
labs(x = "Distance from channel", y = "# corals per m2")
# Get fitted value of coral density adjacent to channel (i.e., dist = 20 m)
lsm20 <- lsmeans(mod_3, specs = c("reef", "dir", "timepoint"),
at = list(dist = 20, transect_area_m2 = 1), transform = "response")
# Get fitted value of coral density away from channel (i.e., dist = 300 m)
lsm300 <- lsmeans(mod_3, specs = c("reef", "dir", "timepoint"),
at = list(dist = 300, transect_area_m2 = 1), transform = "response")
# Gather fitted values for annotating plot
fit20 <- summary(lsm20) %>%
mutate(dist = 20) %>%
select(reef, dir, timepoint, rate, dist) %>%
spread(timepoint, rate)
fit300 <- summary(lsm300) %>%
mutate(dist = 300) %>%
select(reef, dir, timepoint, rate, dist) %>%
spread(timepoint, rate)
fits <- as_tibble(bind_rows(fit20, fit300))
# Test for significant differences by timepoint for fitted values
diff20 <- rbind(pairs(lsm20, by = c("reef", "dir")), adjust = "none")
diff300 <- rbind(pairs(lsm300, by = c("reef", "dir")), adjust = "none")
# Gather fitted values for annotating plot
sig20 <- summary(diff20) %>%
mutate(dist = 20) %>%
select(reef, dir, estimate, p.value, dist)
sig300 <- summary(diff300) %>%
mutate(dist = 300) %>%
select(reef, dir, estimate, p.value, dist)
sigs <- as_tibble(bind_rows(sig20, sig300))
#anno <- full_join(fits, sigs)
anno <- full_join(fits, sigs) %>%
mutate(rel = -(estimate) / `2010 baseline` * 100,
rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
sig = gtools::stars.pval(p.value),
sig2 = gsub(" ", "ns", sig))
anno <- anno %>%
mutate(`2010 baseline` = ifelse((reef == "HB" | (reef == "R3" & dir == "S")), NA, `2010 baseline`),
`2016/2017 cross-site surveys` = ifelse(reef == "HB", NA, `2016/2017 cross-site surveys`),
sig2 = ifelse((reef == "HB" | (reef == "R3" & dir == "S")), "", sig2))
# Subset 2: areas where data are only available close to channel
small_counts_2 <- small_counts %>%
filter(dist < 50) %>%
filter(reef == "HB" | (reef == "R3" & dir == "S")) %>%
group_by(reef, dir) %>%
mutate(med = median(dist))
ggplot(small_counts_2, aes(x = dist, y = dens, color = timepoint)) +
facet_grid(dir ~ reef) +
geom_point() +
labs(y = "Small corals / m2", x = "Distance from channel")
mod_2 <- glmer(n ~ reef * dir * timepoint + # count as response
offset(log(transect_area_m2)) + # offset accounts for diff transect sizes
(1|site/site2/transect) + (1|date), # random effects
data = small_counts_2, family = "poisson")
lsm <- lsmeans(mod_2, specs = c("reef", "dir", "timepoint"),
at = list(transect_area_m2 = 1), transform = "response")
sig <- rbind(pairs(lsm, by = c("reef", "dir")), adjust = "none")
sig <- summary(sig) %>%
mutate(dist = 25) %>%
dplyr::select(reef, dir, estimate, p.value, dist) %>% drop_na()
lsm <- summary(lsm, level = 0.84) %>%
mutate(dist = 25)
lsm2 <- lsm %>%
dplyr::select(reef, dir, timepoint, rate, dist) %>%
spread(timepoint, rate)
# Don't plot point for R3S pre, because already fitted with line
lsm <- lsm[-4,]
# Don't plot for HB before because no data
lsm <- lsm[-(c(1, 3)),]
anno_2 <- full_join(lsm2, sig) %>%
mutate(rel = -(estimate) / `2010 baseline` * 100,
rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
sig = gtools::stars.pval(p.value),
sig2 = gsub(" ", "ns", sig))
anno <- bind_rows(drop_na(anno), anno_2)
anno <- drop_na(anno)
# PLOT ALL RESULTS FOR SMALL CORALS
ggplot(data = newdata_2, aes(x = dist, y = fit, color = timepoint)) +
facet_grid(dir ~ reef, labeller = global_labeller) +
geom_line() +
scale_x_continuous(limits = c(0, 500)) +
#scale_y_continuous(limits = c(0.2, 1.6), expand = c(0,0)) +
geom_ribbon(aes(ymin = lci, ymax = uci), linetype = 0, alpha = 0.2) +
geom_segment(data = anno, inherit.aes = FALSE,
aes(x = dist, y = `2010 baseline`, xend = dist, yend = `2016/2017 cross-site surveys`),
arrow = arrow(length = unit(0.03, "npc"))) +
geom_text(data = anno, inherit.aes = FALSE, hjust = "left",
aes(x = dist, y = `2016/2017 cross-site surveys` - 0.1, label = paste0(rel2, sig2))) +
geom_point(data = lsm, aes(x = dist, y = rate), alpha = 0.5) +
geom_errorbar(data = lsm, aes(x = dist, y = rate, ymin = asymp.LCL, ymax = asymp.UCL)) +
labs(x = "Distance from channel (m)", y = "Number of corals ≥ 3cm per m2", color = "") +
scale_color_manual(labels = c("Before dredging", "After dredging"), values = c("#00BFC4", "#F8766D")) +
theme_custom() +
theme(legend.position = c(0.1, 0.9),
legend.key.size = unit(5, "mm"),
legend.text = element_text(size = 7),
axis.text.x = element_text(angle=0, hjust=0.5, vjust = 1))
ggsave(filename = "figures/FigS7.png", device = "png", width = 190, height = 95, units = "mm")
save(newdata_2, lsm, anno, file = "data/processed/nonsus.small.dens.mod.RData")
Look at tiny corals (< 3cm) abundance before and after dredging. Corals <3cm in diameter were only recorded in 2010 (within 500 m of channel) and in 2016/2017 at cross-sites and permanent monitoring sites. The surveys that are most comparable are those within 100m of channel (see DCA cross site report page 2). Unfortunately, there are no “control” locations at which we can compare size frequency distributions. We currently don’t have HB data from 2010, so we can only compare R2 and R3 locations.
### Look at map on page 2 of october 2017 cross_impact report
##### Reef 2 N:: 2010 transects below 100 m are all same general area as R2N1
##### Reef 2 S:: 2010 transects below 100 m are all same general area as R2S1
##### Reef 3 N:: 2010 transects below 100 m are all same general area as R3N1
##### Reef 3 S:: 2010 transects LR are all same general area as R3S2
# Filter: 2010 and 2016/2017, only dist < 100 m from channel
df <- filter(scler, year %in% c(2010, 2016, 2017),
dist <= 100,
site %in% c("R2N", "R2S", "R3N", "R3S", "R2N1", "R2S1", "R3N1", "R3S2")) %>%
mutate(timepoint = cut(year, breaks = c(-Inf, 2011, Inf),
labels = c("2010 baseline", "2016/2017 cross-site surveys")))
# Kruskal-Wallis non-parametric test for difference in mean
kwtest <- df %>%
nest(-reef, -dir) %>%
mutate(kw = map(data, ~ kruskal.test(diameter ~ timepoint, data = .)),
p = map_dbl(kw, ~ .$p.value))
kwtest <- kwtest %>% mutate(x = 15, y = 0.1)
save("df", "kwtest", file = "data/processed/nonsus.sizefreq.RData")
# Plot size frequency distributions and annotate with KW test p values
ggplot(df, aes(x = diameter, fill = year <= 2010)) +
geom_histogram(aes(y = ..density..), # Scales each histogram so the total area = 1
position = "identity", binwidth = 1,
alpha = 0.3) +
facet_grid(dir ~ reef) +
labs(title = paste("Coral size frequency within", max(df$dist), "m of channel"),
x = "Coral diameter (cm)", y = "Proportion of individuals") +
scale_fill_manual(labels = c("after dredging", "before dredging"), values = c("#F8766D", "#00BFC4")) +
theme(legend.title = element_blank(),
legend.position = c(0.3, 0.9),
legend.background = element_blank()) +
xlim(0, 20) +
geom_text(data = kwtest, aes(fill = NULL, x = x, y = y,
label = paste("p =", formatC(p, format = "e", digits = 2))))
df %>%
group_by(reef, dir, timepoint) %>%
summarise(med = median(diameter),
mean = mean(diameter))
## # A tibble: 8 x 5
## # Groups: reef, dir [?]
## reef dir timepoint med mean
## <chr> <chr> <fct> <dbl> <dbl>
## 1 R2 N 2010 baseline 2 3.84
## 2 R2 N 2016/2017 cross-site surveys 4 5.36
## 3 R2 S 2010 baseline 3 3.79
## 4 R2 S 2016/2017 cross-site surveys 5 6.09
## 5 R3 N 2010 baseline 7 8.83
## 6 R3 N 2016/2017 cross-site surveys 10 9.51
## 7 R3 S 2010 baseline 4 4.71
## 8 R3 S 2016/2017 cross-site surveys 5 6.69