Import coral count data

# Import tidy count data
load("data/processed/all_counts.RData")

# Subset Scleractinian count data
scler <- filter(all_counts, category == "scleractinian") %>%      # Get scleractinians only
  filter(year %in% c(2010, 2013, 2016, 2017)) %>%                 # Get only before dredging (2010, 2013) and 2y postcon (2016, 2017)
  dplyr::select(reef, dir, channel, site, site2, dist, year, date,       # Reorder columns
                transect, transect_area_m2, genus, species, diameter, count, perm)

# Subset disease resistant species only
resistant.genus <- c("Acropora", "Agaricia", "Madracis", "Mycetophyllia", "PDIV", "Porites", "Siderastrea", "Stephanocoenia")

# Subset only Porited
#resistant.genus <- c("Porites", "PDIV")

# Subset only disease-resistant corals
scler <- filter(scler, genus %in% resistant.genus)

Coral size distributions: were small corals recorded?

ggplot(scler, aes(x = diameter)) + 
  geom_histogram(binwidth = 1) + 
  facet_wrap(~ year) +
  scale_x_continuous(limits = c(0, 5))

Small corals (< 3cm) were recorded, but only in 2010, 2016, and 2017. Must filter out corals < 3 cm in order to compare data from all years.

Create subsetted dataset without small corals (< 3 cm)

## Many entries also do not include a size (diameter = NA)
## For coral density analysis, assume these are bigger than 3 cm and keep them
scler_3 <- scler %>%
  filter(diameter >= 3 | is.na(diameter))

Analyze coral density as a function of distance from channel

Decreases in coral density could have resulted from mortality due to disease, or mortality due to dredging sedimentation. Disease impacts should not show a relationship to distance from the dredging channel, but dredge impacts should. Therefore, analyzing changes in density as a function of distance from channel is one way to get at impacts due to dredging.

Change in density of larger corals (>3cm) – all pre-dredging vs. post-dredging count data

The full coral count dataset includes permanent sites and non-permanent sites, with data from 2010, 2013, 2015, and 2016/2017. Since corals <3 cm were not recorded in 2013 and 2015 surveys, we need to use the dataset excluding <3 cm corals.

# Sum number of large corals counted on each transect
large_counts <- scler_3 %>%                # Use dataset that omits small corals
  filter(dist < 1000) %>%                  # Use data within 1000m of the channel
  group_by(reef, dir, channel, site, site2, dist, date, year, transect, transect_area_m2, perm) %>%
  summarise(n = sum(count)) %>%
  mutate(dens = n / transect_area_m2,
         timepoint = cut(year, breaks = c(-Inf, 2014, Inf), 
                         labels = c("pre-2014", "post-2014"))) %>%
  ungroup()

# Plot density of corals for each transect
ggplot(large_counts, aes(x = dist, y = dens, color = timepoint)) +
  facet_grid(dir ~ reef) +
  geom_point() +
  labs(y = "Corals / m2", x = "Distance from channel")

Reef 2 south post-dredging timepoint has an apparent outlier at 400m from channel. The transect was performed on 2017-03-29

# Subset data for only reef 2 south
r2s <- filter(large_counts, reef == "R2", dir == "S", timepoint == "post-2014")

# Fit a linear model with all points and calculate Cook's distance (cannot calculate Cook's for poisson glm)
mod2 <- lm(n/transect_area_m2 ~ dist, data = r2s) 
nd2 <- crossing(dist = r2s$dist, transect_area_m2 = 1)
nd2$fit <- predict(mod2, newdata = nd2, re.form = NA, type = "response")
# Plot fitted lm
ggplot(r2s, aes(x = dist, y = dens, color = timepoint)) +
  geom_point() +
  geom_line(data = nd2, aes(x = dist, y = fit), inherit.aes = FALSE)

# Plot cook's distance ~ distance
plot(cooks.distance(mod2) ~ r2s$dist, xlab = "Distance from channel", ylab = "Cook's Distance")
text(r2s$dist - 30, cooks.distance(mod2), 
     labels = ifelse(cooks.distance(mod2) > 0.2, round(cooks.distance(mod2), 2), ""))

# Remove outlier point from R2S (it was the only data point from 2017-03-29)
large_counts <- filter(large_counts, date != "2017-03-29") 

Fit Poisson regression on count data

# Subset 1: locations with enough data at different distances from channel
large_counts_1 <- large_counts %>%
  # only use where sufficient data exist to test dist
  filter(reef == "R2" | (reef == "R3" & dir == "N") | (reef == "R3" & dir == "S" & timepoint == "pre-2014"))    

# Fit model with distance from channel as predictor
mod_1 <- glmer(n ~ dist * reef:dir * timepoint +      # count as response
               offset(log(transect_area_m2)) +          # offset accounts for diff transect sizes
               (1|site/site2/transect) + (1|date),            # random effects
               data = large_counts_1, family = "poisson")           # poisson model

# Get fitted responses
newdata_1 <- large_counts_1 %>%
  tidyr::expand(nesting(reef, dir, timepoint, dist), 
                transect_area_m2 = 1)        # transect area of 1 m2 will predict count per m2
newdata_1$fit <- predict(mod_1, newdata_1, type = "response", re.form = NA)

# Get confidence intervals by simulation
set.seed(9870)
bootfit_1 <- bootMer(mod_1, FUN = function(x) predict(x, newdata_1, type = "response", re.form = NA), 
                     nsim = 400, parallel = "multicore", ncpus = 8)
newdata_1$lci <- apply(bootfit_1$t, 2, quantile, 0.025)
newdata_1$uci <- apply(bootfit_1$t, 2, quantile, 0.975)

# Get fitted value of coral density adjacent to channel (i.e., dist = 20 m)
lsm20 <- lsmeans(mod_1, specs = c("reef", "dir", "timepoint"),
                 at = list(dist = 20, transect_area_m2 = 1), transform = "response")

# Get fitted value of coral density away from channel (i.e., dist = 300 m)
lsm300 <- lsmeans(mod_1, specs = c("reef", "dir", "timepoint"), 
                  at = list(dist = 300, transect_area_m2 = 1), transform = "response")

# Gather fitted values for annotating plot
fit20 <- summary(lsm20) %>%
  mutate(dist = 20) %>%
  dplyr::select(reef, dir, timepoint, rate, dist) %>%
  spread(timepoint, rate)
fit300 <- summary(lsm300) %>%
  mutate(dist = 300) %>%
  dplyr::select(reef, dir, timepoint, rate, dist) %>%
  spread(timepoint, rate)
fits <- as_tibble(bind_rows(fit20, fit300))

# Test for significant differences by timepoint for fitted values
diff20 <- rbind(pairs(lsm20, by = c("reef", "dir")), adjust = "none")
diff300 <- rbind(pairs(lsm300, by = c("reef", "dir")), adjust = "none")

# Gather fitted values for annotating plot
sig20 <- summary(diff20) %>%
  mutate(dist = 20) %>%
  dplyr::select(reef, dir, estimate, p.value, dist)
sig300 <- summary(diff300) %>%
  mutate(dist = 300) %>%
  dplyr::select(reef, dir, estimate, p.value, dist)
sigs <- as_tibble(bind_rows(sig20, sig300))

# Generate data frame to use to annotate the plot
anno <- full_join(fits, sigs) %>%
  mutate(rel = -(estimate) / `pre-2014` * 100,
         rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
         sig = gtools::stars.pval(p.value),
         sig2 = gsub(" ", "ns", sig))

# Subset 2: areas where data are only available close to channel
large_counts_2 <- large_counts %>%
  filter(dist < 50) %>%
  filter(reef == "HB" | (reef == "R3" & dir == "S")) %>% # & timepoint == "post-2014")) %>%
  group_by(reef, dir) %>%
  mutate(med = median(dist))

mod_2 <- glmer(n ~ reef * dir * timepoint +      # count as response
               offset(log(transect_area_m2)) +          # offset accounts for diff transect sizes
               (1|site/site2/transect) + (1|date),            # random effects
               data = large_counts_2, family = "poisson")
lsm <- lsmeans(mod_2, specs = c("reef", "dir", "timepoint"), 
               at = list(transect_area_m2 = 1), transform = "response")
sig <- rbind(pairs(lsm, by = c("reef", "dir")), adjust = "none")
sig <- summary(sig) %>%
  mutate(dist = 25) %>%
  dplyr::select(reef, dir, estimate, p.value, dist) %>% drop_na()
lsm <- summary(lsm, level = 0.84) %>%
  mutate(dist = 25)
lsm2 <- lsm %>%
  dplyr::select(reef, dir, timepoint, rate, dist) %>%
  spread(timepoint, rate)
# Don't plot point for R3S pre, because already fitted with line
lsm <- lsm[-4,]
anno_2 <- full_join(lsm2, sig) %>%
    mutate(rel = -(estimate) / `pre-2014` * 100,
         rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
         sig = gtools::stars.pval(p.value),
         sig2 = gsub(" ", "ns", sig))

anno <- bind_rows(anno, anno_2)

# Save as RData
save(newdata_1, lsm, anno, file = "data/processed/nonsus.large.dens.mod.RData")

# PLOT ALL RESULTS FOR LARGE CORALS
ggplot(data = newdata_1, aes(x = dist, y = fit, color = timepoint)) +
  facet_grid(dir ~ reef, labeller = global_labeller) +
  geom_line() +
  scale_x_continuous(limits = c(0, 500)) +
  #scale_y_continuous(limits = c(0.2, 1.6), expand = c(0,0)) +
  geom_ribbon(aes(ymin = lci, ymax = uci), linetype = 0, alpha = 0.2) +
  geom_segment(data = anno, inherit.aes = FALSE,
               aes(x = dist, y = `pre-2014`, xend = dist, yend = `post-2014`), 
               arrow = arrow(length = unit(0.03, "npc"))) +
  geom_text(data = anno, inherit.aes = FALSE, hjust = "left",
            aes(x = dist, y = `post-2014` - 0.1, label = paste0(rel2, sig2))) +
  geom_point(data = lsm, aes(x = dist, y = rate), alpha = 0.5) +
  geom_errorbar(data = lsm, aes(x = dist, y = rate, ymin = asymp.LCL, ymax = asymp.UCL)) +
  labs(x = "Distance from channel (m)", y = "Number of corals ≥ 3cm per m2", color = "") +
  scale_color_manual(labels = c("Before dredging", "After dredging"), values = c("#00BFC4", "#F8766D")) +
  theme_custom() +
  theme(legend.position = c(0.1, 0.9),
        legend.key.size = unit(5, "mm"),
        legend.text = element_text(size = 7),
        axis.text.x = element_text(angle=0, hjust=0.5, vjust = 1))

ggsave(filename = "figures/FigS6.png", device = "png", width = 190, height = 95, units = "mm")

Change in density of small corals (1-2cm) (2010 vs. 2016-2017)

Transects at increasing distances from channel were recorded in 2010 baseline surveys and 2016/2017 cross-site surveys, when small corals were recorded.

# Subset 2010 baseline and 2016/2017 cross-site survey data <1000 m from channel
small_counts <- scler %>%                # Select dataset that includes all corals, including small
  filter(year %in% c(2010, 2016, 2017) & dist < 1000) %>%
  filter(diameter < 3) %>%   # ONLY 1-2cm CORALS
  group_by(reef, dir, channel, site, site2, dist, date, year, transect, transect_area_m2, perm) %>%
  summarise(n = sum(count)) %>%
  mutate(dens = n / transect_area_m2,
         timepoint = cut(year, breaks = c(-Inf, 2011, Inf), 
                         labels = c("2010 baseline", "2016/2017 cross-site surveys"))) %>%
  ungroup()

# Plot density of corals for each transect
ggplot(small_counts, aes(x = dist, y = dens, color = timepoint)) +
  facet_grid(dir ~ reef) +
  geom_point() +
  labs(y = "Small corals / m2", x = "Distance from channel")

Fit poisson regression on count data

small_counts.f <- small_counts 
# Fit model
mod_3 <- glmer(n ~ dist * reef * dir * timepoint +      # count as response
               offset(log(transect_area_m2)) +          # offset accounts for diff transect sizes
               (1|site/site2/transect) + (1|date),            # random effects
               data = small_counts.f, family = "poisson")           # poisson model

# Get fitted responses
newdata_2 <- small_counts.f %>%
  tidyr::expand(nesting(reef, dir, timepoint), dist = 1:500, 
                transect_area_m2 = 1)        # transect area of 1 m2 will predict count per m2
newdata_2$fit <- predict(mod_3, newdata_2, type = "response", re.form = NA)

# Get confidence intervals by simulation
set.seed(9014)
bootfit_2 <- bootMer(mod_3, FUN = function(x) predict(x, newdata_2, type = "response", re.form = NA), 
                     nsim = 400, parallel = "multicore", ncpus = 8)
newdata_2$lci <- apply(bootfit_2$t, 2, quantile, 0.025)
newdata_2$uci <- apply(bootfit_2$t, 2, quantile, 0.975) 

Plot fits and test for change in density near channel

# Subset areas where sufficient data exist to model based on distance from channel
newdata_2 <- filter(newdata_2, reef == "R2" | (reef == "R3" & dir == "N") | (reef == "R3" & dir == "S" & timepoint == "2010 baseline"))
# Plot raw data and fitted results
ggplot(newdata_2, aes(x = dist, y = fit, color = timepoint)) +
  facet_grid(dir ~ reef) +
  geom_ribbon(aes(ymin = lci, ymax = uci, linetype = NA), alpha = 0.2) +
  geom_line() +
  geom_point(data = small_counts, aes(y = dens)) +
  labs(x = "Distance from channel", y = "# corals per m2")

# Get fitted value of coral density adjacent to channel (i.e., dist = 20 m)
lsm20 <- lsmeans(mod_3, specs = c("reef", "dir", "timepoint"),
                 at = list(dist = 20, transect_area_m2 = 1), transform = "response")

# Get fitted value of coral density away from channel (i.e., dist = 300 m)
lsm300 <- lsmeans(mod_3, specs = c("reef", "dir", "timepoint"), 
                  at = list(dist = 300, transect_area_m2 = 1), transform = "response")

# Gather fitted values for annotating plot
fit20 <- summary(lsm20) %>%
  mutate(dist = 20) %>%
  select(reef, dir, timepoint, rate, dist) %>%
  spread(timepoint, rate)
fit300 <- summary(lsm300) %>%
  mutate(dist = 300) %>%
  select(reef, dir, timepoint, rate, dist) %>%
  spread(timepoint, rate)
fits <- as_tibble(bind_rows(fit20, fit300))

# Test for significant differences by timepoint for fitted values
diff20 <- rbind(pairs(lsm20, by = c("reef", "dir")), adjust = "none")
diff300 <- rbind(pairs(lsm300, by = c("reef", "dir")), adjust = "none")

# Gather fitted values for annotating plot
sig20 <- summary(diff20) %>%
  mutate(dist = 20) %>%
  select(reef, dir, estimate, p.value, dist)
sig300 <- summary(diff300) %>%
  mutate(dist = 300) %>%
  select(reef, dir, estimate, p.value, dist)
sigs <- as_tibble(bind_rows(sig20, sig300))

#anno <- full_join(fits, sigs)
anno <- full_join(fits, sigs) %>%
  mutate(rel = -(estimate) / `2010 baseline` * 100,
         rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
         sig = gtools::stars.pval(p.value),
         sig2 = gsub(" ", "ns", sig))

anno <- anno %>%
  mutate(`2010 baseline` = ifelse((reef == "HB" | (reef == "R3" & dir == "S")), NA, `2010 baseline`),
         `2016/2017 cross-site surveys` = ifelse(reef == "HB", NA, `2016/2017 cross-site surveys`),
         sig2 = ifelse((reef == "HB" | (reef == "R3" & dir == "S")), "", sig2))

# Subset 2: areas where data are only available close to channel
small_counts_2 <- small_counts %>%
  filter(dist < 50) %>%
  filter(reef == "HB" | (reef == "R3" & dir == "S")) %>%
  group_by(reef, dir) %>%
  mutate(med = median(dist))
ggplot(small_counts_2, aes(x = dist, y = dens, color = timepoint)) +
  facet_grid(dir ~ reef) +
  geom_point() +
  labs(y = "Small corals / m2", x = "Distance from channel")

mod_2 <- glmer(n ~ reef * dir * timepoint +      # count as response
               offset(log(transect_area_m2)) +          # offset accounts for diff transect sizes
               (1|site/site2/transect) + (1|date),            # random effects
               data = small_counts_2, family = "poisson")
lsm <- lsmeans(mod_2, specs = c("reef", "dir", "timepoint"), 
               at = list(transect_area_m2 = 1), transform = "response")
sig <- rbind(pairs(lsm, by = c("reef", "dir")), adjust = "none")
sig <- summary(sig) %>%
  mutate(dist = 25) %>%
  dplyr::select(reef, dir, estimate, p.value, dist) %>% drop_na()
lsm <- summary(lsm, level = 0.84) %>%
  mutate(dist = 25)
lsm2 <- lsm %>%
  dplyr::select(reef, dir, timepoint, rate, dist) %>%
  spread(timepoint, rate)
# Don't plot point for R3S pre, because already fitted with line
lsm <- lsm[-4,]
# Don't plot for HB before because no data
lsm <- lsm[-(c(1, 3)),]

anno_2 <- full_join(lsm2, sig) %>%
    mutate(rel = -(estimate) / `2010 baseline` * 100,
         rel2 = ifelse(p.value > 0.1, "", paste0(round(rel, 1), "%")),
         sig = gtools::stars.pval(p.value),
         sig2 = gsub(" ", "ns", sig))

anno <- bind_rows(drop_na(anno), anno_2)
anno <- drop_na(anno)


# PLOT ALL RESULTS FOR SMALL CORALS
ggplot(data = newdata_2, aes(x = dist, y = fit, color = timepoint)) +
  facet_grid(dir ~ reef, labeller = global_labeller) +
  geom_line() +
  scale_x_continuous(limits = c(0, 500)) +
  #scale_y_continuous(limits = c(0.2, 1.6), expand = c(0,0)) +
  geom_ribbon(aes(ymin = lci, ymax = uci), linetype = 0, alpha = 0.2) +
  geom_segment(data = anno, inherit.aes = FALSE,
               aes(x = dist, y = `2010 baseline`, xend = dist, yend = `2016/2017 cross-site surveys`), 
               arrow = arrow(length = unit(0.03, "npc"))) +
  geom_text(data = anno, inherit.aes = FALSE, hjust = "left",
            aes(x = dist, y = `2016/2017 cross-site surveys` - 0.1, label = paste0(rel2, sig2))) +
  geom_point(data = lsm, aes(x = dist, y = rate), alpha = 0.5) +
  geom_errorbar(data = lsm, aes(x = dist, y = rate, ymin = asymp.LCL, ymax = asymp.UCL)) +
  labs(x = "Distance from channel (m)", y = "Number of corals ≥ 3cm per m2", color = "") +
  scale_color_manual(labels = c("Before dredging", "After dredging"), values = c("#00BFC4", "#F8766D")) +
  theme_custom() +
  theme(legend.position = c(0.1, 0.9),
        legend.key.size = unit(5, "mm"),
        legend.text = element_text(size = 7),
        axis.text.x = element_text(angle=0, hjust=0.5, vjust = 1))

ggsave(filename = "figures/FigS7.png", device = "png", width = 190, height = 95, units = "mm")
  
save(newdata_2, lsm, anno, file = "data/processed/nonsus.small.dens.mod.RData")

Analyze coral size frequency distributions

Look at tiny corals (< 3cm) abundance before and after dredging. Corals <3cm in diameter were only recorded in 2010 (within 500 m of channel) and in 2016/2017 at cross-sites and permanent monitoring sites. The surveys that are most comparable are those within 100m of channel (see DCA cross site report page 2). Unfortunately, there are no “control” locations at which we can compare size frequency distributions. We currently don’t have HB data from 2010, so we can only compare R2 and R3 locations.

### Look at map on page 2 of october 2017 cross_impact report
##### Reef 2 N:: 2010 transects below 100 m are all same general area as R2N1
##### Reef 2 S:: 2010 transects below 100 m are all same general area as R2S1
##### Reef 3 N:: 2010 transects below 100 m are all same general area as R3N1
##### Reef 3 S:: 2010 transects LR are all same general area as R3S2

# Filter: 2010 and 2016/2017, only dist < 100 m from channel
df <- filter(scler, year %in% c(2010, 2016, 2017), 
             dist <= 100,
             site %in% c("R2N", "R2S", "R3N", "R3S", "R2N1", "R2S1", "R3N1", "R3S2")) %>%
  mutate(timepoint = cut(year, breaks = c(-Inf, 2011, Inf), 
                         labels = c("2010 baseline", "2016/2017 cross-site surveys")))

# Kruskal-Wallis non-parametric test for difference in mean
kwtest <- df %>%
  nest(-reef, -dir) %>%
  mutate(kw = map(data, ~ kruskal.test(diameter ~ timepoint, data = .)),
         p = map_dbl(kw, ~ .$p.value))

kwtest <- kwtest %>% mutate(x = 15, y = 0.1)

save("df", "kwtest", file = "data/processed/nonsus.sizefreq.RData")

# Plot size frequency distributions and annotate with KW test p values
ggplot(df, aes(x = diameter, fill = year <= 2010)) + 
  geom_histogram(aes(y = ..density..),    # Scales each histogram so the total area = 1
                 position = "identity", binwidth = 1,
                 alpha = 0.3) +
  facet_grid(dir ~ reef) +
  labs(title = paste("Coral size frequency within", max(df$dist), "m of channel"),
       x = "Coral diameter (cm)", y = "Proportion of individuals") +
  scale_fill_manual(labels = c("after dredging", "before dredging"), values = c("#F8766D", "#00BFC4")) +
  theme(legend.title = element_blank(),
        legend.position = c(0.3, 0.9),
        legend.background = element_blank()) +
  xlim(0, 20) +
  geom_text(data = kwtest, aes(fill = NULL, x = x, y = y, 
            label = paste("p =", formatC(p, format = "e", digits = 2))))

df %>%
  group_by(reef, dir, timepoint) %>%
  summarise(med = median(diameter),
            mean = mean(diameter))
## # A tibble: 8 x 5
## # Groups:   reef, dir [?]
##   reef  dir   timepoint                      med  mean
##   <chr> <chr> <fct>                        <dbl> <dbl>
## 1 R2    N     2010 baseline                    2  3.84
## 2 R2    N     2016/2017 cross-site surveys     4  5.36
## 3 R2    S     2010 baseline                    3  3.79
## 4 R2    S     2016/2017 cross-site surveys     5  6.09
## 5 R3    N     2010 baseline                    7  8.83
## 6 R3    N     2016/2017 cross-site surveys    10  9.51
## 7 R3    S     2010 baseline                    4  4.71
## 8 R3    S     2016/2017 cross-site surveys     5  6.69