Setup

# Load packages
library(tidyverse)
library(lme4)
library(lsmeans)
library(janitor)

Import and tidy cross site sediment depth data

# find all file names ending in .xlsx 
files <- list.files(path = "data/sediment_depth/2016 cross-transect data/Entered Data/",
                    pattern = "^[^~].*.xlsx", recursive = TRUE, full.names = TRUE)
# remove files manually if multiple files for a given transect
files <- files[-c(24,25,28,30,32,33,36)]


# Read in SUBSTRATE sheet from each xlsx file
substrate <- files %>%
  map(readxl::read_xlsx, sheet = "SUBSTRATE", col_types = "text") %>%    # read in all files individually
  reduce(bind_rows)

# Tidy data
substrate <- substrate %>%
  clean_names() %>%
  rename(site0 = station,                                         # Rename columns
         transect = transect_ns_or_ew,
         sdepth_cm = sediment_depth_cm_excavation_depth,
         stype = type_of_sediments_sand_fines_mix) %>%
  filter(!is.na(site0)) %>%                   # Filter out blank rows without site name
  mutate(site0 = str_replace(site0, "R2N1-350", "R2N-350-RR")) %>%   # Fix error in site coding
  mutate(meter_mark = if_else(is.na(actual_position_if_altered),  # Get meter mark, actual pos if altered
                              true = position_meter_mark,
                              false = actual_position_if_altered)) %>%
  select(site0, transect, meter_mark, sdepth_cm, stype) %>%
  mutate(site = case_when(grepl("15b|15B", site0) ~ "HBS", 
                          grepl("7a", site0) ~ "HBN",
                          TRUE ~ str_extract(site0, "^.[^-|_]*")),                   # Get base site name
         perm = if_else(nchar(site) == 3, "no", "yes")) %>%       # Identify permanent sites vs. non
  mutate(dist = case_when(site0 == "HBS-15b-150" ~ "150",
                          site0 == "15B-250" ~ "250",
                          perm == "no" ~ str_extract(site0, "[[:digit:]]{2,3}")),
         dist = ifelse(is.na(dist), case_when(  
           site=="HBN3" ~ "48", site=="HBNC1" ~ "2350", site=="HBS4" ~ "32", site=="HBSC1" ~ "1650",
           site=="R2N1" ~ "28", site=="R2NC2" ~ "9380", site=="R2N2" ~ "18",
           site=="R2NC1" ~ "9380", site=="R2NC3" ~ "9380", site=="R2S1" ~ "23",
           site=="R2SC1" ~ "1270", site=="R2S2" ~ "21", site=="R2SC2" ~ "1270",
           site=="R3N1" ~ "23", site=="R3NC1" ~ "9380", site=="R3S2" ~ "21",
           site=="R3SC2" ~ "1300", site=="R3S1" ~ "30", site=="R3S3" ~ "30",
           site=="R3SC3" ~ "1300", site=="R3SC1" ~ "1300"), dist)) %>%
  mutate(reef = str_sub(site, 1, 2), 
        reef2 = case_when(reef == "HB" ~ "",
                          TRUE ~ str_extract(site0, "[^-|_]R$")),
          dir = str_sub(site, 3, 3),
      channel = case_when(perm == "yes" ~ if_else(grepl("C", site), "control", "channelside")))

# Recode columns
substrate <- substrate %>%
  mutate(sdepth_cm = gsub(">", "", sdepth_cm)) %>%          # remove greater-than sign          
  mutate(sdepth_cm = as.numeric(sdepth_cm),
              dist = as.numeric(dist),
        meter_mark = as.numeric(meter_mark)) %>%
  mutate_if(is.character, as.factor)

Compute actual distance from channel of each sediment measurement

# Site layouts are described in cross-site survey report, page 22.
substrate <- substrate %>%
  mutate(dist_actual = case_when(
    transect == "EW" ~ dist,
    transect == "NS" ~ dist + meter_mark - 25))
save(substrate, file = "data/processed/sediment_depth.RData")

Dataset metrics

# Total number of sediment depth measurements
nmeas <- substrate %>%
  filter(dist <= 1000) %>%
  group_by(reef, dir) %>%
  count()

# Number of transects performed in each area within 1000m from channel
ntrans <- substrate %>%
  filter(dist <= 1000) %>%
  group_by(reef, dir) %>%
  distinct(site0, transect) %>%
  summarise(ntransect = n())

# Max dist from channel
maxdist <- substrate %>%
  filter(dist <= 1000) %>%
  group_by(reef, dir) %>%
  summarise(maxdist = max(dist))

# Print results
full_join(full_join(nmeas, ntrans), maxdist)
## # A tibble: 6 x 5
## # Groups:   reef, dir [?]
##   reef  dir       n ntransect maxdist
##   <fct> <fct> <int>     <int>   <dbl>
## 1 HB    N       204         4     150
## 2 HB    S       306         6     250
## 3 R2    N      2040        40     875
## 4 R2    S      1020        20     400
## 5 R3    N       714        14     500
## 6 R3    S       306         6     350