Load sediment trap dates data

dates <- readxl::read_xlsx("data/sediment_traps/sed_trap_dates.xlsx",
                           col_names = FALSE)

dates <- dates %>%
  nest(-X__1) %>%
  mutate(d2 = map(data, ~ drop_na(as.data.frame(t(.x[,-1])))),
         d3 = map(d2, drop_na)) %>%
  unnest(d3)

dates <- dates %>%
  setNames(c("site", "start", "end")) %>%
  mutate(site = as_factor(site),
         start = as.Date(start),
         end = as.Date(end),
         duration = as.numeric(end - start)) %>%
  separate(site, into = c("site", "site2"))

Load baseline sediment trap data

# Import Baseline sediment tables and charts- reef 3 31114.xlsx - baseline
#### Manually added spaces between site and transect IDs in baseline tab of this spreadsheet where needed
df1 <- readxl::read_xlsx(
  path = "data/sediment_traps/Baseline sediment tables and charts- reef 3 31114.xlsx",
  sheet = "baseline",
  skip = 5, guess_max = 10, n_max = 38) # excludes one row where PVC was found in bottle...

df1 <- df1 %>% 
  select(4, 3, 8, 18) %>%
  setNames(c("sample", "end", "total_dry", "frac_fine")) %>%
  mutate(end = as.Date(end)) %>%
  separate(sample, into = c("site", "site2")) %>%
  mutate(site = toupper(site),
         site = recode(site, HBNC = "HBNC1"))

#####
# Import Baseline sediment tables and charts- reef 3 31114.xlsx - 02-06-14
df2 <- readxl::read_xlsx(
  path = "data/sediment_traps/Baseline sediment tables and charts- reef 3 31114.xlsx",
  sheet = "02-06-14",
  skip = 5, guess_max = 10, n_max = 38)

df2 <- df2 %>%
  select(1, 3, 6, 20) %>%
  setNames(c("sample", "end", "total_dry", "frac_fine")) %>%
  mutate(end = as.Date(end)) %>%
  separate(sample, into = c("site", "site2")) %>%
  mutate(site = toupper(site),
         site = recode(site, HBNC = "HBNC1"))

Load compliance sediment trap data

# Manual QC: Samples from compliance week 65 were entered in "29TH SET-0228" worksheet with a date of Jan 13, 2015, but should have been Feb 13, 2015 -- manually corrected in Excel.
# in "2ND SET" - 12/17/13 changed to 12/18/13
# in "4TH SET-0224" - 01/20/14 changed to 01/28/14
#### Dates are missing from trap data for samples labeled Compliance week 70 ("C70"). Based on data in trap dates spreadsheet, we believe the end date for these to be 03/22/15 - manually changed.


two_passes <- function(path, sheet = NULL) {
  first_pass <- read_excel(path = path, sheet = sheet)
  skip <- min(which(grepl("HB|Hb|R2|R3", first_pass[[1]])))
  message("For sheet '", if (is.null(sheet)) 1 else sheet,
          "' we'll skip ", skip, " rows.")  
  second_pass <- read_excel(path, sheet = sheet, skip = skip - 1, 
                            col_names = TRUE, col_types = NULL, guess_max = 15,
                            na = c("", "----"))
  second_pass <- second_pass %>% select(1, 
                                        ifelse(sheet == "2ND SET", 4, 3), 
                                        case_when(sheet == "2ND SET" ~ 8,
                                                  sheet == "3RD SET-0206" ~ 6,
                                                  TRUE ~ 7),
                                        ncol(.))
  second_pass <- setNames(second_pass, c("sample", "end", "total_dry", "frac_fine"))
  second_pass <- second_pass %>% mutate(frac_fine = as.numeric(frac_fine),
                                        end = as_date(end),
                                        total_dry = as.numeric(total_dry))
  second_pass
}

sheets <- readxl::excel_sheets("data/sediment_traps/Offshore Compliance Sediment Trap Data Final.xlsx")
sheets <- setNames(sheets, sheets)
trap <- lapply(sheets, two_passes, 
               path = "data/sediment_traps/Offshore Compliance Sediment Trap Data Final.xlsx")

trap <- bind_rows(trap)

trap <- trap %>%
  drop_na() %>%
  separate(sample, into = c("site", "site2", "wk")) %>%
  mutate(site = toupper(site),
         site = recode(site, HBNC = "HBNC1"))

Merge baseline and compliance sediment trap data

trap <- bind_rows(df1, df2, trap) %>% select(site, end, total_dry, frac_fine)
trap <- distinct(trap)  # some rows duplicated because data present in multiple above spreadsheets

# Add site metadata
trap <- trap %>%
  mutate(reef = str_sub(site, 1, 2),
         dir  = str_sub(site, 3, 3),
         channel = if_else(grepl("C", site), "control", "channelside"))

Merge sediment trap data with dates and calculate rates

# Merge by site and end date
sedtrap <- left_join(trap, dates, by = c("site", "end")) %>%
  select(reef, dir, channel, site, start, end, duration, total_dry, frac_fine)

# Calculate daily sedimentation rate
sedtrap <- sedtrap %>%
  mutate(sedrate = total_dry / duration,
         finerate = total_dry * frac_fine / 100 / duration,
         middate = start + (end - start)/2)

# Code factor levels
sedtrap <- sedtrap %>%
  mutate(reef = factor(reef, levels = c("HB", "R2", "R3")),
         dir = factor(dir, levels = c("N", "S")),
         channel = factor(channel, levels = c("channelside", "control")))

# There are three NAs -- this is likely data entry error. Omit these...
sedtrap <- drop_na(sedtrap)

# Save as .RData
save(sedtrap, file = "data/processed/sedtrap.RData")

Dataset metrics

# Sites for which there are sediment trap data
levels(factor(sedtrap$site))
##  [1] "HBN1"  "HBN2"  "HBN3"  "HBNC1" "HBS1"  "HBS2"  "HBS3"  "HBS4" 
##  [9] "HBSC1" "R2N1"  "R2N2"  "R2NC1" "R2NC2" "R2NC3" "R2S1"  "R2S2" 
## [17] "R2SC1" "R2SC2" "R3N1"  "R3NC1" "R3S1"  "R3S2"  "R3S3"  "R3SC1"
## [25] "R3SC2" "R3SC3"
# Earliest and latest dates that sediment traps were deployed (overall)
min(sedtrap$start)
## [1] "2013-10-15"
max(sedtrap$end)
## [1] "2015-07-20"
# Range of duration of deployments
range(sedtrap$duration)
## [1] 10 89
# Total number of sediment samples
nrow(sedtrap)
## [1] 1287