# Load packages
library(tidyverse)
library(stringr)
library(lubridate)
library(readxl)
library(janitor)

Import data

Baseline

Hardbottom
# Import data from file: Hardbottom_Baseline_POM_050714.xlsx
bl_hb <- readxl::read_xlsx(path = "data/tagged_corals/Hardbottom_Baseline_POM_050714.xlsx",
                           sheet = "Raw Data")
# Tidy data
bl_hb <- bl_hb %>%
  clean_names() %>%
  filter(tagged_coral_y_n == "Y") %>%         # Tagged corals only
  rename(site = site_e_g_hbnc1_cp,
         transect = transect_e_g_hbna_1,
         species = subcategory) %>%
  mutate(date = as_date(date)) %>%
  select(date, site, transect, coral_id, species, condition_qa_qc, surveyor) %>%
  rename(condition = condition_qa_qc)

# Check and make sure no missing dates
#bl_hb %>% filter(is.na(date))

# Separate condition codes into multiple rows
bl_hb <- bl_hb %>%
  separate_rows(condition, sep = "/")
Middle reef
# Import data from file: Reef2_Baseline_POM_2013_050714.xlsx
bl_r2 <- readxl::read_xlsx(path = "data/tagged_corals/Reef2_Baseline_POM_2013_050714.xlsx",
                           sheet = "Raw Data", guess_max = 3000)

# Tidy data
bl_r2 <- bl_r2 %>%
  clean_names() %>%
  filter(tagged_coral_y_n == "Y") %>%
  rename(site = site_e_g_r2n1_rr,
         transect = transect_e_g_r2n1_rr_1,
         species = subcategory) %>%
  mutate(date = as_date(date)) %>%
  select(date, site, transect, coral_id, species, condition_qa_qc, surveyor) %>%
  rename(condition = condition_qa_qc)

# Check and make sure no missing dates
#bl_r2 %>% filter(is.na(date))
# Correct date errors based on manual inspection of spreadsheet
bl_r2 <- bl_r2 %>% 
  mutate(date = case_when(
    transect=="R2N1-RR-1" & coral_id==10 & is.na(date) ~ as_date("2013-11-02"),
    transect=="R2NC3-LR-2" & coral_id==9 & is.na(date) ~ as_date("2013-11-19"),
    TRUE ~ as_date(date)))

# Separate condition codes into multiple rows
bl_r2 <- bl_r2 %>%
  separate_rows(condition, sep = "/")
Outer reef
# Import data from file: Reef2_Baseline_POM_2013_050714.xlsx
bl_r3 <- readxl::read_xlsx(path = "data/tagged_corals/Reef3_Baseline_POM_050714 (REVISED DOCUMENT).xlsx",
                           sheet = "Raw Data", guess_max = 3000)

# Tidy data
bl_r3 <- bl_r3 %>%
  clean_names() %>%
  filter(tagged_coral_y_n == "Y") %>%
  rename(site = site_e_g_hbnc1_cp,
         transect = transect_e_g_hbna_1,
         species = subcategory) %>%
  mutate(date = as_date(date)) %>%
  select(date, site, transect, coral_id, species, qa_qc_condition_code, surveyor) %>%
  rename(condition = qa_qc_condition_code)

# Check and make sure no missing dates
#bl_r3 %>% filter(is.na(date))

# Separate condition codes into multiple rows
bl_r3 <- bl_r3 %>%
  separate_rows(condition, sep = "/") %>%
  separate_rows(condition, sep = " ")
Combine baseline data for HB, R2, R3
bl <- bind_rows(bl_hb, bl_r2, bl_r3) %>%
  mutate_at(c("transect", "coral_id"), as.factor)

Compliance monitoring (during construction)

Hardbottom, middle, and outer reef data are all in the same spreadsheet.

# Import data from file: NEW_Offshore_Comp_Scleractinian_Cond_All_Weeks_8_10_15-QAQC.xlsx
comp <- readxl::read_xlsx(
  path = "data/tagged_corals/NEW_Offshore_Comp_Scleractinian_Cond_All_Weeks_8_10_15-QAQC.xlsx",
  sheet = "All Weeks", guess_max = 3000)

# Tidy data
comp <- comp %>%
  clean_names() %>%
  rename(condition = condition_code) %>%
  select(date, week, visit, site, transect, coral_id, species, condition, surveyor) %>%
  mutate(date = as_date(date)) %>%
  mutate_at(c("transect", "coral_id"), as.factor)

# Correct typos and errors in date column (detected by looking at distinct date/week combinations)
comp <- comp %>%
  # 2014-11-25 should be 2013-11-25 because it is Week #1
  mutate(date = if_else(date == "2014-11-25", as_date("2013-11-25"), as_date(date))) %>%
  # 2013-04-02 should be 2014-04-02
  mutate(date = if_else(date == "2013-04-02", as_date("2014-04-02"), as_date(date))) %>%
  # Looks like a mistake from filling down Date in Excel for Surveyor MLR 
  # Change all Week 27 dates for R2SC1-RR visit 1 to May 23, 2014
  mutate(date = case_when(site == "R2SC1-RR" & week == 27 & visit == 1 ~ as_date("2014-05-23"),
                          TRUE ~ as_date(date))) %>%
  # 2020-05-29 should be 2014-05-29
  mutate(date = if_else(date == "2020-05-29", as_date("2014-05-29"), as_date(date))) %>%
  # Fix missing dates for HBS1-CP week 28
  mutate(date = case_when(site=="HBS1-CP" & week == 28 & is.na(date) ~ as_date("2014-05-29"),
                          TRUE ~ as_date(date)))

# Select columns
comp <- comp %>% select(date, site, transect, coral_id, species, condition, surveyor)

# Check and make sure no missing dates
#comp %>% filter(is.na(date))

Immediate post-construction

April - May 2015

# Import data from file: NEW_Offshore_Compliance_Scleractinian_Cond_R2_R3_IMPACT_ASSESS_data TAGGED CORALS.xlsx
pc1 <- readxl::read_xlsx(
  path = "data/tagged_corals/NEW_Offshore_Compliance_Scleractinian_Cond_R2_R3_IMPACT_ASSESS_data TAGGED CORALS.xlsx",
  sheet = "IA TAGGED ONLY")

# Tidy data
pc1 <- pc1 %>%
  clean_names() %>%
  rename(condition = condition_code) %>%
  select(date, site, transect, coral_id, species, condition, surveyor) %>%
  mutate(date = as_date(date)) %>%
  mutate_at(c("transect", "coral_id"), as.factor)

# Check and make sure no missing dates
#pc1 %>% filter(is.na(date))

June - July 2015

# Import data from file: NEW_Offshore_Post_Construction_Data_Entry_ALL_Weeks_GOOD 151015.xlsx
pc2 <- readxl::read_xlsx(
  path = "data/tagged_corals/NEW_Offshore_Post_Construction_Data_Entry_ALL_Weeks_GOOD 151015.xlsx",
  sheet = "Raw Data", guess_max = 3000)

# Tidy data
pc2 <- pc2 %>%
  clean_names() %>%
  filter(tagged_coral_y_n == "Y") %>%           # Tagged corals only
  rename(condition = condition_code_qaqc) %>%
  select(date, site, transect, coral_id, species, condition, surveyor) %>%
  mutate(date = as_date(date)) %>%
  mutate_at(c("transect", "coral_id"), as.factor)

# Check and make sure no missing dates
#pc2 %>% filter(is.na(date))

One year post-construction (Aug 2016)

# Import data from file: permanent_sites_impact_assessment_corals_2016_CSI_QAQC.xlsx
pc3 <- readxl::read_xlsx(
  path = "data/tagged_corals/permanent_sites_impact_assessment_corals_2016_CSI_QAQC.XLSX",
  sheet = "Raw Data", guess_max = 3000, skip = 1)

# Tidy data
pc3 <- pc3 %>%
  clean_names() %>%
  filter(tagged_coral_y_n == "Y") %>%              # Tagged corals only
  rename(condition = condition_code_qaqc) %>%
  select(date, site, transect, coral_id, species, condition, surveyor) %>%
  mutate(date = as_date(date)) %>%
  mutate_at(c("transect", "coral_id"), as.factor)

# Check and make sure no missing dates
#pc3 %>% filter(is.na(date))

# Aggregate condition codes into single row for each observation
# pc3 <- pc3 %>%
#   group_by(date, site, transect, coral_id, species) %>%
#   summarise(condition = list(condition_code_qaqc)) %>%
#   ungroup()

Combine all tagged coral data

# Combine all data
tagged <- bind_rows(bl, comp, pc1, pc2, pc3)

Homogenize site, transect, coral ID, and species codes

tagged <- tagged %>%
  mutate(site = str_extract(site, pattern = "[^-]+")) %>%           # Remove "-RR" etc.
  mutate(transect = str_extract(transect, pattern = ".$")) %>%      # Get transect as single digit
  unite(coral, site, transect, coral_id, sep = ".", remove = FALSE)

tagged <- tagged %>%
  mutate(species = case_when(
    species %in% c("MFOR") ~ "MDEC",
    species %in% c("MFER", "MALI", "Mycetophyllia ferox") ~ "MYCE",
    species %in% c("Orbicella annularis") ~ "OANN",
    species %in% c("stephanoceonia intersepta", "Stephanocoenia intersepta",
                   "Stephanocoenia intercepta", "stephanocoenia intersepta") ~ "SINT",
    species %in% c("Dichocoenia stokesii", "Dichocenia stokesii") ~ "DSTO",
    species %in% c("Solenastrea bournoni") ~ "SBOU",
    species %in% c("Montastraea cavernosa") ~ "MCAV",
    species %in% c("Siderastrea siderea") ~ "SSID",
    species %in% c("Meandrina meandrites") ~ "MMEA",
    species %in% c("Diploria clivosa") ~ "DCLI",
    species %in% c("Agaricia sp.") ~ "AGSP",
    species %in% c("Madracis formosa") ~ "MFOR",
    species %in% c("Madracis decactis") ~ "MDEC",
    species %in% c("Porites astreoides") ~ "PAST",
    species %in% c("Porites porites") ~ "PPOR",
    species %in% c("Siderastrea sp.") ~ "SIDSP",
    species %in% c("Oculina diffusa") ~ "ODIF",
    species %in% c("Diploria strigosa", "Diplora strigosa", "DSTR") ~ "PSTR",
    species %in% c("Diploria labyrinthiformis") ~ "DLAB",
    species %in% c("Colpophyllia natans", "Colophyllia natans") ~ "CNAT",
    species %in% c("Eusmilia fastigiata") ~ "EFAS",
    species %in% c("Orbicella faveolata") ~ "OFAV",
    species %in% c("Acropora cervicornis") ~ "ACER",
    species %in% c("Agaricia agaricites") ~ "AAGA",
    species %in% c("Agaricia lamarcki") ~ "ALAM",
    TRUE ~ as.character(species)))

# Remove row with missing data
tagged <- tagged %>% drop_na(date)

Additional data QC

Check condition codes

# tagged %>%
#   distinct(condition) %>%
#   print(n = nrow(.))

tagged <- tagged %>%
  mutate(condition = recode(condition, `sed dust` = "SED"))
# Downstream analysis revealed a single observation from R2SC1 on May 18, 2015, but the rest of the data from this site were taken May 19, 2015. The coral 'observed' on May 18 was also observed on May 19, so the May 18 entry may be some kind of error.  Omit.
tagged <- tagged %>% 
  filter(date != "2015-05-18")  #(this was the only observation on this date)

# Downstream analysis revealed a single observation from R3NC1 on April 25, 2015, but the rest of the data from this site were taken April 26, 2015. The coral 'observed' on April 25 was also observed on April 26, so the April 25 entry may be some kind of error.  Omit.
tagged <- tagged %>% 
  filter(!(date == "2015-04-25" & coral == "R3NC1.1.3"))

Check for conflicting species IDs

spp_conflicts <- tagged %>% 
  distinct(coral, species) %>%
  group_by(coral) %>%
  filter(n() > 1) %>%
  arrange(coral) %>%
  summarise(conflicting_species = list(species))

183 of the tagged corals have conflicting species information.

Correct conflicting IDs by assigning most frequently noted species

# Get most frequently used species ID for each coral
spp_ids <- tagged %>%
  group_by(coral) %>%
  count(species) %>%
  summarise(most_freq_species = species[which.max(n)])

# Replace species ID with most frequently used species ID for each coral
tagged <- tagged %>%
  left_join(spp_ids, by = "coral") %>%
  select(date, site, transect, coral_id, coral, most_freq_species, condition, surveyor) %>%
  rename(species = most_freq_species) 

Change A. cervicornis WP records to WB

tagged <- tagged %>%
  mutate(condition = if_else(species == "ACER", 
                             str_replace(condition, "WP", "WB"),
                             condition))

Check surveyor IDs

tagged %>% count(surveyor)
## # A tibble: 10 x 2
##    surveyor     n
##    <chr>    <int>
##  1 AS       10080
##  2 BG        5024
##  3 JC          18
##  4 JDC        944
##  5 MLR       1721
##  6 MR          30
##  7 RF       14673
##  8 WFP       5771
##  9 WP         414
## 10 WPF         32
# Change WP and WPF to WFP
# Change MR to MLR
# Change JC to JDC
tagged <- tagged %>%
  mutate(surveyor = recode(surveyor, 
                           WP = "WFP", WPF = "WFP",
                           MR = "MLR",
                           JC = "JDC"))

Add site metadata

tagged <- tagged %>%
  mutate(channel = if_else(str_detect(site, "C"), "control", "channelside"),
            reef = str_sub(site, 1, 2),
             dir = str_sub(site, 3, 3)) %>%
  select(date, reef, dir, channel, site, transect, coral, species, condition, surveyor)

Spread all conditions to boolean columns for each observation

tagged_bool <- tagged %>% 
  mutate(hasCond = TRUE) %>%
  distinct() %>%
  spread(condition, hasCond, fill = FALSE)

Nest all condition codes for each observation

tagged_nest <- tagged %>% nest(condition, .key = "condition")

Concatenate all condition codes for each obervation

tagged_c <- tagged %>%
  group_by(date, reef, dir, channel, site, transect, coral, species) %>%
  summarise(condition = paste(condition, collapse = ", ")) %>%
  ungroup()

Save tagged coral data as .RData

save(tagged, tagged_bool, tagged_nest, tagged_c, file = "data/processed/tagged.RData")

Write concatenated data to file

# Write all data to spreadsheet
dd <- tagged_c %>% 
  arrange(coral, date) %>%
  select(-channel, -reef, -dir, -coral)

write.table(dd, file="data/processed/tagged_c.txt", row.names = FALSE, quote = FALSE, sep = "\t")

Dataset metrics

load("data/processed/tagged.RData")

# Number of observations
nrow(tagged_bool)
## [1] 23537
# Number of corals observed
tagged_bool %>%
  distinct(coral) %>%
  nrow(.)
## [1] 650
# Number of sites
tagged_bool %>%
  distinct(site) %>%
  nrow(.)
## [1] 26
# Date range of entire dataset
range(tagged_bool$date)
## [1] "2013-10-14" "2016-08-20"