Import and tidy density data for scleractinians and octocorals

Import baseline data from 2010

# Hardbottom 2010 data -- does not include coral diameter, so cannot use this with the rest of the data (because we separately analyze corals >3cm and corals <3cm in diameter)
# dens2010hb <- readxl::read_xlsx(
#   path = "data/Density_Data/Miami Quantitative Survey HB Areas Raw Data for Appendix.xlsx",
#   sheet = "HB raw data") %>%
#   clean_names() %>%
#   rename(site = area_e_g_hbn, block = block_e_g_a, transect = transect_e_g_hbna_1) 
#dens2010hb %>%

# Import and tidy baseline density data from 2010 (10m transects) on reef 2 and reef 3 (see report dated 2012)
dens2010 <- readxl::read_xlsx(
  path = "data/coral_counts/Miami Quantitative Study Raw Data forAppendix.xlsx") %>% 
  clean_names() %>%
  rename(site = area_e_g_r2n, transect = transect_e_g_r2n_10, diameter = diameter_cm) %>%
  select(site, transect, category, subcategory, diameter) %>%
  filter(!grepl("D", transect)) %>%  # Remove transects that contain a "D". These were 'direct impact'
                                     # areas where reef was removed, so no later data for comparison
  mutate(site = substr(transect, 1, ifelse(nchar(transect) == 6, 3, 4)),   # Get site name
         site2 = case_when(site == "R2N" ~ "RR",  # 2010 R2N transects match up with 2016 R2N-RR
                           site == "R2S" ~ "",     # GET THIS INFO
                           site == "R3N" ~ "",     # GET THIS INFO
                           site == "R3S1" ~ "CP",
                           site == "R3S2" ~ "LR",
                           site == "R3S3" ~ "SG"),
         dist = as.numeric(substr(transect, ifelse(nchar(transect) == 6, 4, 5), # Get dist from channel
                                  ifelse(nchar(transect) == 6, 6, 7))),
         count = 1,                  # single observation per row
         date = ymd("2010-08-15"),   # add date (data collected August 2010 (2012 report p. viii))
         transect_area_m2 = 10)      # each transect was 10 square meters

unique(dens2010$site)
## [1] "R2N"  "R2S"  "R3N"  "R3S1" "R3S2" "R3S3"

Import “baseline” data from 2013

# Data from Oct - Dec 2013 (three 20m long transects (x1m wide))

#Hardbottom
dens2013hb <- readxl::read_xlsx(
  path = "data/coral_counts/Copy of Hardbottom_Baseline_POM_022514_LATEST.xlsx",
  sheet = "Raw Data") %>%
  clean_names() %>%
  rename(site = site_e_g_hbnc1_cp, transect = transect_e_g_hbna_1, 
         diameter = max_diameter, count = total_count) %>%
  separate(site, into = c("site", "site2")) %>%
  select(date, site, transect, category, subcategory, diameter, count) %>%
  mutate(count = as.numeric(count))

# Reef 2
dens2013r2 <- readxl::read_xlsx(
  path  = "data/coral_counts/Reef2_Baseline_POM_2013_050714.xlsx", 
  sheet = "Raw Data", guess_max = 4000) %>%
  clean_names() %>%
  rename(site = site_e_g_r2n1_rr, transect = transect_e_g_r2n1_rr_1, 
         diameter = max_diameter_cm, count = total_count) %>%
  separate(site, into = c("site", "site2")) %>%
  mutate(diameter = as.numeric(diameter)) %>%
  select(date, site, transect, category, subcategory, diameter, count)


# Reef 3
dens2013r3 <- readxl::read_xlsx(
  path  = "data/coral_counts/Reef3_Baseline_POM_050714 (REVISED DOCUMENT).xlsx", sheet = "Raw Data") %>%
  clean_names() %>%
  rename(site = site_e_g_hbnc1_cp, transect = transect_e_g_hbna_1, 
         diameter = max_diameter, count = total_count) %>%
  separate(site, into = c("site", "site2")) %>%
  select(date, site, transect, category, subcategory, diameter, count)

# Combine reef 2 and reef 3 data
dens2013 <- bind_rows(dens2013hb, dens2013r2, dens2013r3) %>%
  mutate(dist = NA,
         date = as.Date(date),
         transect_area_m2 = 20) %>%  # each transect was 20 square meters 
  mutate(count = ifelse(is.na(count), 1, count))            # ASSUME MISSING COUNTS ARE 1

Import data from 2015 - permanent sites

# 2015 data has multiple condition codes for the same coral recorded as separate lines.
# need to aggregate these to a single line so one row = one coral
dens2015 <- readxl::read_xlsx(
  path = "data/coral_counts/NEW_Offshore_Post_Construction_Data_Entry_ALL_Weeks_GOOD 151015.xlsx",
  guess_max = 10000) %>%
  clean_names() %>%
  rename(category = category_scleractinian_etc,
         subcategory = subcategory_species_or_genus,
         count = total_count, diameter = max_diameter) %>%
  separate(site, into = c("site", "site2")) %>%
  mutate(date = as.Date(date),                                # convert to Date
         dist = NA) %>%                                       # add dist column (NAs filled in later)
  filter(!grepl("DEAD", condition_code_qaqc)) %>%              # EXCLUDE DEAD CORALS!
  # tagged and non-tagged corals are given same coral_id numbers, so must keep tagged_coral_y_n
  select(date, site, transect, coral_id, tagged_coral_y_n, category, subcategory, diameter, count) %>%
  distinct() %>%      # reduces to single row for unique site/transect/coral_id
  mutate(count = ifelse(is.na(count), 1, count)) %>%                # ASSUME MISSING COUNTS ARE 1
  mutate(date = if_else(is.na(date), as.Date("2015-06-22"), date)) %>%  # fill in missing date
  mutate(diameter = as.numeric(diameter)) %>%
  mutate(transect_area_m2 = 20)    # transects were 20 m2

Import data from 2016 - permanent sites

# Data collected August 2016
# need to include dat

## Sponges and octocorals are in one spreadsheet
dens2016octo <- readxl::read_xlsx(
  path = "data/coral_counts/permanent_sites_impact_assessment_sponges_octocorals_2016_CSI_QAQC.xlsx",
  sheet = "Raw Data", skip = 1) %>%
  clean_names() %>%
  rename(category = category_scleractinian_etc, subcategory = subcategory_genus_or_type,
         count = octo_sponge_zoanthid_count_qaqc) %>%
  filter(!grepl("DEAD", condition_code_qaqc)) %>%              # EXCLUDE DEAD CORALS!
  select(date, site, transect, category, subcategory, count) %>%
  mutate(count = ifelse(is.na(count), 1, count),            # ASSUME MISSING COUNTS ARE 1
         date = as.Date(date))

## Corals are in another spreadsheet
# 2016 data has multiple condition codes for the same coral recorded as separate lines.
# need to aggregate these to a single line so one row = one coral
dens2016scler <- readxl::read_xlsx(
  path = "data/coral_counts/permanent_sites_impact_assessment_corals_2016_CSI_QAQC.XLSX",
  sheet = "Raw Data", skip = 1)  %>%
  clean_names() %>%
  rename(category = category_scleractinian_etc, subcategory = species,
         diameter = max_diameter) %>%
  filter(!grepl("DEAD", condition_code_qaqc)) %>%              # EXCLUDE DEAD CORALS!
  select(date, site, transect, coral_id, category, subcategory, diameter) %>%
  mutate(count = 1,  # 1 coral per row (no count data in sheet)
         date = as.Date(date)) %>%
  distinct(date, site, transect, coral_id, .keep_all = TRUE)   # reduce to single row for each site/transect/coral_id (b/c multiple conditions/rows)
         
## Combine sponge, octocoral, and scleractinian data
dens2016 <- bind_rows(dens2016octo, dens2016scler) %>%
  mutate(dist = NA,
         transect = as.character(transect),
         transect_area_m2 = 20)     # each transect was 20 square meters

Import data from 2016 - cross sites

# find all file names ending in .xlsx 
files <- list.files(path = "data/coral_counts/2016 cross-transect data/Entered Data/",
                    pattern = "^[^~].*.xlsx", recursive = TRUE, full.names = TRUE)
# remove files manually if multiple files for a given transect
files <- files[-c(24,25,28,30,32,33,36)]


# read in OCTOCORALS sheet from each xlsx file
cs2016octo <- files %>%
  map(readxl::read_xlsx, sheet = "OCTOCORALS") %>%    # read in all the files individually
  reduce(bind_rows)                                   # reduce with rbind into one dataframe
# Tidy data
cs2016octo <- cs2016octo %>%
  clean_names() %>%
  rename(site = station, transect = transect_ns_or_ew,
         category = organism_type_scl_spo_octo, 
         subcategory = species_scl_genera_octo_morph_spo) %>%
  mutate(count = 1) %>%                               # single observation per row
  mutate(transect_area_m2 = case_when(transect == "EW" ~ 30,     # transect area, DCA 10/2017 p.23
                                      transect == "NS" ~ 20)) %>%  
mutate(dist = case_when(
    site == "7a-150" ~ "150",
    site == "15B-250" ~ "250",
    site == "HBS-15b-150" ~ "150",
    TRUE ~ str_extract(site, "[[:digit:]]{2,3}"))) %>%      # extract distance from site name
  mutate(site2 = case_when(
    site == "HBS-15b-150" ~ "15b",
    site == "15B-250" ~ "15b",
    site == "7a-150" ~ "7a",
    TRUE ~ str_extract(site, "LR|RR|CP|SG|CR"))) %>%           # extract LR or RR from site name
  mutate(site = case_when(
    site == "HBS-15b-150" ~ "HBS",
    site == "15B-250" ~ "HBS",
    site == "7a-150" ~ "HBN",
    TRUE ~ str_extract(site, "^.[^-|_]*"))) %>%                 # extract base site form site name
  filter(!grepl("DEAD", condition_codes_all)) %>%              # EXCLUDE DEAD CORALS!
  select(date, site, site2, dist, transect, transect_area_m2, category, subcategory, count)


# read in CORALS sheet from each xlsx file
cs2016scler <- files %>%
  map(readxl::read_xlsx, sheet = "CORALS") %>%    # read in all the files individually
  reduce(bind_rows)                                   # reduce with rbind into one dataframe
# Tidy data
cs2016scler <- cs2016scler %>%
  clean_names() %>%
  rename(site = station, transect = transect_ns_or_ew,
         category = organism_type_scl_spo_octo, 
         subcategory = species_scl_genera_octo_morph_spo,
         diameter = size_cm) %>%
  mutate(count = 1) %>%                               # single observation per row
  mutate(transect_area_m2 = case_when(transect == "EW" ~ 30,     # transect area, DCA 10/2017 p.23
                                      transect == "NS" ~ 20)) %>%  
  mutate(dist = case_when(
    site == "7a-150" ~ "150",
    site == "15B-250" ~ "250",
    site == "HBS-15b-150" ~ "150",
    TRUE ~ str_extract(site, "[[:digit:]]{2,3}"))) %>%      # extract distance from site name
  mutate(site2 = case_when(
    site == "HBS-15b-150" ~ "15b",
    site == "15B-250" ~ "15b",
    site == "7a-150" ~ "7a",
    TRUE ~ str_extract(site, "LR|RR|CP|SG|CR"))) %>%           # extract LR or RR from site name
  mutate(site = case_when(
    site == "HBS-15b-150" ~ "HBS",
    site == "15B-250" ~ "HBS",
    site == "7a-150" ~ "HBN",
    TRUE ~ str_extract(site, "^.[^-|_]*"))) %>%                 # extract base site form site name
  filter(!grepl("DEAD", condition_codes_all)) %>%              # EXCLUDE DEAD CORALS!
  select(date, site, site2, dist, transect, transect_area_m2, category, subcategory, diameter, count)



# Combine octocoral and scleractinian data
cs2016 <- bind_rows(cs2016octo, cs2016scler) %>%
  mutate(dist = as.numeric(dist),
         date = as.Date(date))

Combine all count data and add site metadata

# Combine all count data and select octocorals and corals
all_counts <- data_frame(data = list(dens2010, dens2013, dens2015, dens2016, cs2016)) %>% unnest(data)

# Add site metadata
all_counts <- all_counts %>%
  mutate(year = year(date),                                      # add year column
         reef = str_sub(site, 1, 2),                             # add reef column (R2 or R3)
          dir = str_sub(site, 3, 3),                             # add direction (N or S)
         perm = case_when(nchar(site) == 3 | year == 2010 ~ "no",   # add permanent site (yes or no)
                          nchar(site) >= 4 & year != 2010 ~ "yes"),
      channel = ifelse(perm == "yes",
                       case_when(grepl("C", site) ~ "control",          # add control vs. channelside
                                !grepl("C", site) ~ "channelside"),
                       NA))

Tidy and filter data

# Tidy category names and filter only scleractinians and octocorals
all_counts <- all_counts %>%
  mutate(category = str_replace_all(category, 
           c("Scleractinian" = "scleractinian",
             "Octocoral" = "octocoral",
             "Ocotocoral" = "octocoral",
             "Coral" = "scleractinian",
             "CORAL" = "scleractinian"))) %>%
  filter(category == "scleractinian" | category == "octocoral")



# Tidy taxonomy
all_counts <- all_counts %>%
  mutate(subcategory = str_replace_all(subcategory,     # replace abbreviations with full names
           c("SINT" = "Stephanocoenia intersepta",
             "DSTO" = "Dichocoenia stokesii",
             "SBOU" = "Solenastrea bournoni",
             "MCAV" = "Montastraea cavernosa",
             "SSID" = "Siderastrea siderea",
             "SRAD" = "Siderastrea radians",
             "MMEA" = "Meandrina meandrites",
             "DCLI" = "Pseudodiploria clivosa",
             "AGSP" = "Agaricia sp.",
             "MFOR" = "Madracis formosa",
             "MDEC" = "Madracis decactis",
             "PAST" = "Porites astreoides",
             "DSTR" = "Pseudodiploria strigosa",
             "CNAT" = "Colpophyllia natans",
             "EFAS" = "Eusmilia fastigiata",
             "OFAV" = "Orbicella faveolata",
             "ACER" = "Acropora cervicornis",
             "AAGA" = "Agaricia agaricites",
             "MALI" = "Mycetophyllia aliciae",
             "SCUB" = "Scolymia cubensis",
             "DLAB" = "Diploria labyrinthiformis",
             "PPOR" = "Porites porites",
             "MFAV" = "Orbicella faveolata",
             "PCLI" = "Pseudodiplora clivosa",
             "AHUM" = "Agaricia humilis",
             "PSTR" = "Pseudodiploria strigosa",
             "ALAM" = "Agaricia lamarcki"))) %>%
  separate(subcategory, into = c("genus", "species")) %>%    # separate into genus and species
  mutate(genus = str_replace_all(genus,                      # fix genus spelling errors
           c("Briarieum" = "Briareum",
             "Dichocenia" = "Dichocoenia",
             "Diplora" = "Diploria",
             "Montastrea" = "Montastraea",
             "Muriceposis" = "Muriceopsis",
             "Pseudodiplora" = "Pseudodiploria",
             "Pseudoplexaua" = "Pseudoplexaura",
             "Pseudplexaura" = "Pseudoplexaura",
             "Psuedoplexaura" = "Pseudoplexaura",
             "^pterogorgia" = "Pterogorgia",
             "SID" = "Siderastrea",
             "siderastrea" = "Siderastrea",
             "stephanocoenia" = "Stephanocoenia",
             "^Coral" = "no_id",
             "^CORAL" = "no_id",
             "Octocoral" = "no_id",
             "OCTOCORAL" = "no_id")),
         species = str_replace_all(species,                   # fix species spelling errors
           c("annuliris" = "annularis",
             "Cavernosa" = "cavernosa",
             "fasitgiata" = "fastigiata",
             "Intersepta" = "intersepta",
             "spp" = "sp", "SPP" = "sp", "species" = "sp",
             "stokesi$" = "stokesii")))

# Add distance from channel for each permanent site
all_counts <- all_counts %>%
  mutate(dist = ifelse(is.na(dist), case_when(    
    site=="HBNC1" ~ "2350",
    site=="HBN3" ~ "48",
    site=="HBSC1" ~ "1650",
    site=="HBS4" ~ "32",
    site=="HBS1" ~ "20",     
    site=="HBS2" ~ "20",   
    site=="HBS3" ~ "20",    
    site=="HBN1" ~ "20",     
    site=="HBN2" ~ "30",     
    site=="R2N1" ~ "28",
    site=="R2NC2" ~ "9380",
    site=="R2N2" ~ "18",
    site=="R2NC1" ~ "9380",
    site=="R2NC3" ~ "9380",
    site=="R2S1" ~ "23",
    site=="R2SC1" ~ "1270",
    site=="R2S2" ~ "21",
    site=="R2SC2" ~ "1270",
    site=="R3N1" ~ "23",
    site=="R3NC1" ~ "9380",
    site=="R3S2" ~ "21",
    site=="R3SC2" ~ "1300",
    site=="R3S1" ~ "30",  
    site=="R3S3" ~ "30",   
    site=="R3SC3" ~ "1300",   
    site=="R3SC1" ~ "1300"),   
    dist)) %>%
  type_convert()

# remove "tag not found"
all_counts <- filter(all_counts, genus != "Tag")

# Add "site2" (LR or RR) information for permanent sites
all_counts <- all_counts %>%
  mutate(site2 = ifelse(perm == "yes", case_when(
    site %in% c("R2NC2", "R2N1", "R2SC1", "R2S1") ~ "RR",
    site %in% c("R2NC1", "R2NC3", "R2N1", "R2SC2", "R2S2", 
                "R3NC1", "R3N1", "R3SC2", "R2S2",
                "R2N2", "R3S2") ~ "LR",
    site %in% c("R3S1", "R3SC1", "HBNC1", "HBS1", "HBS2", "HBS3", "HBN3", "HBSC1") ~ "CP",
    site %in% c("HBN1", "HBN2", "HBS4") ~ "CR",
    site %in% c("R3S3", "R3SC3") ~ "SG"),
    site2))

# Add missing site metadata
### Look at map on page 2 of october 2017 cross_impact report
##### Reef 2 N:: 2010 transects below 100 m are all same general area as R2N1(-RR)
##### Reef 2 S:: 2010 transects below 100 m are all same general area as R2S1(-RR)
##### Reef 3 N:: 2010 transects below 100 m are all same general area as R3N1(-LR)
##### Reef 3 S:: 2010 transects LR are all same general area as R3S2
all_counts <- all_counts %>%
  mutate(site2 = case_when(
    reef == "R2" & dir == "N" & year == 2010 ~ "RR",
    reef == "R2" & dir == "S" & year == 2010 ~ "RR",
    reef == "R3" & dir == "N" & year == 2010 ~ "LR",
    reef == "R3" & dir == "S" & year == 2010 ~ "LR",
    site %in% c("R2N1", "R2NC2", "R2S1", "R2SC1") & perm == "yes" ~ "RR",
    site %in% c("R2N2", "R2NC1", "R2NC3", "R2S2", "R2SC2", 
                "R3S2", "R3N1", "R3SC2", "R3NC1") & perm == "yes" ~ "LR",
    site %in% c("R3S1", "R3SC1") & perm == "yes" ~ "CP",
    site %in% c("R3S3", "R3SC3") & perm == "yes" ~ "SG",
    TRUE ~ site2))


# For transects with only single digit listed for transect number, change to site-site2-transect
all_counts <- all_counts %>% 
  mutate(transect = case_when(
    nchar(transect) == 1 ~ paste(site, site2, transect, sep = "-"),
    TRUE ~ transect)
  )

Save count data as .RData object

save(all_counts, file = "data/processed/all_counts.RData")

Dataset metrics

# Subset scleractinians
scler <- filter(all_counts, category == "scleractinian") %>%
  filter(year %in% c(2010, 2013, 2016, 2017))   # exclude 2015

# Total number of corals counted
sum(scler$count)
## [1] 11166
# Total number of transects performed
transects <- scler %>%
  distinct(site, site2, transect, date)
nrow(transects)
## [1] 482