Import and tidy density data for scleractinians and octocorals
# Hardbottom 2010 data -- does not include coral diameter, so cannot use this with the rest of the data (because we separately analyze corals >3cm and corals <3cm in diameter)
# dens2010hb <- readxl::read_xlsx(
# path = "data/Density_Data/Miami Quantitative Survey HB Areas Raw Data for Appendix.xlsx",
# sheet = "HB raw data") %>%
# clean_names() %>%
# rename(site = area_e_g_hbn, block = block_e_g_a, transect = transect_e_g_hbna_1)
#dens2010hb %>%
# Import and tidy baseline density data from 2010 (10m transects) on reef 2 and reef 3 (see report dated 2012)
dens2010 <- readxl::read_xlsx(
path = "data/coral_counts/Miami Quantitative Study Raw Data forAppendix.xlsx") %>%
clean_names() %>%
rename(site = area_e_g_r2n, transect = transect_e_g_r2n_10, diameter = diameter_cm) %>%
select(site, transect, category, subcategory, diameter) %>%
filter(!grepl("D", transect)) %>% # Remove transects that contain a "D". These were 'direct impact'
# areas where reef was removed, so no later data for comparison
mutate(site = substr(transect, 1, ifelse(nchar(transect) == 6, 3, 4)), # Get site name
site2 = case_when(site == "R2N" ~ "RR", # 2010 R2N transects match up with 2016 R2N-RR
site == "R2S" ~ "", # GET THIS INFO
site == "R3N" ~ "", # GET THIS INFO
site == "R3S1" ~ "CP",
site == "R3S2" ~ "LR",
site == "R3S3" ~ "SG"),
dist = as.numeric(substr(transect, ifelse(nchar(transect) == 6, 4, 5), # Get dist from channel
ifelse(nchar(transect) == 6, 6, 7))),
count = 1, # single observation per row
date = ymd("2010-08-15"), # add date (data collected August 2010 (2012 report p. viii))
transect_area_m2 = 10) # each transect was 10 square meters
unique(dens2010$site)
## [1] "R2N" "R2S" "R3N" "R3S1" "R3S2" "R3S3"
# Data from Oct - Dec 2013 (three 20m long transects (x1m wide))
#Hardbottom
dens2013hb <- readxl::read_xlsx(
path = "data/coral_counts/Copy of Hardbottom_Baseline_POM_022514_LATEST.xlsx",
sheet = "Raw Data") %>%
clean_names() %>%
rename(site = site_e_g_hbnc1_cp, transect = transect_e_g_hbna_1,
diameter = max_diameter, count = total_count) %>%
separate(site, into = c("site", "site2")) %>%
select(date, site, transect, category, subcategory, diameter, count) %>%
mutate(count = as.numeric(count))
# Reef 2
dens2013r2 <- readxl::read_xlsx(
path = "data/coral_counts/Reef2_Baseline_POM_2013_050714.xlsx",
sheet = "Raw Data", guess_max = 4000) %>%
clean_names() %>%
rename(site = site_e_g_r2n1_rr, transect = transect_e_g_r2n1_rr_1,
diameter = max_diameter_cm, count = total_count) %>%
separate(site, into = c("site", "site2")) %>%
mutate(diameter = as.numeric(diameter)) %>%
select(date, site, transect, category, subcategory, diameter, count)
# Reef 3
dens2013r3 <- readxl::read_xlsx(
path = "data/coral_counts/Reef3_Baseline_POM_050714 (REVISED DOCUMENT).xlsx", sheet = "Raw Data") %>%
clean_names() %>%
rename(site = site_e_g_hbnc1_cp, transect = transect_e_g_hbna_1,
diameter = max_diameter, count = total_count) %>%
separate(site, into = c("site", "site2")) %>%
select(date, site, transect, category, subcategory, diameter, count)
# Combine reef 2 and reef 3 data
dens2013 <- bind_rows(dens2013hb, dens2013r2, dens2013r3) %>%
mutate(dist = NA,
date = as.Date(date),
transect_area_m2 = 20) %>% # each transect was 20 square meters
mutate(count = ifelse(is.na(count), 1, count)) # ASSUME MISSING COUNTS ARE 1
# 2015 data has multiple condition codes for the same coral recorded as separate lines.
# need to aggregate these to a single line so one row = one coral
dens2015 <- readxl::read_xlsx(
path = "data/coral_counts/NEW_Offshore_Post_Construction_Data_Entry_ALL_Weeks_GOOD 151015.xlsx",
guess_max = 10000) %>%
clean_names() %>%
rename(category = category_scleractinian_etc,
subcategory = subcategory_species_or_genus,
count = total_count, diameter = max_diameter) %>%
separate(site, into = c("site", "site2")) %>%
mutate(date = as.Date(date), # convert to Date
dist = NA) %>% # add dist column (NAs filled in later)
filter(!grepl("DEAD", condition_code_qaqc)) %>% # EXCLUDE DEAD CORALS!
# tagged and non-tagged corals are given same coral_id numbers, so must keep tagged_coral_y_n
select(date, site, transect, coral_id, tagged_coral_y_n, category, subcategory, diameter, count) %>%
distinct() %>% # reduces to single row for unique site/transect/coral_id
mutate(count = ifelse(is.na(count), 1, count)) %>% # ASSUME MISSING COUNTS ARE 1
mutate(date = if_else(is.na(date), as.Date("2015-06-22"), date)) %>% # fill in missing date
mutate(diameter = as.numeric(diameter)) %>%
mutate(transect_area_m2 = 20) # transects were 20 m2
# Data collected August 2016
# need to include dat
## Sponges and octocorals are in one spreadsheet
dens2016octo <- readxl::read_xlsx(
path = "data/coral_counts/permanent_sites_impact_assessment_sponges_octocorals_2016_CSI_QAQC.xlsx",
sheet = "Raw Data", skip = 1) %>%
clean_names() %>%
rename(category = category_scleractinian_etc, subcategory = subcategory_genus_or_type,
count = octo_sponge_zoanthid_count_qaqc) %>%
filter(!grepl("DEAD", condition_code_qaqc)) %>% # EXCLUDE DEAD CORALS!
select(date, site, transect, category, subcategory, count) %>%
mutate(count = ifelse(is.na(count), 1, count), # ASSUME MISSING COUNTS ARE 1
date = as.Date(date))
## Corals are in another spreadsheet
# 2016 data has multiple condition codes for the same coral recorded as separate lines.
# need to aggregate these to a single line so one row = one coral
dens2016scler <- readxl::read_xlsx(
path = "data/coral_counts/permanent_sites_impact_assessment_corals_2016_CSI_QAQC.XLSX",
sheet = "Raw Data", skip = 1) %>%
clean_names() %>%
rename(category = category_scleractinian_etc, subcategory = species,
diameter = max_diameter) %>%
filter(!grepl("DEAD", condition_code_qaqc)) %>% # EXCLUDE DEAD CORALS!
select(date, site, transect, coral_id, category, subcategory, diameter) %>%
mutate(count = 1, # 1 coral per row (no count data in sheet)
date = as.Date(date)) %>%
distinct(date, site, transect, coral_id, .keep_all = TRUE) # reduce to single row for each site/transect/coral_id (b/c multiple conditions/rows)
## Combine sponge, octocoral, and scleractinian data
dens2016 <- bind_rows(dens2016octo, dens2016scler) %>%
mutate(dist = NA,
transect = as.character(transect),
transect_area_m2 = 20) # each transect was 20 square meters
# find all file names ending in .xlsx
files <- list.files(path = "data/coral_counts/2016 cross-transect data/Entered Data/",
pattern = "^[^~].*.xlsx", recursive = TRUE, full.names = TRUE)
# remove files manually if multiple files for a given transect
files <- files[-c(24,25,28,30,32,33,36)]
# read in OCTOCORALS sheet from each xlsx file
cs2016octo <- files %>%
map(readxl::read_xlsx, sheet = "OCTOCORALS") %>% # read in all the files individually
reduce(bind_rows) # reduce with rbind into one dataframe
# Tidy data
cs2016octo <- cs2016octo %>%
clean_names() %>%
rename(site = station, transect = transect_ns_or_ew,
category = organism_type_scl_spo_octo,
subcategory = species_scl_genera_octo_morph_spo) %>%
mutate(count = 1) %>% # single observation per row
mutate(transect_area_m2 = case_when(transect == "EW" ~ 30, # transect area, DCA 10/2017 p.23
transect == "NS" ~ 20)) %>%
mutate(dist = case_when(
site == "7a-150" ~ "150",
site == "15B-250" ~ "250",
site == "HBS-15b-150" ~ "150",
TRUE ~ str_extract(site, "[[:digit:]]{2,3}"))) %>% # extract distance from site name
mutate(site2 = case_when(
site == "HBS-15b-150" ~ "15b",
site == "15B-250" ~ "15b",
site == "7a-150" ~ "7a",
TRUE ~ str_extract(site, "LR|RR|CP|SG|CR"))) %>% # extract LR or RR from site name
mutate(site = case_when(
site == "HBS-15b-150" ~ "HBS",
site == "15B-250" ~ "HBS",
site == "7a-150" ~ "HBN",
TRUE ~ str_extract(site, "^.[^-|_]*"))) %>% # extract base site form site name
filter(!grepl("DEAD", condition_codes_all)) %>% # EXCLUDE DEAD CORALS!
select(date, site, site2, dist, transect, transect_area_m2, category, subcategory, count)
# read in CORALS sheet from each xlsx file
cs2016scler <- files %>%
map(readxl::read_xlsx, sheet = "CORALS") %>% # read in all the files individually
reduce(bind_rows) # reduce with rbind into one dataframe
# Tidy data
cs2016scler <- cs2016scler %>%
clean_names() %>%
rename(site = station, transect = transect_ns_or_ew,
category = organism_type_scl_spo_octo,
subcategory = species_scl_genera_octo_morph_spo,
diameter = size_cm) %>%
mutate(count = 1) %>% # single observation per row
mutate(transect_area_m2 = case_when(transect == "EW" ~ 30, # transect area, DCA 10/2017 p.23
transect == "NS" ~ 20)) %>%
mutate(dist = case_when(
site == "7a-150" ~ "150",
site == "15B-250" ~ "250",
site == "HBS-15b-150" ~ "150",
TRUE ~ str_extract(site, "[[:digit:]]{2,3}"))) %>% # extract distance from site name
mutate(site2 = case_when(
site == "HBS-15b-150" ~ "15b",
site == "15B-250" ~ "15b",
site == "7a-150" ~ "7a",
TRUE ~ str_extract(site, "LR|RR|CP|SG|CR"))) %>% # extract LR or RR from site name
mutate(site = case_when(
site == "HBS-15b-150" ~ "HBS",
site == "15B-250" ~ "HBS",
site == "7a-150" ~ "HBN",
TRUE ~ str_extract(site, "^.[^-|_]*"))) %>% # extract base site form site name
filter(!grepl("DEAD", condition_codes_all)) %>% # EXCLUDE DEAD CORALS!
select(date, site, site2, dist, transect, transect_area_m2, category, subcategory, diameter, count)
# Combine octocoral and scleractinian data
cs2016 <- bind_rows(cs2016octo, cs2016scler) %>%
mutate(dist = as.numeric(dist),
date = as.Date(date))
# Combine all count data and select octocorals and corals
all_counts <- data_frame(data = list(dens2010, dens2013, dens2015, dens2016, cs2016)) %>% unnest(data)
# Add site metadata
all_counts <- all_counts %>%
mutate(year = year(date), # add year column
reef = str_sub(site, 1, 2), # add reef column (R2 or R3)
dir = str_sub(site, 3, 3), # add direction (N or S)
perm = case_when(nchar(site) == 3 | year == 2010 ~ "no", # add permanent site (yes or no)
nchar(site) >= 4 & year != 2010 ~ "yes"),
channel = ifelse(perm == "yes",
case_when(grepl("C", site) ~ "control", # add control vs. channelside
!grepl("C", site) ~ "channelside"),
NA))
# Tidy category names and filter only scleractinians and octocorals
all_counts <- all_counts %>%
mutate(category = str_replace_all(category,
c("Scleractinian" = "scleractinian",
"Octocoral" = "octocoral",
"Ocotocoral" = "octocoral",
"Coral" = "scleractinian",
"CORAL" = "scleractinian"))) %>%
filter(category == "scleractinian" | category == "octocoral")
# Tidy taxonomy
all_counts <- all_counts %>%
mutate(subcategory = str_replace_all(subcategory, # replace abbreviations with full names
c("SINT" = "Stephanocoenia intersepta",
"DSTO" = "Dichocoenia stokesii",
"SBOU" = "Solenastrea bournoni",
"MCAV" = "Montastraea cavernosa",
"SSID" = "Siderastrea siderea",
"SRAD" = "Siderastrea radians",
"MMEA" = "Meandrina meandrites",
"DCLI" = "Pseudodiploria clivosa",
"AGSP" = "Agaricia sp.",
"MFOR" = "Madracis formosa",
"MDEC" = "Madracis decactis",
"PAST" = "Porites astreoides",
"DSTR" = "Pseudodiploria strigosa",
"CNAT" = "Colpophyllia natans",
"EFAS" = "Eusmilia fastigiata",
"OFAV" = "Orbicella faveolata",
"ACER" = "Acropora cervicornis",
"AAGA" = "Agaricia agaricites",
"MALI" = "Mycetophyllia aliciae",
"SCUB" = "Scolymia cubensis",
"DLAB" = "Diploria labyrinthiformis",
"PPOR" = "Porites porites",
"MFAV" = "Orbicella faveolata",
"PCLI" = "Pseudodiplora clivosa",
"AHUM" = "Agaricia humilis",
"PSTR" = "Pseudodiploria strigosa",
"ALAM" = "Agaricia lamarcki"))) %>%
separate(subcategory, into = c("genus", "species")) %>% # separate into genus and species
mutate(genus = str_replace_all(genus, # fix genus spelling errors
c("Briarieum" = "Briareum",
"Dichocenia" = "Dichocoenia",
"Diplora" = "Diploria",
"Montastrea" = "Montastraea",
"Muriceposis" = "Muriceopsis",
"Pseudodiplora" = "Pseudodiploria",
"Pseudoplexaua" = "Pseudoplexaura",
"Pseudplexaura" = "Pseudoplexaura",
"Psuedoplexaura" = "Pseudoplexaura",
"^pterogorgia" = "Pterogorgia",
"SID" = "Siderastrea",
"siderastrea" = "Siderastrea",
"stephanocoenia" = "Stephanocoenia",
"^Coral" = "no_id",
"^CORAL" = "no_id",
"Octocoral" = "no_id",
"OCTOCORAL" = "no_id")),
species = str_replace_all(species, # fix species spelling errors
c("annuliris" = "annularis",
"Cavernosa" = "cavernosa",
"fasitgiata" = "fastigiata",
"Intersepta" = "intersepta",
"spp" = "sp", "SPP" = "sp", "species" = "sp",
"stokesi$" = "stokesii")))
# Add distance from channel for each permanent site
all_counts <- all_counts %>%
mutate(dist = ifelse(is.na(dist), case_when(
site=="HBNC1" ~ "2350",
site=="HBN3" ~ "48",
site=="HBSC1" ~ "1650",
site=="HBS4" ~ "32",
site=="HBS1" ~ "20",
site=="HBS2" ~ "20",
site=="HBS3" ~ "20",
site=="HBN1" ~ "20",
site=="HBN2" ~ "30",
site=="R2N1" ~ "28",
site=="R2NC2" ~ "9380",
site=="R2N2" ~ "18",
site=="R2NC1" ~ "9380",
site=="R2NC3" ~ "9380",
site=="R2S1" ~ "23",
site=="R2SC1" ~ "1270",
site=="R2S2" ~ "21",
site=="R2SC2" ~ "1270",
site=="R3N1" ~ "23",
site=="R3NC1" ~ "9380",
site=="R3S2" ~ "21",
site=="R3SC2" ~ "1300",
site=="R3S1" ~ "30",
site=="R3S3" ~ "30",
site=="R3SC3" ~ "1300",
site=="R3SC1" ~ "1300"),
dist)) %>%
type_convert()
# remove "tag not found"
all_counts <- filter(all_counts, genus != "Tag")
# Add "site2" (LR or RR) information for permanent sites
all_counts <- all_counts %>%
mutate(site2 = ifelse(perm == "yes", case_when(
site %in% c("R2NC2", "R2N1", "R2SC1", "R2S1") ~ "RR",
site %in% c("R2NC1", "R2NC3", "R2N1", "R2SC2", "R2S2",
"R3NC1", "R3N1", "R3SC2", "R2S2",
"R2N2", "R3S2") ~ "LR",
site %in% c("R3S1", "R3SC1", "HBNC1", "HBS1", "HBS2", "HBS3", "HBN3", "HBSC1") ~ "CP",
site %in% c("HBN1", "HBN2", "HBS4") ~ "CR",
site %in% c("R3S3", "R3SC3") ~ "SG"),
site2))
# Add missing site metadata
### Look at map on page 2 of october 2017 cross_impact report
##### Reef 2 N:: 2010 transects below 100 m are all same general area as R2N1(-RR)
##### Reef 2 S:: 2010 transects below 100 m are all same general area as R2S1(-RR)
##### Reef 3 N:: 2010 transects below 100 m are all same general area as R3N1(-LR)
##### Reef 3 S:: 2010 transects LR are all same general area as R3S2
all_counts <- all_counts %>%
mutate(site2 = case_when(
reef == "R2" & dir == "N" & year == 2010 ~ "RR",
reef == "R2" & dir == "S" & year == 2010 ~ "RR",
reef == "R3" & dir == "N" & year == 2010 ~ "LR",
reef == "R3" & dir == "S" & year == 2010 ~ "LR",
site %in% c("R2N1", "R2NC2", "R2S1", "R2SC1") & perm == "yes" ~ "RR",
site %in% c("R2N2", "R2NC1", "R2NC3", "R2S2", "R2SC2",
"R3S2", "R3N1", "R3SC2", "R3NC1") & perm == "yes" ~ "LR",
site %in% c("R3S1", "R3SC1") & perm == "yes" ~ "CP",
site %in% c("R3S3", "R3SC3") & perm == "yes" ~ "SG",
TRUE ~ site2))
# For transects with only single digit listed for transect number, change to site-site2-transect
all_counts <- all_counts %>%
mutate(transect = case_when(
nchar(transect) == 1 ~ paste(site, site2, transect, sep = "-"),
TRUE ~ transect)
)
save(all_counts, file = "data/processed/all_counts.RData")
# Subset scleractinians
scler <- filter(all_counts, category == "scleractinian") %>%
filter(year %in% c(2010, 2013, 2016, 2017)) # exclude 2015
# Total number of corals counted
sum(scler$count)
## [1] 11166
# Total number of transects performed
transects <- scler %>%
distinct(site, site2, transect, date)
nrow(transects)
## [1] 482