# Load packages
library(tidyverse)
library(stringr)
library(lubridate)
library(readxl)
library(janitor)
# Import data from file: Hardbottom_Baseline_POM_050714.xlsx
bl_hb <- readxl::read_xlsx(path = "data/tagged_corals/Hardbottom_Baseline_POM_050714.xlsx",
sheet = "Raw Data")
# Tidy data
bl_hb <- bl_hb %>%
clean_names() %>%
filter(tagged_coral_y_n == "Y") %>% # Tagged corals only
rename(site = site_e_g_hbnc1_cp,
transect = transect_e_g_hbna_1,
species = subcategory) %>%
mutate(date = as_date(date)) %>%
select(date, site, transect, coral_id, species, condition_qa_qc, surveyor) %>%
rename(condition = condition_qa_qc)
# Check and make sure no missing dates
#bl_hb %>% filter(is.na(date))
# Separate condition codes into multiple rows
bl_hb <- bl_hb %>%
separate_rows(condition, sep = "/")
# Import data from file: Reef2_Baseline_POM_2013_050714.xlsx
bl_r2 <- readxl::read_xlsx(path = "data/tagged_corals/Reef2_Baseline_POM_2013_050714.xlsx",
sheet = "Raw Data", guess_max = 3000)
# Tidy data
bl_r2 <- bl_r2 %>%
clean_names() %>%
filter(tagged_coral_y_n == "Y") %>%
rename(site = site_e_g_r2n1_rr,
transect = transect_e_g_r2n1_rr_1,
species = subcategory) %>%
mutate(date = as_date(date)) %>%
select(date, site, transect, coral_id, species, condition_qa_qc, surveyor) %>%
rename(condition = condition_qa_qc)
# Check and make sure no missing dates
#bl_r2 %>% filter(is.na(date))
# Correct date errors based on manual inspection of spreadsheet
bl_r2 <- bl_r2 %>%
mutate(date = case_when(
transect=="R2N1-RR-1" & coral_id==10 & is.na(date) ~ as_date("2013-11-02"),
transect=="R2NC3-LR-2" & coral_id==9 & is.na(date) ~ as_date("2013-11-19"),
TRUE ~ as_date(date)))
# Separate condition codes into multiple rows
bl_r2 <- bl_r2 %>%
separate_rows(condition, sep = "/")
# Import data from file: Reef2_Baseline_POM_2013_050714.xlsx
bl_r3 <- readxl::read_xlsx(path = "data/tagged_corals/Reef3_Baseline_POM_050714 (REVISED DOCUMENT).xlsx",
sheet = "Raw Data", guess_max = 3000)
# Tidy data
bl_r3 <- bl_r3 %>%
clean_names() %>%
filter(tagged_coral_y_n == "Y") %>%
rename(site = site_e_g_hbnc1_cp,
transect = transect_e_g_hbna_1,
species = subcategory) %>%
mutate(date = as_date(date)) %>%
select(date, site, transect, coral_id, species, qa_qc_condition_code, surveyor) %>%
rename(condition = qa_qc_condition_code)
# Check and make sure no missing dates
#bl_r3 %>% filter(is.na(date))
# Separate condition codes into multiple rows
bl_r3 <- bl_r3 %>%
separate_rows(condition, sep = "/") %>%
separate_rows(condition, sep = " ")
bl <- bind_rows(bl_hb, bl_r2, bl_r3) %>%
mutate_at(c("transect", "coral_id"), as.factor)
Hardbottom, middle, and outer reef data are all in the same spreadsheet.
# Import data from file: NEW_Offshore_Comp_Scleractinian_Cond_All_Weeks_8_10_15-QAQC.xlsx
comp <- readxl::read_xlsx(
path = "data/tagged_corals/NEW_Offshore_Comp_Scleractinian_Cond_All_Weeks_8_10_15-QAQC.xlsx",
sheet = "All Weeks", guess_max = 3000)
# Tidy data
comp <- comp %>%
clean_names() %>%
rename(condition = condition_code) %>%
select(date, week, visit, site, transect, coral_id, species, condition, surveyor) %>%
mutate(date = as_date(date)) %>%
mutate_at(c("transect", "coral_id"), as.factor)
# Correct typos and errors in date column (detected by looking at distinct date/week combinations)
comp <- comp %>%
# 2014-11-25 should be 2013-11-25 because it is Week #1
mutate(date = if_else(date == "2014-11-25", as_date("2013-11-25"), as_date(date))) %>%
# 2013-04-02 should be 2014-04-02
mutate(date = if_else(date == "2013-04-02", as_date("2014-04-02"), as_date(date))) %>%
# Looks like a mistake from filling down Date in Excel for Surveyor MLR
# Change all Week 27 dates for R2SC1-RR visit 1 to May 23, 2014
mutate(date = case_when(site == "R2SC1-RR" & week == 27 & visit == 1 ~ as_date("2014-05-23"),
TRUE ~ as_date(date))) %>%
# 2020-05-29 should be 2014-05-29
mutate(date = if_else(date == "2020-05-29", as_date("2014-05-29"), as_date(date))) %>%
# Fix missing dates for HBS1-CP week 28
mutate(date = case_when(site=="HBS1-CP" & week == 28 & is.na(date) ~ as_date("2014-05-29"),
TRUE ~ as_date(date)))
# Select columns
comp <- comp %>% select(date, site, transect, coral_id, species, condition, surveyor)
# Check and make sure no missing dates
#comp %>% filter(is.na(date))
# Import data from file: NEW_Offshore_Compliance_Scleractinian_Cond_R2_R3_IMPACT_ASSESS_data TAGGED CORALS.xlsx
pc1 <- readxl::read_xlsx(
path = "data/tagged_corals/NEW_Offshore_Compliance_Scleractinian_Cond_R2_R3_IMPACT_ASSESS_data TAGGED CORALS.xlsx",
sheet = "IA TAGGED ONLY")
# Tidy data
pc1 <- pc1 %>%
clean_names() %>%
rename(condition = condition_code) %>%
select(date, site, transect, coral_id, species, condition, surveyor) %>%
mutate(date = as_date(date)) %>%
mutate_at(c("transect", "coral_id"), as.factor)
# Check and make sure no missing dates
#pc1 %>% filter(is.na(date))
# Import data from file: NEW_Offshore_Post_Construction_Data_Entry_ALL_Weeks_GOOD 151015.xlsx
pc2 <- readxl::read_xlsx(
path = "data/tagged_corals/NEW_Offshore_Post_Construction_Data_Entry_ALL_Weeks_GOOD 151015.xlsx",
sheet = "Raw Data", guess_max = 3000)
# Tidy data
pc2 <- pc2 %>%
clean_names() %>%
filter(tagged_coral_y_n == "Y") %>% # Tagged corals only
rename(condition = condition_code_qaqc) %>%
select(date, site, transect, coral_id, species, condition, surveyor) %>%
mutate(date = as_date(date)) %>%
mutate_at(c("transect", "coral_id"), as.factor)
# Check and make sure no missing dates
#pc2 %>% filter(is.na(date))
# Import data from file: permanent_sites_impact_assessment_corals_2016_CSI_QAQC.xlsx
pc3 <- readxl::read_xlsx(
path = "data/tagged_corals/permanent_sites_impact_assessment_corals_2016_CSI_QAQC.XLSX",
sheet = "Raw Data", guess_max = 3000, skip = 1)
# Tidy data
pc3 <- pc3 %>%
clean_names() %>%
filter(tagged_coral_y_n == "Y") %>% # Tagged corals only
rename(condition = condition_code_qaqc) %>%
select(date, site, transect, coral_id, species, condition, surveyor) %>%
mutate(date = as_date(date)) %>%
mutate_at(c("transect", "coral_id"), as.factor)
# Check and make sure no missing dates
#pc3 %>% filter(is.na(date))
# Aggregate condition codes into single row for each observation
# pc3 <- pc3 %>%
# group_by(date, site, transect, coral_id, species) %>%
# summarise(condition = list(condition_code_qaqc)) %>%
# ungroup()
# Combine all data
tagged <- bind_rows(bl, comp, pc1, pc2, pc3)
tagged <- tagged %>%
mutate(site = str_extract(site, pattern = "[^-]+")) %>% # Remove "-RR" etc.
mutate(transect = str_extract(transect, pattern = ".$")) %>% # Get transect as single digit
unite(coral, site, transect, coral_id, sep = ".", remove = FALSE)
tagged <- tagged %>%
mutate(species = case_when(
species %in% c("MFOR") ~ "MDEC",
species %in% c("MFER", "MALI", "Mycetophyllia ferox") ~ "MYCE",
species %in% c("Orbicella annularis") ~ "OANN",
species %in% c("stephanoceonia intersepta", "Stephanocoenia intersepta",
"Stephanocoenia intercepta", "stephanocoenia intersepta") ~ "SINT",
species %in% c("Dichocoenia stokesii", "Dichocenia stokesii") ~ "DSTO",
species %in% c("Solenastrea bournoni") ~ "SBOU",
species %in% c("Montastraea cavernosa") ~ "MCAV",
species %in% c("Siderastrea siderea") ~ "SSID",
species %in% c("Meandrina meandrites") ~ "MMEA",
species %in% c("Diploria clivosa") ~ "DCLI",
species %in% c("Agaricia sp.") ~ "AGSP",
species %in% c("Madracis formosa") ~ "MFOR",
species %in% c("Madracis decactis") ~ "MDEC",
species %in% c("Porites astreoides") ~ "PAST",
species %in% c("Porites porites") ~ "PPOR",
species %in% c("Siderastrea sp.") ~ "SIDSP",
species %in% c("Oculina diffusa") ~ "ODIF",
species %in% c("Diploria strigosa", "Diplora strigosa", "DSTR") ~ "PSTR",
species %in% c("Diploria labyrinthiformis") ~ "DLAB",
species %in% c("Colpophyllia natans", "Colophyllia natans") ~ "CNAT",
species %in% c("Eusmilia fastigiata") ~ "EFAS",
species %in% c("Orbicella faveolata") ~ "OFAV",
species %in% c("Acropora cervicornis") ~ "ACER",
species %in% c("Agaricia agaricites") ~ "AAGA",
species %in% c("Agaricia lamarcki") ~ "ALAM",
TRUE ~ as.character(species)))
# Remove row with missing data
tagged <- tagged %>% drop_na(date)
# tagged %>%
# distinct(condition) %>%
# print(n = nrow(.))
tagged <- tagged %>%
mutate(condition = recode(condition, `sed dust` = "SED"))
# Downstream analysis revealed a single observation from R2SC1 on May 18, 2015, but the rest of the data from this site were taken May 19, 2015. The coral 'observed' on May 18 was also observed on May 19, so the May 18 entry may be some kind of error. Omit.
tagged <- tagged %>%
filter(date != "2015-05-18") #(this was the only observation on this date)
# Downstream analysis revealed a single observation from R3NC1 on April 25, 2015, but the rest of the data from this site were taken April 26, 2015. The coral 'observed' on April 25 was also observed on April 26, so the April 25 entry may be some kind of error. Omit.
tagged <- tagged %>%
filter(!(date == "2015-04-25" & coral == "R3NC1.1.3"))
spp_conflicts <- tagged %>%
distinct(coral, species) %>%
group_by(coral) %>%
filter(n() > 1) %>%
arrange(coral) %>%
summarise(conflicting_species = list(species))
183 of the tagged corals have conflicting species information.
# Get most frequently used species ID for each coral
spp_ids <- tagged %>%
group_by(coral) %>%
count(species) %>%
summarise(most_freq_species = species[which.max(n)])
# Replace species ID with most frequently used species ID for each coral
tagged <- tagged %>%
left_join(spp_ids, by = "coral") %>%
select(date, site, transect, coral_id, coral, most_freq_species, condition, surveyor) %>%
rename(species = most_freq_species)
tagged <- tagged %>%
mutate(condition = if_else(species == "ACER",
str_replace(condition, "WP", "WB"),
condition))
tagged %>% count(surveyor)
## # A tibble: 10 x 2
## surveyor n
## <chr> <int>
## 1 AS 10080
## 2 BG 5024
## 3 JC 18
## 4 JDC 944
## 5 MLR 1721
## 6 MR 30
## 7 RF 14673
## 8 WFP 5771
## 9 WP 414
## 10 WPF 32
# Change WP and WPF to WFP
# Change MR to MLR
# Change JC to JDC
tagged <- tagged %>%
mutate(surveyor = recode(surveyor,
WP = "WFP", WPF = "WFP",
MR = "MLR",
JC = "JDC"))
tagged <- tagged %>%
mutate(channel = if_else(str_detect(site, "C"), "control", "channelside"),
reef = str_sub(site, 1, 2),
dir = str_sub(site, 3, 3)) %>%
select(date, reef, dir, channel, site, transect, coral, species, condition, surveyor)
tagged_bool <- tagged %>%
mutate(hasCond = TRUE) %>%
distinct() %>%
spread(condition, hasCond, fill = FALSE)
tagged_nest <- tagged %>% nest(condition, .key = "condition")
tagged_c <- tagged %>%
group_by(date, reef, dir, channel, site, transect, coral, species) %>%
summarise(condition = paste(condition, collapse = ", ")) %>%
ungroup()
save(tagged, tagged_bool, tagged_nest, tagged_c, file = "data/processed/tagged.RData")
# Write all data to spreadsheet
dd <- tagged_c %>%
arrange(coral, date) %>%
select(-channel, -reef, -dir, -coral)
write.table(dd, file="data/processed/tagged_c.txt", row.names = FALSE, quote = FALSE, sep = "\t")
load("data/processed/tagged.RData")
# Number of observations
nrow(tagged_bool)
## [1] 23537
# Number of corals observed
tagged_bool %>%
distinct(coral) %>%
nrow(.)
## [1] 650
# Number of sites
tagged_bool %>%
distinct(site) %>%
nrow(.)
## [1] 26
# Date range of entire dataset
range(tagged_bool$date)
## [1] "2013-10-14" "2016-08-20"