Code
# Load those libraries.
# Each package is attached once; the original listing repeated the same five
# library() calls twice.
library(readxl)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)

# Load those data, new names.
# Read the regeneration workbook. The first two rows of the sheet are skipped
# and replaced by the column names assembled below; the helper vectors stay
# private to the local() scope.
regen_0 <- local({
  id_cols <- c("site", "treat", "plot", "spp")
  size_class_cols <- c("dbh_lt1", "dbh_lt2", "lcbh_gte2")
  # dbh1 ... dbh28
  stem_cols <- paste0("dbh", 1:28)

  read_excel(
    "../data/Regen10yr.xlsx",
    skip = 2,
    col_names = c(id_cols, size_class_cols, stem_cols)
  )
})

# First, I'll take a look at the character type columns.
# List the distinct values found in every character column of the raw data.
# The printed result is kept below as `#>` comments so that it is no longer
# fused onto the expression itself.
regen_0 |>
  select(where(is.character)) |>
  map(unique)
#> $site
#> [1] "Waldo South" "Waldo North" "CAMP 6" "WHISKEY SPRINGS"
#>
#> $treat
#> [1] "GS" "LD" "HA" "HD" "HD - 2B" "HD -2A"
#>
#> $plot
#> [1] "NE" "SE" "SW" "NW" "W" "S" "E" "N"
#>
#> $spp
#> [1] "TO" "RW" "DF" "GF"
#> [5] "MD" "WM" "CA NETTLE" "QUMU (chinkapin)"
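I’ll take the following steps to clean up this data:

- convert every character column to lower case;
- recode the species entry “qumu (chinkapin)” as “gc”;
- drop the rows with treatment “hd -2a” and the rows with species “ca nettle”;
- reduce the treatment labels to their base codes (gs, ld, ha, hd), so that “hd - 2b” becomes “hd”.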
# Clean the categorical columns: lower-case all text, recode the chinkapin
# entry to "gc", drop the "hd -2a" treatment and the "ca nettle" records, and
# keep only the base treatment code.
regen_1 <- regen_0 |>
  mutate(
    across(where(is.character), tolower),
    # Runs after the lower-casing above, so the pattern is lower case too.
    spp = if_else(str_detect(spp, "qumu"), "gc", spp)
  ) |>
  filter(treat != "hd -2a") |>
  filter(spp != "ca nettle") |>
  mutate(treat = str_extract(treat, "gs|ld|ha|hd"))

# This resulted in the removal of 14 rows.
Next, the data are in an “unlimited-wide” format and are of a few different types. There are counts for two diameter classes (0–1 inch and 1–2 inch), and actual diameters (in centimeters) for all trees over 2 inches.
I’ll make the simplifying assumption that individuals in the 0–1 and 1–2 inch size classes are, on average, at the midpoint of their class. So each individual is assigned a DBH of 0.5 or 1.5 inches for the first and second size classes, respectively.
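I’ll put live crown base heights in a separate table. For stems, I’ll build one long list with a row for each regenerating stem. Finally, I’ll convert the size-class midpoints from inches to centimeters (0.5 in = 1.27 cm, 1.5 in = 3.81 cm) so that they are in the same units as the measured diameters.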
# Keep the plot/species identifiers together with the live crown base height
# column in a table of their own.
regen_lcbh <- regen_1 |>
  select(site, treat, plot, spp, lcbh_gte2)
# Reshape the wide stem data into one row per regenerating stem, with a single
# `dbh` column. Per the accompanying notes, measured diameters are already in
# centimeters; the tallied classes are given their midpoint in centimeters.
regen_2 <- local({
  # Individually measured stems: one row per non-empty dbh1 ... dbh28 cell.
  # The same anchored pattern is used for selecting and for pivoting.
  regen_gt2_long <- regen_1 |>
    select(site, treat, plot, spp, matches("^dbh\\d+$")) |>
    pivot_longer(matches("^dbh\\d+$"), values_to = "dbh", names_to = NULL) |>
    filter(!is.na(dbh))

  # Tallied size classes: the column suffix is translated into the class
  # midpoint (0.5 in = 1.27 cm, 1.5 in = 3.81 cm), then each tally is expanded
  # into that many rows.
  regen_lt2_long <- regen_1 |>
    select(site, treat, plot, spp, dbh_lt1, dbh_lt2) |>
    pivot_longer(
      matches("^dbh_"),
      names_to = "dbh",
      names_pattern = ".*_(lt1|lt2)",
      values_to = "count",
      names_transform = \(x) case_match(x, "lt1" ~ 1.27, "lt2" ~ 3.81)
    ) |>
    # uncount() stops on a missing weight. A blank tally is treated as
    # "no stems" here -- NOTE(review): confirm that blank cells mean zero.
    filter(!is.na(count)) |>
    uncount(count)

  bind_rows(regen_lt2_long, regen_gt2_long) |>
    arrange(site, treat, plot, spp, dbh) |>
    mutate(treat = factor(treat, c("gs", "ld", "ha", "hd")))
})

# We decided that the minor species are not that interesting here, so we are
# grouping them into "other". I'll also order them by their prevalence, which
# will aid in plotting.
# Reduce number of species: everything except rw, to and df becomes "other",
# then the factor levels are ordered by how many stems each level has.
regen_3 <- regen_2 |>
  mutate(spp = if_else(spp %in% c("rw", "to", "df"), spp, "other")) |>
  mutate(spp = forcats::fct_reorder(spp, spp, .fun = length))

# Save this data for summary, visualization, and potentially, modeling.
# Publish the final stem table under its short name and write it, together
# with the crown base height table, to a single .rda file.
regen <- regen_3
save(list = c("regen", "regen_lcbh"), file = "regen_wrangled.rda")