10 Import and wrangle regeneration data

Load those libraries

Code

library(readxl)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)

Load the data, assigning new column names.

Code

# Read the regeneration workbook, skipping its first two rows and supplying
# our own column names (presumably the skipped rows are the sheet's original
# headers -- confirm against the workbook).
regen_0 <- local({
  id_cols <- c("site", "treat", "plot", "spp")
  size_class_cols <- c("dbh_lt1", "dbh_lt2")
  stem_cols <- paste0("dbh", 1:28)

  read_excel(
    "../data/Regen10yr.xlsx",
    skip = 2,
    col_names = c(id_cols, size_class_cols, "lcbh_gte2", stem_cols)
  )
})

10.1 Clean labels

First, I’ll take a look at the character type columns.

Code

# List the distinct values found in each character column.
regen_0 |>
  select(where(is.character)) |>
  map(\(column) unique(column))

$site
[1] "Waldo South"     "Waldo North"     "CAMP 6"          "WHISKEY SPRINGS"

$treat
[1] "GS"      "LD"      "HA"      "HD"      "HD - 2B" "HD -2A" 

$plot
[1] "NE" "SE" "SW" "NW" "W"  "S"  "E"  "N" 

$spp
[1] "TO"               "RW"               "DF"               "GF"              
[5] "MD"               "WM"               "CA NETTLE"        "QUMU (chinkapin)"

I’ll take the following steps to clean up this data:

I’ll make them all lowercase for convenience.
The decision was made earlier to omit the plot HD - 2A, AKA “MD” (?sec-load-fuel-data).
Remove observations of “CA NETTLE”
Change “QUMU …” to “GC”

Code

# Normalise labels: lowercase every character column, recode "qumu ..." to
# "gc", drop the omitted HD 2A plot and the "ca nettle" records, then reduce
# each treatment label to its two-letter code.
regen_1 <- regen_0 |>
  mutate(across(where(is.character), tolower)) |>
  mutate(spp = if_else(str_detect(spp, "qumu"), "gc", spp)) |>
  filter(
    # The sheet spells this label "HD -2A" but its sibling "HD - 2B". Accept
    # any spacing around the dash so the plot is still excluded if the label
    # is ever tidied up; an exact match would silently stop filtering.
    !str_detect(treat, "^hd\\s*-\\s*2a$"),
    spp != "ca nettle"
  ) |>
  mutate(treat = str_extract(treat, "gs|ld|ha|hd"))

This resulted in the removal of 14 rows.

10.2 Stretch data

Next, data is in an “unlimited-wide” format, and of a few different types. There are counts for diameter classes (0-1 inch and 1 - 2 inch), and actual diameters (in centimeters) for all trees over 2 inches.

I’ll make the simplifying assumption that individuals in the 0-1 and 1-2 inch size classes are, on average, at the midpoint of their class. So each individual is 0.5 and 1.5 inches DBH for the first and second size classes, respectively.

I’ll put live crown base heights in a separate table. For stems, I’ll get one long list of each regenerating stem. Finally, I’ll convert inches to centimeters.

Code

# Live crown base heights are kept in their own table, with the same
# identifying columns as the stem data.
regen_lcbh <- select(regen_1, site, treat, plot, spp, lcbh_gte2)

# Build one row per regenerating stem with a single numeric `dbh` column.
regen_2 <- local({
  id_cols <- c("site", "treat", "plot", "spp")

  # Individually measured stems: one row per non-missing dbh1..dbh28 cell.
  # NOTE(review): these values are passed through unconverted; the text says
  # they are already in centimeters -- confirm against the field sheets.
  regen_gt2_long <- regen_1 |>
    select(all_of(id_cols), matches("^dbh\\d+$")) |>
    pivot_longer(
      matches("^dbh\\d+$"),
      names_to = NULL,
      values_to = "dbh",
      values_drop_na = TRUE
    )

  # Size-class tallies: expand each count into that many rows, each assigned
  # its class midpoint in centimeters (0.5 in = 1.27 cm, 1.5 in = 3.81 cm).
  regen_lt2_long <- regen_1 |>
    select(all_of(id_cols), dbh_lt1, dbh_lt2) |>
    pivot_longer(
      matches("^dbh_"),
      names_to = "dbh",
      names_pattern = ".*_(lt1|lt2)",
      names_transform = \(x) case_match(x, "lt1" ~ 1.27, "lt2" ~ 3.81),
      values_to = "count",
      # uncount() errors on missing weights, so drop blank (NA) tally cells
      # instead of letting one empty cell abort the whole pipeline.
      values_drop_na = TRUE
    ) |>
    uncount(count)

  bind_rows(regen_lt2_long, regen_gt2_long) |>
    arrange(site, treat, plot, spp, dbh) |>
    mutate(treat = factor(treat, levels = c("gs", "ld", "ha", "hd")))
})

We decided that the minor species are not that interesting here, so we are grouping them into “other.” I’ll also order the species by their prevalence, which will aid in plotting.

Code

# Keep redwood ("rw"), "to" and "df" as their own species codes and lump every
# other code into "other"; then order the factor levels by how many stems
# each has (least frequent first).
regen_3 <- regen_2 |>
  mutate(spp = if_else(spp %in% c("rw", "to", "df"), spp, "other")) |>
  mutate(spp = forcats::fct_reorder(spp, spp, .fun = length))

Save this data for summary, visualization, and potentially, modeling.

Code

# Write the final stem table (as `regen`) and the crown base height table to
# a single .rda file in the working directory.
regen <- regen_3
save(list = c("regen", "regen_lcbh"), file = "regen_wrangled.rda")