10  Import and wrangle regeneration data

Load those libraries

Code
library(readxl)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)

Load the data and assign new column names.

Code
# Read the raw regeneration workbook, skipping the first two rows of the sheet
# and supplying our own column names in place of the spreadsheet's.
regen_0 <- local({
  # Identifying columns for each record
  id_cols <- c("site", "treat", "plot", "spp")
  # Two "dbh_lt*" columns plus the live crown base height column
  class_cols <- c("dbh_lt1", "dbh_lt2", "lcbh_gte2")
  # Remaining columns: dbh1, dbh2, ..., dbh28
  stem_cols <- paste0("dbh", 1:28)

  read_excel(
    "../data/Regen10yr.xlsx",
    skip = 2,
    col_names = c(id_cols, class_cols, stem_cols)
  )
})

10.1 Clean labels

First, I’ll take a look at the character type columns.

Code
# Show the distinct values present in each character column
regen_0 |>
  select(where(is.character)) |>
  map(unique)
$site
[1] "Waldo South"     "Waldo North"     "CAMP 6"          "WHISKEY SPRINGS"

$treat
[1] "GS"      "LD"      "HA"      "HD"      "HD - 2B" "HD -2A" 

$plot
[1] "NE" "SE" "SW" "NW" "W"  "S"  "E"  "N" 

$spp
[1] "TO"               "RW"               "DF"               "GF"              
[5] "MD"               "WM"               "CA NETTLE"        "QUMU (chinkapin)"

I’ll take the following steps to clean up this data:

  • I’ll make them all lowercase for convenience.
  • The decision was made earlier to omit the plot HD - 2A (recorded in this data as “HD -2A”), AKA “MD” (?sec-load-fuel-data).
  • Remove observations of “CA NETTLE”
  • Change “QUMU …” to “GC”
Code
# Standardize labels: lowercase all text, recode chinkapin, drop unwanted rows
regen_1 <- regen_0 |>
  mutate(
    across(where(is.character), tolower),
    # "qumu (chinkapin)" becomes the two-letter code "gc"
    spp = if_else(str_detect(spp, "qumu"), "gc", spp)
  ) |>
  # Non-tree records
  filter(spp != "ca nettle") |>
  # Omitted plot; the label is matched exactly as it appears after lowercasing
  filter(treat != "hd -2a") |>
  # Strip plot suffixes such as " - 2b", keeping only the treatment code
  mutate(treat = str_extract(treat, "gs|ld|ha|hd"))

This resulted in the removal of 14 rows.

10.2 Stretch data

Next, the data are in an “unlimited-wide” format and of a few different types. There are counts for two diameter classes (0-1 inch and 1-2 inch), and actual diameters (in centimeters) for all trees over 2 inches.

I’ll make the simplifying assumption that individuals in the 0-1 and 1-2 inch size classes are on average at their midpoint in diameter. So each individual is 0.5 and 1.5 inches DBH for the first and second size classes, respectively.

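I’ll put live crown base heights in a separate table. For stems, I’ll get one long list of each regenerating stem. Finally, I’ll convert the size-class midpoints from inches to centimeters (0.5 in = 1.27 cm and 1.5 in = 3.81 cm) so that all diameters share the same unit.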

Code
# Keep live crown base heights in their own table, keyed by the id columns
regen_lcbh <- regen_1 |>
  select(site, treat, plot, spp, lcbh_gte2)

# Reshape to one row per regenerating stem, with a single numeric `dbh` column.
regen_2 <- local({
  id_cols <- c("site", "treat", "plot", "spp")

  # Map a size-class suffix to its assumed midpoint diameter in cm
  # (0.5 in * 2.54 = 1.27; 1.5 in * 2.54 = 3.81).
  class_midpoint_cm <- function(size_class) {
    case_match(size_class, "lt1" ~ 1.27, "lt2" ~ 3.81)
  }

  # Size-class counts: one row per class, then one row per counted stem.
  stems_counted <- regen_1 |>
    select(all_of(id_cols), dbh_lt1, dbh_lt2) |>
    pivot_longer(
      matches("^dbh_"),
      names_to = "dbh",
      names_pattern = ".*_(lt1|lt2)",
      values_to = "count",
      names_transform = class_midpoint_cm
    ) |>
    uncount(count)

  # Individually measured stems: stack dbh1..dbh28 and drop the empty cells.
  stems_measured <- regen_1 |>
    select(all_of(id_cols), matches("dbh\\d")) |>
    pivot_longer(matches("^dbh\\d+$"), values_to = "dbh", names_to = NULL) |>
    filter(!is.na(dbh))

  # Note: rows are sorted while `treat` is still character; the factor
  # levels are applied afterwards.
  bind_rows(stems_counted, stems_measured) |>
    arrange(site, treat, plot, spp, dbh) |>
    mutate(treat = factor(treat, c("gs", "ld", "ha", "hd")))
})

We decided that the minor species are not that interesting here, so we are grouping them into “other.” I’ll also order the species by their prevalence, which will aid in plotting.

Code
# Lump every species except rw, to, and df into "other", then turn `spp` into
# a factor whose levels are ordered by how many stems each one has.
regen_3 <- regen_2 |>
  mutate(spp = if_else(spp %in% c("rw", "to", "df"), spp, "other")) |>
  mutate(spp = forcats::fct_reorder(spp, spp, length))

Save this data for summary, visualization, and potentially, modeling.

Code
# Give the final stem table its published name and write both tables to disk
regen <- regen_3
save(list = c("regen", "regen_lcbh"), file = "regen_wrangled.rda")