--- title: "Advanced plotting" author: "Morgan Sparks, Bryan M. Maitland" bibliography: '`r system.file("references.bib", package="hatchR")`' output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Advanced plotting} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} # rmd style knitr::opts_chunk$set( collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE, # eval = FALSE, fig.align = "center", fig.width = 6 ) options(tibble.print_min = 5, tibble.print_max = 5) # load packages library(hatchR) library(purrr) library(tidyr) library(dplyr) library(ggplot2) library(lubridate) library(stringr) library(ggridges) library(patchwork) ``` ## Overview The goal of this vignette is to provide some examples of how you might access results from `predict_phenology()` and how you might plot your data. ```{r, eval=FALSE} library(hatchR) library(purrr) library(tidyr) library(dplyr) library(ggplot2) library(lubridate) library(stringr) library(ggridges) library(patchwork) ``` ## A simple example We'll return to our first example with Woody Island to show how you might present data for a single spawning season. First let's get our results and output them into a plotting format. ```{r} # run map to get a vector of models WI_mods <- map_df( c("hatch", "emerge"), model_select, author = "Beacham and Murray 1990", species = "sockeye", model = 2 ) |> select(expression) # make vector of spawn dates WI_spawn_dates <- c("1990-08-14", "1990-08-18", "1990-09-3") # make variables grid WI_var_grid <- expand_grid(model = WI_mods$expression, spawn.date = WI_spawn_dates) # run pmap for all variable combinations WI_results <- pmap(WI_var_grid, predict_phenology, data = woody_island, # additional arguments required by function dates = date, temperature = temp_c ) # Now that we have our model results, lets put them in a tibble for plotting WI_dev.period <- WI_results |> map_df("dev.period") |> tibble() |> mutate( phenology = c(rep("hatch", 3), rep("emerge", 3)), # add a phenology column days = as.numeric(stop - start), # column of the number of days (same as days2done) index = c(1:3, 5:7) # index for plotting later ) ``` Now we'll make a plot with the temperature and rectangles to show duration for the hatch and emergence periods. ```{r} # min and max for x-axis with 30 day buffer x_lims <- c(min(WI_dev.period$start), max(WI_dev.period$stop) + days(30)) y_lims <- c(-1, 16) # filter data for plot p_data <- woody_island |> filter(date >= x_lims[1], date <= x_lims[2]) plot1 <- p_data |> ggplot() + geom_rect( data = WI_dev.period, aes( xmin = start, xmax = stop, ymin = 10 - index, ymax = 10.5 - index, fill = phenology )) + geom_point(aes(x = date, y = temp_c), size = 0.25, ) + geom_line(aes(x = date, y = temp_c)) + lims(x = x_lims, y = y_lims) + scale_fill_manual(values = c("navyblue", "dodgerblue")) + theme_classic() + theme( legend.position = "inside", legend.position.inside = c(0.75, 0.75) ) plot1 ``` We can add some labels for days to hatch or emergence and change the labels on the legend. ```{r} plot1 + geom_label( data = WI_dev.period, aes( x = start + (stop - start) / 1.25, y = (10.25 - index), label = days ) ) + scale_fill_manual(values = c("navyblue", "dodgerblue"), labels = c("Emerge", "Hatch")) + labs(fill = "Phenology", y = "Temperature (C)") ``` ## Plotting multiple years Let's use the Crooked River data again as it has a number of continuous years of data without any temperature anomalies. ```{r} # spawn dates spawn_dates <- map( c(2011:2014), # year vector to map for custom function function(year) { # custom paste function c( paste0(year, "-09-01"), paste0(year, "-09-20"), paste0(year, "-09-30") ) } ) |> unlist() # run map to get a vector of models dev_mods <- map_df( c("hatch", "emerge"), model_select, author = "Austin et al. 2019", species = "bull trout", model = "MM" ) |> select(expression) # variable grid var_grid <- expand_grid(model = dev_mods$expression, spawn.date = spawn_dates) ### multiple input mapping crooked_predictions <- pmap(var_grid, # combos of variables to iterate over predict_phenology, # function data = crooked_river, # additional arguments required by function dates = date, temperature = temp_c ) # make duration dataframe CR_dev.period <- crooked_predictions |> map_df("dev.period") |> tibble() |> mutate( phenology = c(rep("hatch", 12), rep("emerge", 12)), # add a phenology column days = as.numeric(stop - start), # column of the number of days (same as days2done) year = year(start), # new column for just year (to facet wrap with) index = c(rep(1:3, 4), rep(5:7, 4)) ) |> # new index because we're making 4 independent graphs na.omit() |> filter(year >= 2011) # remove the 2010 year ``` We'll now make similar plot as above but use `facet_wrap()` to break plots apart by year. We do this by cutting to make a developmental year because phenology spans two calendar years. As a result, we cut between Aug. 1 of one year and Aug. 1 of the subsequent year (August 1 instead of Sept. 1 to match the plot limits we're setting up) and then make that a developmental year (the year the adult spawned). Then we can `facet_wrap()` to break the years up in the plot. ```{r} ### add a column called year (in this case the developmental year) cut_ints <- ymd( c( "2010-08-01", "2011-08-01", "2012-08-01", "2013-08-01", "2014-08-01", "2015-08-01", "2016-08-01" ), tz = "UTC" ) # cut to our cut intervals and label by developmental year crooked_river_sy <- crooked_river |> mutate(year = cut(date, breaks = cut_ints, labels = c(2010:2015) )) |> # cut coerces our labels to factors, this changes them back to numbers mutate(year = as.numeric(as.character(year))) |> filter(year > 2010 & year < 2015) # remove 2010 and 2015 # make plot (facet_wrap by development year) ggplot() + geom_rect(data = CR_dev.period, aes( xmin = start, xmax = stop, ymin = 10 - index, ymax = 10.5 - index, fill = phenology )) + geom_point(data = crooked_river_sy, aes(x = date, y = temp_c), size = 0.25, ) + geom_line(data = crooked_river_sy, aes(x = date, y = temp_c)) + # set limits lims(x = c(min(CR_dev.period$start) - days(30), max(CR_dev.period$stop) + days(30))) + scale_x_datetime(date_labels = "%b") + # facet wrap here subset plots on developmental year facet_wrap(~year, ncol = 1, scales = "free_x") + scale_fill_manual(values = c("navyblue", "dodgerblue"), labels = c("Emerge", "Hatch")) + # custom colors labs(fill = "Phenology", x = "Date", y = "Temperature (C)") + theme_classic() ``` As you can see, this allows us to put our figures on the same X scale despite flowing into multiple years. You'll have to figure out the right way to cut up your data (or find similar alternative options). But this solves a primary problem of plotting phenology for multiple years, which is that our results span multiple years and may start and stop in the same intervals over multiple years (particularly for fall spawning species). ::: callout-note Note that we run out of temperature data in 2015, which is why it is cutoff relative to the others. If you really wanted to fix that you could be adding on the necessary dates with no temperature data to the `crooked_river_sy` tibble. ::: ## Plotting multiple individual spawning events Another common plotting scenario is that you might have data for when a number of fish spawn in a single or multiple seasons and want to represent those results. We can show a hypothetical scenario with the bull trout example above. In this scenario, let's assume we have 300 spawning adult pairs that spawn in a roughly normal distribution with mean Sept. 15 and a standard deviation of 5 days (which makes Sept. 1st and 30th 3 sd from the mean). ```{r} # draw from a normal distribution with above parameters set.seed(322) # allows us to draw same random sample every time for the example fish_dist <- round(rnorm(300, mean = 15, sd = 5 ), 0) # summary summary(fish_dist) # change 0s to 1s fish_dist[which(fish_dist == 0)] <- 1 # look at distribution hist(fish_dist) # make a vector of fish dates and add mo_day to combine with fish_dist fish_dates <- tibble(date = seq(ymd("2014-09-01"), ymd("2014-09-30"), by = "days")) |> mutate(mo_day = mday(date)) # loop for repping (could do with map, too) fish_dates_norm <- NULL for (d in fish_dates$mo_day) { day <- fish_dates$date[d] # get date spawners <- fish_dist[fish_dist == d] # vector of spawners in fish_dist spawners_date <- rep(day, times = length(spawners)) # make vector of date repeated the length of the vector spawners fish_dates_norm <- append(fish_dates_norm, spawners_date) # append to out file } head(fish_dates_norm) # now let's make the same vector for 2013 so we can compare two years fish_dates_13 <- fish_dates_norm |> str_replace_all("2014", "2013") # replace all the 2014s with 2013 using string replace # add the two together in a vector and sort fish_dates_all <- sort(append(fish_dates_norm, fish_dates_13)) # looks good! head(fish_dates_all) tail(fish_dates_all) # remember these all need to be in a character string for predict_phenology()! fish_dates_str <- format(fish_dates_all, "%Y-%m-%d") ``` Now we can map hatch and emergence timing across the vector of dates we have created for the two years. Notice we have chosen two fairly thermally different years with quite different developmental phenology estimates (see plot from [Plotting multiple years]). Also, we're going to take a short cut with our mapping so we don't store all the results of each date (1200 total, 600 for each developmental period estimated) in our memory. So instead, we will pipe `|>` our `pmap()` into a `map_dbl("dev.period")` command so we only are left with our phenology estimates and not all the other data we would have otherwise outputted. ```{r} # make variable grid for pmap (we use dev_mods from previous example) spawn_grid <- expand_grid(model = dev_mods$expression, spawn.date = fish_dates_str) bull_trout_dist <- pmap(spawn_grid, predict_phenology, data = crooked_river, # additional arguments required by function dates = date, temperature = temp_c ) |> # pipe! map_df("dev.period") # just output dev.period ### now let's add the key columns for plotting # first you can see the output and the spawn grid have the same number of rows so # we can borrow the format from the spawn grid length(bull_trout_dist) dim(spawn_grid) # moreover from the grid, you can see that it predicts hatch for all the dates first # and then predicts emergence head(spawn_grid$model) tail(spawn_grid$model) # now we can borrow all that information to make the exact data object we want bull_trout_dist_phenology <- bull_trout_dist |> mutate( Date = stop, # rename stop to Date Phenology = c(rep("Hatch", 600), rep("Emerge", 600)) ) |> # make phenology column select(Date:Phenology) # select date and phenology columns to put in object # make a similar object with spawning data (notice we name the columns the exact same) bull_trout_spawn_phenology <- tibble(Date = fish_dates_all) |> mutate(Phenology = "Spawn") # combine spawn and hatch/emerge objects for a final plotting object bull_trout_all_phenology <- bind_rows( bull_trout_spawn_phenology, bull_trout_dist_phenology ) |> mutate(y = year(Date)) |> na.omit() head(bull_trout_all_phenology) tail(bull_trout_all_phenology) ``` Before we plot, let's briefly walk through what we did there. First we made our predictions but then only output the start and stop data with the `map_df("dev.period")` command. Next we added a `Phenology` column with the appropriate phenology type determined by the structure of our `spawn_grid` object. Then we made a similar object for spawning which we combined with the mapping ouput all into `bull_trout_all_phenology`. The reason for this is we will plot the distributions of spawning, hatching, and emergence across years. You can see we also need to cut like we did before to add a developmental year and we use `geom_blank()` and custom limits to set everything on the same scale. ```{r} # before we plot, because we are again plotting across years we need to cut up our data # still need 2012 and 2015 because phenology will leak over cut_ints_1314 <- ymd(c( "2012-08-01", "2013-08-01", "2014-08-01", "2015-08-01" ), tz = "UTC") # cut according to our cut intervals and label by developmental year bull_trout_phenology_cut <- bull_trout_all_phenology |> mutate(year = cut(Date, breaks = cut_ints_1314, labels = c(2012:2014) )) |> mutate(year = as.numeric(as.character(year))) |> na.omit() # look to see max development days to make custom lims bull_trout_dist |> mutate(phen_days = stop - start) |> slice_max(phen_days) # output max value # make custom lims using 30 days before first spawn and 244 days max (214 + 30) from above # makes a 30 day buffer cust_lims <- tibble(min = c(ymd(c("2013-08-01", "2014-08-01"), tz = "UTC"))) |> mutate(max = min + days(244)) |> pivot_longer(everything(), names_to = "type", values_to = "Date") |> mutate(year = c(2013, 2013, 2014, 2014)) # make plot ggplot() + geom_blank(data = cust_lims, aes(x = Date)) + # we use geom_blank to keep everything on the same limits from our custom lims object geom_density_ridges( data = bull_trout_phenology_cut, aes(x = Date, y = Phenology, color = Phenology, fill = Phenology), jittered_points = TRUE, position = position_points_jitter(width = 0.05, height = 0), point_shape = "|", point_size = 2, point_alpha = 1, alpha = 0.7 ) + lims(x = c(min(cust_lims$Date) - days(1), max(cust_lims$Date) + days(1))) + scale_x_datetime(date_labels = "%b") + scale_fill_brewer(palette = "Dark2") + scale_color_brewer(palette = "Dark2") + facet_wrap(~year, ncol = 1, scales = "free_x") + theme_classic() + theme(legend.position = "none") ``` As you can see, even small differences in thermal regime can make for big differences in developmental phenology. We could even combine this plot with the plot we made previously to show regime and duration alongside the distributions. For combining plots we highly recommend the [patchwork package](https://patchwork.data-imaginist.com/). ```{r} # same plot as before but we'll filter the years to 2013 and 2014 CR_dev.period_1314 <- CR_dev.period |> filter(year %in% c(2013, 2014)) crooked_river_sy_1314 <- crooked_river_sy |> filter(year %in% c(2013, 2014)) # name plot as object p1 p1 <- ggplot() + geom_rect(data = CR_dev.period_1314, aes( xmin = start, xmax = stop, # draw bars ymin = 10 - index, ymax = 10.5 - index, # use index to vertically place rects fill = phenology )) + geom_point(data = crooked_river_sy_1314, aes(x = date, y = temp_c), size = 0.25, ) + geom_line(data = crooked_river_sy_1314, aes(x = date, y = temp_c)) + lims(x = c(min(CR_dev.period$start) - days(30), max(CR_dev.period$stop) + days(30))) + # set limits scale_x_datetime(date_labels = "%b") + # change X label to month facet_wrap(~year, ncol = 1, scales = "free_x") + # facet wrap here subset plots on developmental year scale_fill_manual(values = c("navyblue", "dodgerblue"), labels = c("Emerge", "Hatch")) + # custom colors labs(fill = "Phenology", x = "Date", y = "Temperature (C)") + theme_classic() # name distribution plot as object p2 p2 <- ggplot() + geom_blank(data = cust_lims, aes(x = Date)) + # we use geom_blank to keep everything on the same limits from our custom lims object geom_density_ridges( data = bull_trout_phenology_cut, aes(x = Date, y = Phenology, color = Phenology, fill = Phenology), jittered_points = TRUE, position = position_points_jitter(width = 0.05, height = 0), point_shape = "|", point_size = 2, point_alpha = 1, alpha = 0.7 ) + lims(x = c(min(cust_lims$Date) - days(1), max(cust_lims$Date) + days(1))) + scale_x_datetime(date_labels = "%b") + scale_fill_brewer(palette = "Dark2") + scale_color_brewer(palette = "Dark2") + facet_wrap(~year, ncol = 1, scales = "free_x") + theme_classic() + theme(legend.position = "none") ### patchwork plot # plot the two plots side by side using + operator, you can stack with / operator p1 + p2 ``` There are many more ways you can plot your data but we these are meant to serve as templates and/or guides around common issues related to plotting phenology data. # References {.unnumbered}