Commit c282ab0b authored by Turner's avatar Turner
Browse files

Add context to trends

parent 310e5312
Loading
Loading
Loading
Loading

R/trends.R

0 → 100644
+148 −0
Original line number Diff line number Diff line
## Function for computation of trends in annual capacity factors
## Trends are evaluated for four alternative time periods:
## "Full period":       1970 - 2021
## "Flow period":       1980 - 2019 (commensurate with DayFlow)
## "Flow period plus":  1980 - 2021
## "923 period":        2001 - 2021 (reliable generation data)

get_CF_trends <- function(annual_CFs){

  tribble(
    ~start_yr, ~end_yr, ~analysis,
    1970, 2021, "full period",
    1980, 2019, "flow period",
    1980, 2021, "flow period plus",
    2001, 2021, "923 period"
  ) |>
    pmap_dfr(function(start_yr, end_yr, analysis){

      message(paste0("Computing trends for ",
                     analysis,
                     ": ", start_yr, "->", end_yr))

      annual_CFs |>
        filter(year %in% start_yr:end_yr) |>
        split(~EIA_ID) |>
        map_dfr(function(plant_data){

          # deal with any inf values (caused by cap = 0 in EIA data)
          plant_data |>
            mutate(CF = if_else(is.infinite(CF), NA_real_, CF)) ->
            plant_data_noInf

          n_yrs <- nrow(plant_data_noInf)

          # how many na values affected this slope analysis?
          plant_data_noInf |>
            filter(is.na(CF)) |>
            nrow() ->
            n_bad_yrs

          if((n_yrs - n_bad_yrs) / n_yrs < 0.8){
            return(
              tibble(
                EIA_ID = plant_data_noInf[["EIA_ID"]][1],
                sens_slope = NA_real_,
                intercept = NA_real_,
                p_value = NA_real_,
                coverage = (n_yrs - n_bad_yrs) / n_yrs
              )
            )
          }

          # compute Sen's slope
          senSlope(CF ~ year, data = plant_data,
                   intercept = "A1m") -> sen
          sen$coefficients[[2]] -> computed_slope
          sen$coefficients[[1]] -> computed_intercept

          # set up blocked bootstrap (block size = 3)
          tibble(
            index = 1:(n_yrs - 2)
          ) |>
            mutate(`1` = plant_data_noInf[["CF"]][index],
                   `2` = plant_data_noInf[["CF"]][index + 1],
                   `3` = plant_data_noInf[["CF"]][index + 2]) |>
            filter(!is.na(`1`), !is.na(`2`), !is.na(`3`)) ->
            blocks

          # blocked bootstrap
          n_size <- 10000

          # get serial correlation (to determine if block is needed)
          acf1 <- acf(plant_data_noInf$CF, plot = FALSE,
                      na.action = na.pass)[[1]][[2]]

          if(acf1 > 0.3){
            1:n_size |>
              future_map_dbl(function(x){

                1:(nrow(blocks)) |>
                  sample(replace = T, size = 14) ->
                  block_indices

                blocks[block_indices,] |>
                  pivot_longer(!index) |>
                  mutate(year = 1980:2021) ->
                  blocked_sample

                senSlope(value ~ year, data = blocked_sample,
                         intercept = "A1m") ->
                  block_sen

                return(block_sen[["coefficients"]][[2]])


              }, .options = furrr_options(seed = 123)) -> sample_sens
          }else{
            1:n_size |>
              future_map_dbl(function(x){

                tibble(CF = sample(plant_data_noInf$CF, replace = T)) |>
                  mutate(year = start_yr:end_yr) ->
                  boot_sample

                senSlope(CF ~ year, data = boot_sample,
                         intercept = "A1m") ->
                  boot_sen

                return(boot_sen[["coefficients"]][[2]])


              }, .options = furrr_options(seed = 123)) -> sample_sens
          }

          if(computed_slope >= 0){
            p <- sum(sample_sens > computed_slope) / n_size
          }else{
            p <- sum(sample_sens < computed_slope) / n_size
          }

          return(
            tibble(
              EIA_ID = plant_data_noInf[["EIA_ID"]][1],
              sens_slope = computed_slope,
              intercept = computed_intercept,
              p_value = p,
              coverage = (n_yrs - n_bad_yrs) / n_yrs
            )
          )

        }) ->
        all_slopes_for_analysis_x

      return(
        all_slopes_for_analysis_x |>
          mutate(analysis = !!analysis)
      )

    }) -> all_trend_analyses

  return(
    all_trend_analyses
  )


}