Commit 1ac14b4b authored by Turner's avatar Turner
Browse files

model set up

parent c282ab0b
Loading
Loading
Loading
Loading

R/dam_conversion.R

0 → 100644
+44 −0
Original line number Diff line number Diff line
## Function for converting EIA level data to EHA dams
## e.g., Hoover should be combined to a single point rather than...
## ... Hoover (AZ) and Hoover (NV) separately

convert_EIA_to_COMPLX <- function(EHA,
                                  EIA_data){

  EIA_data |>
    select(EIA_ID) |> unique() |>
    # remove post-2001 EIAs that cover multiple dams
    filter(!(EIA_ID %in% c(10186,
                           54639,
                           54134,
                           50447,
                           57690,
                           54134))) |>
    pull(EIA_ID) ->
    study_EIA_IDs


  st_read(paste0(EHA, "/Plant_external2023.shp")) |>
    as_tibble() |>
    mutate(COMPLXID = str_replace(EHA_PtID, "\\_..*", "")) |>
    select(EIA_ID = EIA_PtID, COMPLXID) |>
    unique() |>
    filter(!is.na(EIA_ID), EIA_ID %in% study_EIA_IDs) ->
    EIA_to_COMPLX_mapping

  EIA_data |>
    left_join(EIA_to_COMPLX_mapping,
              by = join_by(EIA_ID)) |>
    summarise(gen_MWh = sum(gen_MWh),
              nameplate_MW = sum(nameplate),
              .by = c(COMPLXID, year)) |>
    left_join(unique(select(EIA_data, year, n_hrs)),
              by = join_by(year)) |>
    mutate(cap_MWh = nameplate_MW * n_hrs,
           CF = gen_MWh / cap_MWh) |>
    arrange(COMPLXID) ->
    gen_cap_CF_1970_2021

  return(gen_cap_CF_1970_2021)

}

R/models.R

0 → 100644
+146 −0
Original line number Diff line number Diff line
## Functions for model set up, calibration, and validation

link_plant_to_COMID_and_H <- function(
    CF_trends,   # CF trends (contains all target dams)
    EHA,         # Dam locations and river reaches
    HESC,        # Dam hydraulic head and heights
    HILARRI      # Reservoir
    ){

  # get list of candidate dams/plants (EIA IDs)
  CF_trends |>
    filter(analysis == "flow period") |>
    filter(!is.na(sens_slope)) |>
    pull(COMPLXID) -> candidate_dams

  # get dam detail
  st_read(paste0(EHA, "/Plant_external2023.shp"),
          quiet = TRUE) |>
    arrange(-CH_MW) |>
    as_tibble() |>
    mutate(COMPLXID = str_replace(EHA_PtID, "\\_..*", "")) |>
    filter(COMPLXID %in% candidate_dams,
           Type != "PS",
           !State %in% c("HI", "AK")) |>
    select(plant_name = PtName, COMPLXID, mode = Mode) ->
    EHA_

  EHA_ |> filter(duplicated(COMPLXID)) |> pull(COMPLXID) ->
    duplicates

  EHA_ |> filter(COMPLXID %in% duplicates) |>
    arrange(COMPLXID) |>
    split(~COMPLXID) |>
    map_dfr(function(dam){
      tibble(
        plant_name = paste(dam$plant_name, collapse = " / "),
        COMPLXID = unique(dam$COMPLXID),
        mode = first(dam$mode)
      )
    }) -> duplicate_correction

  EHA_ |> filter(!COMPLXID %in% duplicates) |>
    bind_rows(duplicate_correction) -> EHA_ready

  # get dam hydraulic head and height information
  read_csv(paste0(HESC, "/HESC_V2_Core_Characteristics.csv"),
           show_col_types = FALSE) |>
    select(NHYD_HT, NID_HT, G_HT, COMPLXID) |>
    filter(COMPLXID %in% candidate_dams) |>
    mutate(NHYD_HT = if_else(NHYD_HT > 0, NHYD_HT, NA_real_),
           NID_HT = if_else(NID_HT > 0, NID_HT, NA_real_),
           G_HT = if_else(G_HT > 0, G_HT, NA_real_)) |>
    summarise(NHYD_HT = sum(NHYD_HT, na.rm = T),
              NID_HT = sum(NID_HT, na.rm = T),
              G_HT = sum(G_HT, na.rm = T),
              .by = COMPLXID) |>
    unique() ->
    HESC_ready

  # get mapping for plant to stream reach
  read_csv(paste0(HILARRI,
                  "/HILARRI_v2_SubsetHydropowerDams_Plants.csv"),
           show_col_types = FALSE) |>
    select(COMPLXID = eha_cmplx, COMID = nhdv2comid, Hyd_hd_est_m) |>
    unique() |>
    filter(COMPLXID %in% candidate_dams) |>
    # deal with weird duplicated COMID cases
    filter(!(COMPLXID == "hc0266" & is.na(COMID)),
           !(COMPLXID == "hc0015" & COMID == 12990),
           !(COMPLXID == "hc1239" & COMID == 14983642)) ->
    plant_to_COMID

    EHA_ready |>
      left_join(plant_to_COMID, by = join_by(COMPLXID)) |>
      mutate(COMID = as.integer(COMID)) |>
      left_join(HESC_ready, by = join_by(COMPLXID)) |>
      mutate(H_m = case_when(
        !is.na(NHYD_HT) ~ NHYD_HT,
        is.na(NHYD_HT) & !is.na(Hyd_hd_est_m) ~ Hyd_hd_est_m,
        is.na(NHYD_HT) & is.na(Hyd_hd_est_m) & !is.na(NID_HT) ~ NID_HT,
        is.na(NHYD_HT) & is.na(Hyd_hd_est_m) & is.na(NID_HT) & grepl("Run-of-river", mode) ~ 20,
        TRUE ~ G_HT
      )) |>
      select(plant_name, mode, COMPLXID, COMID, H_m) ->
      plant_data_for_flow_model_uncorrected

    return(plant_data_for_flow_model_uncorrected)

}

correct_COMID_matching <- function(uncorrected_mapping,
                                   COMID_corrections){

  read_csv(COMID_corrections, show_col_types = FALSE,
           comment = "#") ->
    corrections

  uncorrected_mapping |>
    left_join(corrections, by = join_by(COMPLXID)) |>
    mutate(
      COMID = if_else(!is.na(COMID_replacement), COMID_replacement, COMID),
      COMID_from = if_else(is.na(COMID_replacement),
                           "HILARRI", "Correction")) |>
    select(-COMID_replacement) |>
    mutate(COMID = as.integer(COMID)) ->
    plant_data_for_flow_model_corrected


  return(plant_data_for_flow_model_corrected)


}

prep_model_data <- function(plant_to_flow_H_mapping,
                            CFs, flows){

  read_parquet(paste0(flows,
                      "DayFlow_dams_annual_BCM_AORC.parquet")) ->
    annual_flow_1980_2019

  CFs |>
    filter(year %in% 1980:2019) |>
    left_join(plant_to_flow_H_mapping |>
                select(COMPLXID, COMID),
                by = join_by(COMPLXID)) |>
    filter(!is.na(COMID)) |>
    left_join(annual_flow_1980_2019,
              by = join_by(year, COMID)) |>
    filter(!is.na(Corrected) | !is.na(Naturalized)) |>
    mutate(Corrected = if_else(is.infinite(Corrected), NA_real_, Corrected)) |>
    select(COMPLXID, year,
           gen_MWh,
           nameplate_MW,
           n_hrs,
           cap_MWh,
           CF,
           COMID,
           Qc_BCM = Corrected,
           Qn_BCM = Naturalized) ->
    data_model_ready

  return(data_model_ready)

}

+14 −10
Original line number Diff line number Diff line
@@ -22,8 +22,8 @@ get_CF_trends <- function(annual_CFs){

      annual_CFs |>
        filter(year %in% start_yr:end_yr) |>
        split(~EIA_ID) |>
        map_dfr(function(plant_data){
        split(~COMPLXID) |>
        future_map_dfr(function(plant_data){

          # deal with any inf values (caused by cap = 0 in EIA data)
          plant_data |>
@@ -41,7 +41,7 @@ get_CF_trends <- function(annual_CFs){
          if((n_yrs - n_bad_yrs) / n_yrs < 0.8){
            return(
              tibble(
                EIA_ID = plant_data_noInf[["EIA_ID"]][1],
                COMPLXID = plant_data_noInf[["COMPLXID"]][1],
                sens_slope = NA_real_,
                intercept = NA_real_,
                p_value = NA_real_,
@@ -67,7 +67,7 @@ get_CF_trends <- function(annual_CFs){
            blocks

          # blocked bootstrap
          n_size <- 10000
          n_size <- 10

          # get serial correlation (to determine if block is needed)
          acf1 <- acf(plant_data_noInf$CF, plot = FALSE,
@@ -75,15 +75,19 @@ get_CF_trends <- function(annual_CFs){

          if(acf1 > 0.3){
            1:n_size |>
              future_map_dbl(function(x){
              map_dbl(function(x){

                1:(nrow(blocks)) |>
                  sample(replace = T, size = 14) ->
                  sample(replace = T,
                         size = ceiling(n_yrs / 3)) ->
                  block_indices

                blocks[block_indices,] |>
                  pivot_longer(!index) |>
                  mutate(year = 1980:2021) ->
                  select(value) -> synth_CFs

                synth_CFs[1:n_yrs,] |>
                  mutate(year = start_yr:end_yr) ->
                  blocked_sample

                senSlope(value ~ year, data = blocked_sample,
@@ -93,7 +97,7 @@ get_CF_trends <- function(annual_CFs){
                return(block_sen[["coefficients"]][[2]])


              }, .options = furrr_options(seed = 123)) -> sample_sens
              }) -> sample_sens
          }else{
            1:n_size |>
              future_map_dbl(function(x){
@@ -109,7 +113,7 @@ get_CF_trends <- function(annual_CFs){
                return(boot_sen[["coefficients"]][[2]])


              }, .options = furrr_options(seed = 123)) -> sample_sens
              }) -> sample_sens
          }

          if(computed_slope >= 0){
@@ -120,7 +124,7 @@ get_CF_trends <- function(annual_CFs){

          return(
            tibble(
              EIA_ID = plant_data_noInf[["EIA_ID"]][1],
              COMPLXID = plant_data_noInf[["COMPLXID"]][1],
              sens_slope = computed_slope,
              intercept = computed_intercept,
              p_value = p,
+77 −14
Original line number Diff line number Diff line
@@ -3,23 +3,27 @@ library(targets)

# Set target options:
tar_option_set(
  packages = c("tibble",
               "dplyr",
               "tidyr",
               "lubridate",
               "readxl",
               "readr",
               "arrow",
               "purrr",
               "furrr",
               "future",
               "sf",
               "foreign",
               "zoo")
  packages = c("tibble",    # tidy data tables
               "dplyr",     # data wrangling tools
               "tidyr",     # data wrangling tools
               "lubridate", # dates/times
               "readxl",    # read xl spreadsheets
               "readr",     # read and write csvs
               "arrow",     # read and write parquets
               "purrr",     # efficient function wrapping
               "furrr",     # ^^ apply in parallel
               "future",    # parallel processing
               "sf",        # geospatial tools
               "foreign",   # read old dbf data
               "zoo",       # interpolation functions
               "smwrStats", # Sen's slope computation
               "stringr"    # string manipulation functions
               )
)

#options(clustermq.scheduler = "multisession")
#plan(multisession)
library(future)
plan(multisession)

# Run the R scripts in the R/ folder:
tar_source()
@@ -36,10 +40,69 @@ list(
    "./data/EIA/Plant/",
    format = "file"
  ),
  tar_target(
    EHA,
    "./data/ORNL_EHAHydroPlant_PublicFY2023/",
    format = "file"
  ),
  tar_target(
    EIA_annual_gen_cap_CF_1970_2021,
    get_EIA_annual_gen(gnr_dir = EIA_529_906_920_923,
                       plt_dir = EIA_860),
    format = "parquet"
  ),
  tar_target(
    dam_annual_gen_cap_CF_1970_2021,
    convert_EIA_to_COMPLX(EHA = EHA,
                          EIA_data = EIA_annual_gen_cap_CF_1970_2021),
    format = "parquet"
  ),
  tar_target(
    CF_trends,
    get_CF_trends(annual_CFs = dam_annual_gen_cap_CF_1970_2021),
    format = "parquet"
  ),
  tar_target(
    HESC,
    "./data/HESC_v2/",
    format = "file"
  ),
  tar_target(
    HILARRI,
    "./data/HILARRI_v2/",
    format = "file"
  ),
  tar_target(
    plant_to_flow_H_mapping_uncorrected,
    link_plant_to_COMID_and_H(CF_trends = CF_trends,
                              EHA = EHA,
                              HESC = HESC,
                              HILARRI = HILARRI),
    format = "parquet"
  ),
  tar_target(
    COMID_corrections,
    "./data/COMID_corrections.csv",
    format = "file"
  ),
  tar_target(
    plant_to_flow_H_mapping,
    correct_COMID_matching(
      uncorrected_mapping = plant_to_flow_H_mapping_uncorrected,
      COMID_corrections = COMID_corrections
      ),
    format = "parquet"
  ),
  tar_target(
    DayFlow,
    "./data/DayFlow/",
    format = "file"
  ),
  tar_target(
    model_data_ready,
    prep_model_data(plant_to_flow_H_mapping = plant_to_flow_H_mapping,
                    CFs = dam_annual_gen_cap_CF_1970_2021,
                    flows = DayFlow),
    format = "parquet"
  )
)
+17 −1
Original line number Diff line number Diff line
name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error
.Random.seed|object|1ce8be4dd88ddeeb|||||||||||||||
.Random.seed|object|0bab4df3215c519f|||||||||||||||
CF_trends|stem|4f6920aa2279e064|48a9aa0dfb6d812e|a915d0a81d2d7ff2|1102116871||t19557.5746577899s|e708c89a04ef8849|61240|parquet|local|vector|||48.53|UNRELIABLE VALUE Future none unexpectedly generated random numbers without specifying argument seed. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify seedTRUE. This ensures that proper, parallelsafe random numbers are produced via the LEcuyerCMRG method. To disable this check, use seedNULL, or set option future.rng.onMisuse to ignore.. UNRELIABLE VALUE Future none unexpectedly generated random numbers without specifying argument seed. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify seedTRUE. This ensures that proper, parallelsafe random numbers are produced via the LEcuyerCMRG method. To disable this check, use seedNULL, or set option future.rng.onMisuse to ignore.. UNRELIABLE VALUE Future none unexpectedly generated random numbers without specifying argument seed. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify seedTRUE. This ensures that proper, parallelsafe random numbers are produced via the LEcuyerCMRG method. To disable this check, use seedNULL, or set option future.rng.onMisuse to ignore.. UNRELIABLE VALUE Future none unexpectedly generated random numbers without specifying argument seed. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify seedTRUE. This ensures that proper, parallelsafe random numbers are produced via the LEcuyerCMRG method. To disable this check, use seedNULL, or set option future.rng.onMisuse to ignore.. UNRELIABLE VALUE Future none unexpectedly generated random numbers without specifying argument seed. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify seedTRUE. This ensures tha|
COMID_corrections|stem|fe576eb2f1f316a7|a5c6dd01c8e10d3f|ef46db3751d8e999|1378202948|./data/COMID_corrections.csv|t19557.68855602s|dcdbb2976eb6c19f|2790|file|local|vector|||1.03||
convert_EIA_to_COMPLX|function|9df2b8bcf6782f60|||||||||||||||
correct_COMID_matching|function|f9d752099e09e4c2|||||||||||||||
dam_annual_gen_cap_CF_1970_2021|stem|27e68e5a9e4b3ac1|1e1968883cec7872|3c79369af4a2c155|-937239282||t19557.5727854494s|bb12815b67d3fb5b|726227|parquet|local|vector|||0.38||
DayFlow|stem|134be264bbd024f6|b70c126b26c07662|ef46db3751d8e999|958592900|./data/DayFlow/|t19557.7065932559s|f4066a65ff755f48|0|file|local|vector|||0||
EHA|stem|7c2955a818917c6b|a659f2306efa7339|ef46db3751d8e999|366006733|./data/ORNL_EHAHydroPlant_PublicFY2023/|t19556.7977354229s|a75e104011428481|4096|file|local|vector|||1||
EIA_529_906_920_923|stem|429fb55d18257b0e|7e8ad8e942ac6d8d|ef46db3751d8e999|-1754950433|./data/EIA/Generation/|t19552.6038571127s|db7d75b1b9e45804|24576|file|local|vector|||0.82||
EIA_860|stem|db95ac5d1da1bd03|fd2da67a7010fb85|ef46db3751d8e999|-1646985544|./data/EIA/Plant/|t19552.6038846689s|c8fc69c6c41e109f|12288|file|local|vector|||2.19||
EIA_annual_gen|stem|8df8ad9ee42d49d6|922aefb19c709325|b0d9bfb57fa52bff|-1955140278||t19555.5959348734s|c08769dc4ce04be3|812175|parquet|local|vector|||69.36||
EIA_annual_gen_cap_CF_1970_2021|stem|8df8ad9ee42d49d6|922aefb19c709325|b0d9bfb57fa52bff|-623511298||t19555.6015050991s|c08769dc4ce04be3|812175|parquet|local|vector|||68.46||
get_CF_trends|function|ce2b87235964cf7f|||||||||||||||
get_EIA_annual_gen|function|1f87eeb76f308b73|||||||||||||||
HESC|stem|f4bd48ea34a7e458|84861977bfb6d75f|ef46db3751d8e999|555984922|./data/HESC_v2/|t19556.7977358831s|a75e104011428481|4096|file|local|vector|||0||
HILARRI|stem|6ba817df1956a2d3|161f3e4c4845d356|ef46db3751d8e999|-336879803|./data/HILARRI_v2/|t19556.7977359964s|a75e104011428481|4096|file|local|vector|||1.01||
link_plant_to_COMID_and_H|function|c5a2ff003b637fe3|||||||||||||||
model_data_ready|stem|02cece710d62620b|867edde3114778f4|f8b8e3242ea39640|291744199||t19557.7533101363s|8f25a2ba05a2a42a|909537|parquet|local|vector|||0.06||
model_ready_data|stem||a77f878581754df7|ac348a9ac4e8a917|-797304698||t19556.8413976197s||0|parquet|local|vector|||0.16||1mCaused by error22m33m39m object EIA_ID not found
plant_to_flow_H_mapping|stem|020b47ef5aa1992c|7e1e72eda130934a|320fd707f779607c|-1974510388||t19557.7451326597s|e88f9f6264a9a96c|20818|parquet|local|vector|||0.01||
plant_to_flow_H_mapping_uncorrected|stem|c2c796de5d59c3b4|95618b304ba141cb|06cb53fc2bf0422a|-1018155617||t19557.7451323157s|e73f2f82f91eff84|20334|parquet|local|vector|||0.34||
prep_model_data|function|d2551bda4182f789|||||||||||||||