Commit 310e5312 authored by Turner's avatar Turner
Browse files

EIA inputs

parent 2ed4d19e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -4,3 +4,4 @@
.Ruserdata
hydro-cf-trends.Rproj
data
output
+382 −1
Original line number Diff line number Diff line
## Functions for reading EIA spreadsheets

get_EIA_annual_gen <- function(xl_dir){
#' Build annual generation, nameplate capacity and capacity factor
#' (1970-2021) for conventional hydropower plants from EIA spreadsheets.
#'
#' Target plants are those reporting "HYC" generation in the 2021 EIA-923
#' file whose summed 2021 "HY" nameplate capacity is at least 5 MW.
#'
#' Side effect: writes "./output/misc/impact_of_excluding_sub5MW_plants.csv"
#' (the directory is created if it does not exist).
#'
#' @param gnr_dir Directory holding the generation files (f759*, f906920_*,
#'   f923_*). File names are appended with `paste0()`, so the value must end
#'   with a path separator.
#' @param plt_dir Directory holding the EIA-860 folders ("eia860<year>").
#' @return A tibble with one row per target plant and year and the columns
#'   EIA_ID, gen_MWh, year, nameplate, n_hrs, cap_MWh and CF.
get_EIA_annual_gen <- function(gnr_dir, plt_dir){

  # Identify full list of conventional HY plants to target for analysis...
  # ... using 2021 generator data (most recent complete EIA set). We look...
  # ... for plants generating in 2021, then filter out the smallest < 5MW...
  # ... to make the study manageable while still capturing almost all capacity.

  # suppressWarnings() is used around the spreadsheet reads throughout, as in
  # the original implementation.
  HYC_with_gen_2021 <- suppressWarnings(
    read_xlsx(paste0(gnr_dir,
                     "f923_2021/",
                     "EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx"),
              skip = 5, progress = FALSE) |>
      filter(`AER\r\nFuel Type Code` == "HYC") |>
      select(EIA_ID = `Plant Id`) |>
      unique() |>
      mutate(EIA_ID = as.integer(EIA_ID))
  )

  HYC_cap_2021 <- suppressWarnings(
    read_xlsx(paste0(plt_dir,
                     "/eia8602021/",
                     "3_1_Generator_Y2021.xlsx"),
              skip = 1) |>
      select(EIA_ID = `Plant Code`, nameplate = `Nameplate Capacity (MW)`,
             `Prime Mover`) |>
      filter(`Prime Mover` == "HY") |>
      mutate(EIA_ID = as.integer(EIA_ID)) |>
      summarise(nameplate = sum(nameplate), .by = EIA_ID) |>
      filter(EIA_ID %in% HYC_with_gen_2021[["EIA_ID"]])
  )

  # Write simple table showing impact of removing plants < 5MW.
  # The output folder is not version controlled, so make sure it exists.
  impact_csv <- "./output/misc/impact_of_excluding_sub5MW_plants.csv"
  dir.create(dirname(impact_csv), recursive = TRUE, showWarnings = FALSE)

  HYC_cap_2021 |>
    mutate(sub_5MW = nameplate < 5) |>
    summarise(n_plants = n(),
              total_cap = sum(nameplate), .by = sub_5MW) |>
    write_csv(impact_csv)

  # get target plant list
  target_plants <- HYC_cap_2021 |>
    filter(nameplate >= 5) |>
    pull(EIA_ID)

  # Nameplate capacity 2001-2021 comes from the EIA-860 generator files.
  plant_nameplate_MW_2001_2021 <- 2001:2021 |>
    map_dfr(function(yr){
      tibble(EIA_ID = target_plants) |>
        left_join(read_eia860_hydro_nameplate(plt_dir, yr),
                  by = join_by(EIA_ID)) |>
        mutate(year = !!yr)
    })

  # reported plant capacity pre-2001 is available only via generation data files...
  plant_nameplate_MW_1970_2000 <- 1970:2000 |>
    map_dfr(function(yr){

      plant_nameplate <- suppressWarnings(
        read_xls(paste0(gnr_dir, "f759",
                        yr, "u.xls")) |>
          mutate(PCODE = as.integer(PCODE)) |>
          filter(PCODE %in% target_plants) |>
          summarise(nameplate = 1e-3 * sum(CAPACITY), .by = PCODE) |>
          # ^^ unit correction to MW
          rename(EIA_ID = PCODE)
      )

      tibble(EIA_ID = target_plants) |>
        left_join(plant_nameplate, by = join_by(EIA_ID)) |>
        mutate(year = !!yr)
    })

  plant_nameplate_MW <- bind_rows(
    plant_nameplate_MW_1970_2000,
    plant_nameplate_MW_2001_2021
  )

  # Treat zero capacities reported in 1987-2000 as missing, then fill every
  # missing value by linear interpolation between neighbouring years.
  # Leading/trailing gaps stay NA (na.rm = FALSE keeps the vector length).
  plant_nameplate_MW_interpolated <- plant_nameplate_MW |>
    split(~EIA_ID) |>
    map_dfr(function(x){
      x |>
        # na.approx() interpolates by position, so rows must be in year order
        arrange(year) |>
        mutate(
          nameplate = if_else(year %in% 1987:2000 & nameplate == 0,
                              NA_real_, nameplate),
          nameplate = na.approx(nameplate,
                                na.rm = FALSE))
    })

  # Read generation

  gen_1970_2000 <- 1970:2000 |>
    map_dfr(function(yr){

      plant_gen <- suppressWarnings({
        f759 <- read_xls(paste0(gnr_dir, "f759", yr, "u.xls")) |>
          mutate(PCODE = as.integer(PCODE))

        if (yr %in% 1970:1995) {
          # generation is spread over several columns starting with "GEN"
          f759 |>
            select(EIA_ID = PCODE, starts_with("GEN")) |>
            pivot_longer(!EIA_ID) |>
            summarise(gen_MWh = sum(value), .by = EIA_ID)
        } else {
          # 1996-2000: a single NETGENERAT column
          f759 |>
            select(EIA_ID = PCODE, NETGENERAT) |>
            summarise(gen_MWh = sum(NETGENERAT), .by = EIA_ID)
        }
      })

      tibble(EIA_ID = target_plants) |>
        left_join(plant_gen, by = join_by(EIA_ID)) |>
        mutate(year = !!yr) |>
        # NOTE(review): 1970-1989 values are scaled by 1e-3, presumably
        # because those files report kWh rather than MWh -- confirm.
        mutate(gen_MWh = if_else(year %in% 1970:1989, gen_MWh * 1e-3, gen_MWh))
    })

  EIA_hydro_netgen_2001_2021 <- 2001:2021 |>
    map_dfr(function(yr){
      read_eia_plant_netgen(gnr_dir, yr, target_plants)
    })

  # generate table of hrs per year for computation of maximum output
  hrs_per_year <- tibble(
    date = seq.Date(from = ymd("1970-01-01"), to = ymd("2021-12-31"), by = 1)
  ) |>
    mutate(year = year(date)) |>
    summarise(n_hrs = n() * 24, .by = year)

  # combine nameplate and generation to get CF
  gen_cap_CF_1970_2021 <- bind_rows(
    gen_1970_2000,
    EIA_hydro_netgen_2001_2021 |>
      rename(gen_MWh = MWh)
  ) |>
    left_join(plant_nameplate_MW_interpolated,
              by = join_by(EIA_ID, year)) |>
    left_join(hrs_per_year, by = join_by(year)) |>
    mutate(cap_MWh = nameplate * n_hrs,
           CF = gen_MWh / cap_MWh)

  gen_cap_CF_1970_2021
}

# Helper: total "HY" nameplate capacity per plant from the EIA-860 generator
# file of one year (2001-2021). Returns a tibble with EIA_ID and nameplate.
read_eia860_hydro_nameplate <- function(plt_dir, yr){

  if (!yr %in% 2001:2021) {
    stop("No EIA-860 generator file layout defined for year ", yr,
         call. = FALSE)
  }

  yr_dir <- paste0(plt_dir, "/eia860", yr)
  yy <- substr(yr, 3, 4)

  # Column headers changed twice over the period.
  if (yr <= 2008) {
    cols <- c(id = "PLNTCODE", mw = "NAMEPLATE", pm = "PRIMEMOVER")
  } else if (yr <= 2011) {
    cols <- c(id = "PLANT_CODE", mw = "NAMEPLATE", pm = "PRIME_MOVER")
  } else {
    cols <- c(id = "Plant Code", mw = "Nameplate Capacity (MW)",
              pm = "Prime Mover")
  }

  sum_hydro_nameplate <- function(generators){
    generators |>
      as_tibble() |>
      select(EIA_ID = all_of(cols[["id"]]),
             nameplate = all_of(cols[["mw"]]),
             prime_mover = all_of(cols[["pm"]])) |>
      filter(prime_mover == "HY") |>
      summarise(nameplate = sum(nameplate), .by = EIA_ID)
  }

  # File name and format also changed from year to year.
  read_generator_sheet <- function(){
    if (yr %in% 2004:2008) {
      read_xls(paste0(yr_dir, "/GenY", yy, ".xls"))
    } else if (yr == 2009) {
      read_xls(paste0(yr_dir, "/GeneratorY", yy, ".xls"))
    } else if (yr == 2010) {
      read_xls(paste0(yr_dir, "/GeneratorsY", yr, ".xls"))
    } else if (yr %in% 2011:2012) {
      read_xlsx(paste0(yr_dir, "/GeneratorY", yr, ".xlsx"), skip = 1)
    } else {
      read_xlsx(paste0(yr_dir, "/3_1_Generator_Y", yr, ".xlsx"), skip = 1)
    }
  }

  if (yr %in% 2001:2003) {
    # 2001-2003 are distributed as dBase files
    return(
      sum_hydro_nameplate(read.dbf(paste0(yr_dir, "/GENY", yy, ".dbf")))
    )
  }

  suppressWarnings(sum_hydro_nameplate(read_generator_sheet()))
}

# Helper: annual net generation per target plant from the EIA-906/920/923
# file of one year (2001-2021). Returns a tibble with one row per target
# plant and the columns EIA_ID, MWh and year (MWh is NA when the plant is
# absent from the file).
read_eia_plant_netgen <- function(gnr_dir, yr, target_plants){

  netgen_cols <- "NET GENERATION (megawatthours)"

  if (yr %in% 2001:2007) {
    file_name <- paste0(gnr_dir, "f906920_", yr, "/f906920_", yr, ".xls")
    read_sheet <- read_xls
    n_skip <- 7
    id_col <- "Plant ID"
  } else if (yr %in% 2008:2021) {
    # the header spelling of the net generation column varies across files
    netgen_cols <- c("NET GENERATION (megawatthours)",
                     "Net Generation (Megawatthours)",
                     "Net Generation\r\n(Megawatthours)")
    if (yr %in% 2008:2010) {
      file_name <- paste0(gnr_dir, "f923_", yr,
                          "/EIA923 SCHEDULES ", yr, ".xls")
      read_sheet <- read_xls
      n_skip <- 7
      id_col <- "Plant ID"
    } else if (yr %in% 2011:2020) {
      file_name <- paste0(gnr_dir, "f923_", yr,
                          "/EIA923 SCHEDULES ", yr, ".xlsx")
      read_sheet <- read_xlsx
      n_skip <- 5
      id_col <- "Plant Id"
    } else {
      file_name <- paste0(gnr_dir, "f923_", yr,
                          "/EIA923_Schedules_2_3_4_5_M_12_2021_Final_Revision.xlsx")
      read_sheet <- read_xlsx
      n_skip <- 5
      id_col <- "Plant Id"
    }
  } else {
    stop("No EIA generation file layout defined for year ", yr,
         call. = FALSE)
  }

  # read EIA file; plant id is coerced to integer so it matches target_plants
  EIA_data <- suppressWarnings(
    read_sheet(file_name, skip = n_skip, .name_repair = "unique_quiet") |>
      rename(EIA_ID = all_of(id_col)) |>
      mutate(EIA_ID = as.integer(EIA_ID))
  )

  netgen_col <- intersect(netgen_cols, names(EIA_data))
  if (length(netgen_col) != 1L) {
    stop("Expected exactly one net generation column in ", file_name,
         " but found ", length(netgen_col), call. = FALSE)
  }

  # NOTE(review): the 2001-2002 totals keep NA when any record of a plant is
  # missing, whereas later years drop missing records. This mirrors the
  # original implementation -- confirm whether the difference is intended.
  drop_missing <- yr >= 2003

  EIA_hydro <- EIA_data |>
    select(EIA_ID, MWh = all_of(netgen_col)) |>
    filter(EIA_ID %in% target_plants) |>
    summarise(MWh = sum(MWh, na.rm = drop_missing), .by = EIA_ID)

  tibble(EIA_ID = target_plants) |>
    left_join(EIA_hydro, by = "EIA_ID") |>
    mutate(year = !!yr)
}
+12 −3
Original line number Diff line number Diff line
@@ -13,7 +13,9 @@ tar_option_set(
               "purrr",
               "furrr",
               "future",
               "sf")
               "sf",
               "foreign",
               "zoo")
)

#options(clustermq.scheduler = "multisession")
@@ -30,7 +32,14 @@ list(
    format = "file"
  ),
  tar_target(
    EIA_annual_gen,
    get_EIA_annual_gen(xl_dir = EIA_529_906_920_923)
    EIA_860,
    "./data/EIA/Plant/",
    format = "file"
  ),
  tar_target(
    EIA_annual_gen_cap_CF_1970_2021,
    get_EIA_annual_gen(gnr_dir = EIA_529_906_920_923,
                       plt_dir = EIA_860),
    format = "parquet"
  )
)
+5 −2
Original line number Diff line number Diff line
name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error
.Random.seed|object|1ce8be4dd88ddeeb|||||||||||||||
EIA_529_906_920_923|stem|429fb55d18257b0e|7e8ad8e942ac6d8d|ef46db3751d8e999|-1754950433|./data/EIA/Generation/|t19552.6038571127s|db7d75b1b9e45804|24576|file|local|vector|||0.82||
EIA_annual_gen|stem|d131862dc333587a|058b1fa983426efc|7647f84d89c236d8|-1955140278||t19552.6216109002s|44e327d1ee1978d7|43|rds|local|vector|||0||
get_EIA_annual_gen|function|6f020b783a1d9451|||||||||||||||
EIA_860|stem|db95ac5d1da1bd03|fd2da67a7010fb85|ef46db3751d8e999|-1646985544|./data/EIA/Plant/|t19552.6038846689s|c8fc69c6c41e109f|12288|file|local|vector|||2.19||
EIA_annual_gen|stem|8df8ad9ee42d49d6|922aefb19c709325|b0d9bfb57fa52bff|-1955140278||t19555.5959348734s|c08769dc4ce04be3|812175|parquet|local|vector|||69.36||
EIA_annual_gen_cap_CF_1970_2021|stem|8df8ad9ee42d49d6|922aefb19c709325|b0d9bfb57fa52bff|-623511298||t19555.6015050991s|c08769dc4ce04be3|812175|parquet|local|vector|||68.46||
get_EIA_annual_gen|function|1f87eeb76f308b73|||||||||||||||