Loading R/models.R +133 −11 Original line number Diff line number Diff line Loading @@ -74,6 +74,10 @@ link_plant_to_COMID_and_H <- function( left_join(plant_to_COMID, by = join_by(COMPLXID)) |> mutate(COMID = as.integer(COMID)) |> left_join(HESC_ready, by = join_by(COMPLXID)) |> mutate(Hyd_hd_est_m = if_else(Hyd_hd_est_m == 0, NA_real_, Hyd_hd_est_m), NHYD_HT = if_else(NHYD_HT == 0, NA_real_, NHYD_HT), NID_HT = if_else(NID_HT == 0, NA_real_, NID_HT), G_HT = if_else(G_HT == 0, NA_real_, G_HT)) |> mutate(H_m = case_when( !is.na(NHYD_HT) ~ NHYD_HT, is.na(NHYD_HT) & !is.na(Hyd_hd_est_m) ~ Hyd_hd_est_m, Loading Loading @@ -121,22 +125,21 @@ prep_model_data <- function(plant_to_flow_H_mapping, CFs |> filter(year %in% 1980:2019) |> left_join(plant_to_flow_H_mapping |> select(COMPLXID, COMID), select(COMPLXID, COMID, H_m), by = join_by(COMPLXID)) |> filter(!is.na(COMID)) |> left_join(annual_flow_1980_2019, by = join_by(year, COMID)) |> filter(!is.na(Corrected) | !is.na(Naturalized)) |> filter(!is.na(Corrected)) |> mutate(Corrected = if_else(is.infinite(Corrected), NA_real_, Corrected)) |> select(COMPLXID, year, gen_MWh, nameplate_MW, n_hrs, cap_MWh, CF, COMID, Qc_BCM = Corrected, Qn_BCM = Naturalized) -> rename(Qc_BCM = Corrected, Qn_BCM = Naturalized) |> # compute energy potential mutate(pot_MWh_c = Qc_BCM * 1e9 * 9810 * H_m * 2.77778e-10, pot_MWh_n = Qn_BCM * 1e9 * 9810 * H_m * 2.77778e-10) |> # compute potential / capacity mutate(CFPc = pot_MWh_c / cap_MWh, CFPn = pot_MWh_n / cap_MWh) -> data_model_ready return(data_model_ready) Loading @@ -144,3 +147,122 @@ prep_model_data <- function(plant_to_flow_H_mapping, } simulate_annual_CF <- function(data){ data |> select(COMPLXID, year, CF, CFPc, CFPn) |> pivot_longer(!c(COMPLXID, year, CF), names_to = "flow_type", values_to = "CFP", names_prefix = "CFP") |> split(~COMPLXID) %>% #.[[1]] -> plant_CF_flow future_map_dfr(function(plant_CF_flow){ plant_CF_flow |> split(~flow_type) %>% #.[[1]] -> plant_CF_flow_x map_dfr(function(plant_CF_flow_x){ if(anyNA(plant_CF_flow_x$CFP)){ return( plant_CF_flow_x |> mutate(TmodP1 = NA_real_, TmodP2 = NA_real_, CF_pred = NA_real_) ) } get_TmodX2_rmse <- function(x){ p1 <- x[1] p2 <- x[2] plant_CF_flow_x |> mutate(pred = 1 + CFP - (p2 + CFP ^ p1)^(1/p1)) -> pred_added pull(pred_added, pred) -> CF_pred pull(pred_added, CF) -> target return( sqrt(mean((CF_pred - target)^2, na.rm = T)) ) } TmodX2 <- optim(par = c(1,1), fn = get_TmodX2_rmse, lower = c(1,1), upper = c(10, 20), method = "L-BFGS-B") return( plant_CF_flow_x |> mutate( TmodP1 = TmodX2$par[1], TmodP2 = TmodX2$par[2], CF_pred = 1 + CFP - (TmodP2 + CFP ^ TmodP1)^(1/TmodP1) ) ) }) }) -> all_model_predictions return(all_model_predictions) } evaluate_models <- function(model_results){ model_results |> #filter(COMPLXID == "hc1014") -> model_x split(~COMPLXID) |> map_dfr( function(model_x){ Xc <- filter(model_x, flow_type == "c", !is.na(CF), !is.na(CF_pred)) Xn <- filter(model_x, flow_type == "n", !is.na(CF), !is.na(CF_pred)) model_x |> select(COMPLXID, TmodP1, TmodP2, flow_type) |> unique() -> model_x_ if(all(Xc$CF_pred == 0)){ model_x_ |> filter(flow_type == "c") |> mutate(pearson = NA_real_, spearman = NA_real_, bias = NA_real_, flow_note = "Zero flow") -> Xc_summary }else{ model_x_ |> filter(flow_type == "c") |> mutate(pearson = cor(Xc$CF, Xc$CF_pred), spearman = cor(Xn$CF, Xn$CF_pred, method = "spearman"), bias = mean(Xc$CF_pred) - mean(Xc$CF), flow_note = "") -> Xc_summary } if(all(Xn$CF_pred == 0)){ model_x_ |> filter(flow_type == "n") |> mutate(pearson = NA_real_, spearman = NA_real_, bias = NA_real_, flow_note = "Zero flow") -> Xn_summary }else{ model_x_ |> filter(flow_type == "n") |> mutate(pearson = cor(Xn$CF, Xn$CF_pred), spearman = cor(Xn$CF, Xn$CF_pred, method = "spearman"), bias = mean(Xn$CF_pred) - mean(Xn$CF), flow_note = "") -> Xn_summary } bind_rows( Xc_summary, Xn_summary ) } ) -> model_evaluation_results return(model_evaluation_results) } Loading
R/models.R +133 −11 Original line number Diff line number Diff line Loading @@ -74,6 +74,10 @@ link_plant_to_COMID_and_H <- function( left_join(plant_to_COMID, by = join_by(COMPLXID)) |> mutate(COMID = as.integer(COMID)) |> left_join(HESC_ready, by = join_by(COMPLXID)) |> mutate(Hyd_hd_est_m = if_else(Hyd_hd_est_m == 0, NA_real_, Hyd_hd_est_m), NHYD_HT = if_else(NHYD_HT == 0, NA_real_, NHYD_HT), NID_HT = if_else(NID_HT == 0, NA_real_, NID_HT), G_HT = if_else(G_HT == 0, NA_real_, G_HT)) |> mutate(H_m = case_when( !is.na(NHYD_HT) ~ NHYD_HT, is.na(NHYD_HT) & !is.na(Hyd_hd_est_m) ~ Hyd_hd_est_m, Loading Loading @@ -121,22 +125,21 @@ prep_model_data <- function(plant_to_flow_H_mapping, CFs |> filter(year %in% 1980:2019) |> left_join(plant_to_flow_H_mapping |> select(COMPLXID, COMID), select(COMPLXID, COMID, H_m), by = join_by(COMPLXID)) |> filter(!is.na(COMID)) |> left_join(annual_flow_1980_2019, by = join_by(year, COMID)) |> filter(!is.na(Corrected) | !is.na(Naturalized)) |> filter(!is.na(Corrected)) |> mutate(Corrected = if_else(is.infinite(Corrected), NA_real_, Corrected)) |> select(COMPLXID, year, gen_MWh, nameplate_MW, n_hrs, cap_MWh, CF, COMID, Qc_BCM = Corrected, Qn_BCM = Naturalized) -> rename(Qc_BCM = Corrected, Qn_BCM = Naturalized) |> # compute energy potential mutate(pot_MWh_c = Qc_BCM * 1e9 * 9810 * H_m * 2.77778e-10, pot_MWh_n = Qn_BCM * 1e9 * 9810 * H_m * 2.77778e-10) |> # compute potential / capacity mutate(CFPc = pot_MWh_c / cap_MWh, CFPn = pot_MWh_n / cap_MWh) -> data_model_ready return(data_model_ready) Loading @@ -144,3 +147,122 @@ prep_model_data <- function(plant_to_flow_H_mapping, } simulate_annual_CF <- function(data){ data |> select(COMPLXID, year, CF, CFPc, CFPn) |> pivot_longer(!c(COMPLXID, year, CF), names_to = "flow_type", values_to = "CFP", names_prefix = "CFP") |> split(~COMPLXID) %>% #.[[1]] -> plant_CF_flow future_map_dfr(function(plant_CF_flow){ plant_CF_flow |> split(~flow_type) %>% #.[[1]] -> plant_CF_flow_x map_dfr(function(plant_CF_flow_x){ if(anyNA(plant_CF_flow_x$CFP)){ return( plant_CF_flow_x |> mutate(TmodP1 = NA_real_, TmodP2 = NA_real_, CF_pred = NA_real_) ) } get_TmodX2_rmse <- function(x){ p1 <- x[1] p2 <- x[2] plant_CF_flow_x |> mutate(pred = 1 + CFP - (p2 + CFP ^ p1)^(1/p1)) -> pred_added pull(pred_added, pred) -> CF_pred pull(pred_added, CF) -> target return( sqrt(mean((CF_pred - target)^2, na.rm = T)) ) } TmodX2 <- optim(par = c(1,1), fn = get_TmodX2_rmse, lower = c(1,1), upper = c(10, 20), method = "L-BFGS-B") return( plant_CF_flow_x |> mutate( TmodP1 = TmodX2$par[1], TmodP2 = TmodX2$par[2], CF_pred = 1 + CFP - (TmodP2 + CFP ^ TmodP1)^(1/TmodP1) ) ) }) }) -> all_model_predictions return(all_model_predictions) } evaluate_models <- function(model_results){ model_results |> #filter(COMPLXID == "hc1014") -> model_x split(~COMPLXID) |> map_dfr( function(model_x){ Xc <- filter(model_x, flow_type == "c", !is.na(CF), !is.na(CF_pred)) Xn <- filter(model_x, flow_type == "n", !is.na(CF), !is.na(CF_pred)) model_x |> select(COMPLXID, TmodP1, TmodP2, flow_type) |> unique() -> model_x_ if(all(Xc$CF_pred == 0)){ model_x_ |> filter(flow_type == "c") |> mutate(pearson = NA_real_, spearman = NA_real_, bias = NA_real_, flow_note = "Zero flow") -> Xc_summary }else{ model_x_ |> filter(flow_type == "c") |> mutate(pearson = cor(Xc$CF, Xc$CF_pred), spearman = cor(Xn$CF, Xn$CF_pred, method = "spearman"), bias = mean(Xc$CF_pred) - mean(Xc$CF), flow_note = "") -> Xc_summary } if(all(Xn$CF_pred == 0)){ model_x_ |> filter(flow_type == "n") |> mutate(pearson = NA_real_, spearman = NA_real_, bias = NA_real_, flow_note = "Zero flow") -> Xn_summary }else{ model_x_ |> filter(flow_type == "n") |> mutate(pearson = cor(Xn$CF, Xn$CF_pred), spearman = cor(Xn$CF, Xn$CF_pred, method = "spearman"), bias = mean(Xn$CF_pred) - mean(Xn$CF), flow_note = "") -> Xn_summary } bind_rows( Xc_summary, Xn_summary ) } ) -> model_evaluation_results return(model_evaluation_results) }