Add context to trends (c282ab0b) · Commits · Turner, Sean / hydro-CF-trends

R/trends.R

0 → 100644

+148 −0

Original line number	Diff line number	Diff line
		## Function for computation of trends in annual capacity factors
		## Trends are evaluated for four alternative time periods:
		## "Full period": 1970 - 2021
		## "Flow period": 1980 - 2019 (commensurate with DayFlow)
		## "Flow period plus": 1980 - 2021
		## "923 period": 2001 - 2021 (reliable generation data)

		get_CF_trends <- function(annual_CFs){

		tribble(
		~start_yr, ~end_yr, ~analysis,
		1970, 2021, "full period",
		1980, 2019, "flow period",
		1980, 2021, "flow period plus",
		2001, 2021, "923 period"
		) \|>
		pmap_dfr(function(start_yr, end_yr, analysis){

		message(paste0("Computing trends for ",
		analysis,
		": ", start_yr, "->", end_yr))

		annual_CFs \|>
		filter(year %in% start_yr:end_yr) \|>
		split(~EIA_ID) \|>
		map_dfr(function(plant_data){

		# deal with any inf values (caused by cap = 0 in EIA data)
		plant_data \|>
		mutate(CF = if_else(is.infinite(CF), NA_real_, CF)) ->
		plant_data_noInf

		n_yrs <- nrow(plant_data_noInf)

		# how many na values affected this slope analysis?
		plant_data_noInf \|>
		filter(is.na(CF)) \|>
		nrow() ->
		n_bad_yrs

		if((n_yrs - n_bad_yrs) / n_yrs < 0.8){
		return(
		tibble(
		EIA_ID = plant_data_noInf[["EIA_ID"]][1],
		sens_slope = NA_real_,
		intercept = NA_real_,
		p_value = NA_real_,
		coverage = (n_yrs - n_bad_yrs) / n_yrs
		)
		)
		}

		# compute Sen's slope
		senSlope(CF ~ year, data = plant_data,
		intercept = "A1m") -> sen
		sen$coefficients[[2]] -> computed_slope
		sen$coefficients[[1]] -> computed_intercept

		# set up blocked bootstrap (block size = 3)
		tibble(
		index = 1:(n_yrs - 2)
		) \|>
		mutate(`1` = plant_data_noInf[["CF"]][index],
		`2` = plant_data_noInf[["CF"]][index + 1],
		`3` = plant_data_noInf[["CF"]][index + 2]) \|>
		filter(!is.na(`1`), !is.na(`2`), !is.na(`3`)) ->
		blocks

		# blocked bootstrap
		n_size <- 10000

		# get serial correlation (to determine if block is needed)
		acf1 <- acf(plant_data_noInf$CF, plot = FALSE,
		na.action = na.pass)[[1]][[2]]

		if(acf1 > 0.3){
		1:n_size \|>
		future_map_dbl(function(x){

		1:(nrow(blocks)) \|>
		sample(replace = T, size = 14) ->
		block_indices

		blocks[block_indices,] \|>
		pivot_longer(!index) \|>
		mutate(year = 1980:2021) ->
		blocked_sample

		senSlope(value ~ year, data = blocked_sample,
		intercept = "A1m") ->
		block_sen

		return(block_sen[["coefficients"]][[2]])


		}, .options = furrr_options(seed = 123)) -> sample_sens
		}else{
		1:n_size \|>
		future_map_dbl(function(x){

		tibble(CF = sample(plant_data_noInf$CF, replace = T)) \|>
		mutate(year = start_yr:end_yr) ->
		boot_sample

		senSlope(CF ~ year, data = boot_sample,
		intercept = "A1m") ->
		boot_sen

		return(boot_sen[["coefficients"]][[2]])


		}, .options = furrr_options(seed = 123)) -> sample_sens
		}

		if(computed_slope >= 0){
		p <- sum(sample_sens > computed_slope) / n_size
		}else{
		p <- sum(sample_sens < computed_slope) / n_size
		}

		return(
		tibble(
		EIA_ID = plant_data_noInf[["EIA_ID"]][1],
		sens_slope = computed_slope,
		intercept = computed_intercept,
		p_value = p,
		coverage = (n_yrs - n_bad_yrs) / n_yrs
		)
		)

		}) ->
		all_slopes_for_analysis_x

		return(
		all_slopes_for_analysis_x \|>
		mutate(analysis = !!analysis)
		)

		}) -> all_trend_analyses

		return(
		all_trend_analyses
		)


		}