Merge branch 'refactor-autocsm' into 'main' (73ccb6bf) · Commits · ExaDigiT / sim-raps

README.md

+9 −0

Original line number	Diff line number	Diff line
		@@ -55,6 +55,15 @@ given instead of the parquet files for more quickly running subsequent simulatio

		python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/jobprofile/$DATEDIR

		## OpenStreetMap Attribution

		Map data used in this project is provided by [OpenStreetMap](https://www.openstreetmap.org/copyright) and is available under the Open Database License (ODbL). © OpenStreetMap contributors.

		## Open-Meteo API Attribution

		Weather data used in this project is provided by the [Open-Meteo API](https://open-meteo.com/en/docs). Open-Meteo offers free weather forecast data for various applications, and their API provides easy access to weather information without requiring user authentication.


		## Build and run Docker container

		make docker_build && make docker_run

config/frontier/cooling.json

+20 −27

Original line number	Diff line number	Diff line
		{
		"COOLING_EFFICIENCY": 0.945,
		"WET_BULB_TEMP": 290.0,
		"FMU_PATH": "models/FrontierTH_linear_properties_FMU_export4.fmu",
		"ZIP_CODE": 37831,
		"COUNTRY_CODE": "US",
		"FMU_PATH": "models/Simulator_olcf5_base.fmu",
		"FMU_UPDATE_FREQ": 15,
		"FMU_COLUMN_MAPPING": {
		"W_CDUP_Out": "Work Done by CDUPs",
		"Tr_sec_Out": "Rack Return Temperature (\u00b0C)",
		"Ts_sec_Out": "Rack Supply Temperature (\u00b0C)",
		"ps_sec_Out": "Rack Supply Pressure (psig)",
		"pr_sec_Out": "Rack Return Pressure (psig)",
		"Q_sec_Out": "Rack Flowrate (gpm)",
		"Q_fac_Out": "HTW/CTW Flowrate (gpm)",
		"p_fac_Out": "HTWR/HTWS/CTWR/CTWS Pressure (psig)",
		"T_fac_Out": "HTWR/HTWS/CTWR/CTWS Temperature (\u00b0C)",
		"W_HTWP_Out": "Power Consumption HTWPS (kW)",
		"W_CTWP_Out": "Power Consumption CTWPs (kW)",
		"W_CT_Out": "Power Consumption Fan (kW)",
		"N_HTWP_Out": "% Speed of HTWP",
		"N_CTWP_Out": "% Speed of CTWP",
		"n_CTWPs_Out": "nCTWPs Staged",
		"n_HTWPs_Out": "nHTWPs Staged",
		"PUE_Out": "PUE Output",
		"n_EHXs_Out": "nEHXs Staged",
		"n_CTs_Out": "nCTs Staged",
		"Tr_pri_Out": "Facility Return Temperature (\u00b0C)",
		"Ts_pri_Out": "Facility Supply Temperature (\u00b0C)",
		"ps_pri_Out": "Facility Supply Pressure (psig)",
		"pr_pri_Out": "Facility Return Pressure (psig)",
		"Q_bypass_Out": "CDU Loop Bypass Flowrate (gpm)",
		"Q_pri_Out": "Facility Flowrate (gpm)"
		}
		"T_sec_r_C": "Rack Return Temperature (\u00b0C)",
		"T_sec_s_C": "Rack Supply Temperature (\u00b0C)",
		"p_sec_s_psig": "Rack Supply Pressure (psig)",
		"p_sec_r_psig": "Rack Return Pressure (psig)",
		"V_flow_sec_GPM": "Rack Flowrate (gpm)",
		"T_prim_r_C": "Facility Return Temperature (\u00b0C)",
		"T_prim_s_C": "Facility Supply Temperature (\u00b0C)",
		"p_prim_s_psig": "Facility Supply Pressure (psig)",
		"p_prim_r_psig": "Facility Return Pressure (psig)",
		"V_flow_prim_GPM": "Facility Flowrate (gpm)",
		"W_flow_CDUP_kW": "Work Done By CDUP (kW)"
		},
		"TEMPERATURE_KEY": "simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_Towb",
		"W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW",
		"W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW",
		"W_CTs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW"

		}

main.py

+8 −1

Original line number	Diff line number	Diff line
		@@ -19,6 +19,8 @@ if sys.version_info < (required_major, required_minor):

		parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)')
		parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model')
		# Optional simulation window bounds, passed as ISO 8601 timestamp strings (both default to None).
		parser.add_argument('--start', type=str, help='ISO 8601 string for start of simulation')
		parser.add_argument('--end', type=str, help='ISO 8601 string for end of simulation')
		parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout')
		parser.add_argument('-e', '--encrypt', action='store_true', help='Encrypt any sensitive data in telemetry')
		parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule')
		@@ -62,6 +64,7 @@ from raps.power import compute_node_power_uncertainties, compute_node_power_vali
		from raps.scheduler import Scheduler, Job
		from raps.telemetry import Telemetry
		from raps.workload import Workload
		from raps.weather import Weather
		from raps.utils import create_casename, convert_to_seconds, write_dict_to_file

		load_config_variables([
		@@ -81,6 +84,9 @@ if args.cooling:
		cooling_model = ThermoFluidsModel(FMU_PATH)
		cooling_model.initialize()
		args.layout = "layout2"

		if args_dict['start']:
		cooling_model.weather = Weather(args_dict['start'])
		else:
		cooling_model = None

		@@ -99,6 +105,7 @@ flops_manager = FLOPSManager(SC_SHAPE)
		layout_manager = LayoutManager(args.layout, args.debug)
		sc = Scheduler(TOTAL_NODES, DOWN_NODES, power_manager, flops_manager, layout_manager,
		cooling_model, **args_dict)

		if args.replay:
		td = Telemetry(**args_dict)

pyproject.toml

+2 −1

Original line number	Diff line number	Diff line
		@@ -18,5 +18,6 @@ dependencies = [
		"pandas==2.0.3",
		"scipy==1.10.1",
		"pyarrow==15.0.1",
		"uncertainties==3.2.1"
		"uncertainties==3.2.1",
		"requests==2.32.3"
		]

raps/cooling.py

+158 −165

Original line number	Diff line number	Diff line
		@@ -4,66 +4,48 @@ an FMU (Functional Mock-up Unit).

		The module defines a `ThermoFluidsModel` class that encapsulates the
		initialization, simulation step execution,
		data conversion, and cleanup processes for the FMU-based model. Additionally,
		it includes a helper function to merge dictionaries.

		Functions
		---------
		merge_dicts(dict1, dict2)
		Merge two dictionaries into one.

		Classes
		-------
		ThermoFluidsModel
		A class to represent a thermo-fluids model using an FMU.
		data conversion, and cleanup processes for the FMU-based model.
		"""

		import shutil
		import re
		import numpy as np
		import pandas as pd
		from uncertainties import unumpy
		from uncertainties.core import AffineScalarFunc

		from fmpy import read_model_description, extract
		from fmpy.fmi2 import FMU2Slave
		from .config import load_config_variables
		from collections import OrderedDict
		from datetime import timedelta

		load_config_variables(['FMU_OUTPUT_KEYS','NUM_CDUS', 'COOLING_EFFICIENCY','WET_BULB_TEMP'], globals())
		load_config_variables(['NUM_CDUS', 'COOLING_EFFICIENCY','WET_BULB_TEMP', 'RACKS_PER_CDU', 'ZIP_CODE', 'COUNTRY_CODE', \
		'TEMPERATURE_KEY', 'W_HTWPs_KEY', 'W_CTWPs_KEY', 'W_CTs_KEY'], globals())

		# Define the Merge function outside of the class
		def merge_dicts(dict1, dict2):
		"""
		Merge two dictionaries into one.
		def get_matching_variables(variables, pattern):
		# Regex pattern to match strings containing .summary
		pattern = re.compile(pattern)

		Parameters
		----------
		dict1 : dict
		The first dictionary to merge.
		dict2 : dict
		The second dictionary to merge. If there are duplicate keys, the values
		from this dictionary will overwrite those from the first dictionary.
		# Filtering the list using the regex pattern
		filtered_vars = [var for var in variables if pattern.match(var)]

		Returns
		-------
		merged_dict : dict
		A new dictionary containing all the keys and values from both input dictionaries.
		If there are duplicate keys, the values from `dict2` will overwrite those from `dict1`.
		"""
		merged_dict = {**dict1, **dict2}
		return merged_dict
		return filtered_vars


		class ThermoFluidsModel:
		"""
		A class to represent a thermo-fluids model using an FMU (Functional Mock-up Unit).

		This class encapsulates the initialization, simulation step execution, data conversion,
		and cleanup processes for the FMU-based thermo-fluids model. It provides methods to
		initialize the model, execute simulation steps, generate runtime values, calculate Power
		Usage Effectiveness (PUE), and properly manage the FMU resources.

		Attributes
		----------
		FMU_PATH : str
		The file path to the FMU file.
		fmu_history : list
		A list to store the history of FMU states.
		A list to store the history of FMU states, combining cooling input, datacenter output,
		and central energy plant (CEP) output for each simulation step.
		inputs : list
		A list of input variables for the FMU.
		outputs : list
		@@ -72,21 +54,29 @@ class ThermoFluidsModel:
		The directory where the FMU file is extracted.
		fmu : FMU2Slave
		The instantiated FMU object.
		weather : Optional
		An object that provides weather-related data for simulations. Used when replay mode is on.

		Methods
		-------
		initialize():
		Initializes the FMU by extracting the file and setting up the model.
		step(current_time, fmu_inputs, step_size):
		Executes a simulation step with the given inputs and step size.
		convert_rowsdict_to_array(data):
		Converts the row dictionary data to a numpy array.
		Initializes the FMU by extracting the file, reading the model description, setting up input and output variables,
		and preparing the model for simulation.
		generate_runtime_values(cdu_power, sc) -> dict:
		Generates runtime values dynamically for the FMU inputs based on CDU power and other configuration parameters.
		generate_fmu_inputs(runtime_values: dict, uncertainties: bool = False) -> list:
		Converts runtime values to a list suitable for FMU inputs, handling uncertainties if specified.
		calculate_pue(cooling_input: dict, datacenter_output: dict, cep_output: dict) -> float:
		Calculates the Power Usage Effectiveness (PUE) of the data center based on the cooling, datacenter,
		and CEP output power values.
		step(current_time: float, fmu_inputs: list, step_size: float) -> Tuple[dict, dict, dict, float]:
		Executes a simulation step with the given inputs and step size. Returns the cooling input, datacenter output,
		CEP output, and PUE for the current step.
		terminate():
		Terminates the FMU instance.
		Terminates the FMU instance, ensuring that all resources are properly released.
		cleanup():
		Cleans up the extracted FMU directory.
		Cleans up the extracted FMU directory, ensuring no temporary files are left behind.
		"""

		def __init__(self, FMU_PATH):
		"""
		Constructs all the necessary attributes for the ThermoFluidsModel object.
		@@ -102,9 +92,7 @@ class ThermoFluidsModel:
		self.outputs = None
		self.unzipdir = None
		self.fmu = None
		self.template = None
		self.fmu_output_keys = []
		self.current_result = None
		self.weather = None

		def initialize(self):
		"""
		@@ -120,17 +108,17 @@ class ThermoFluidsModel:
		# Unzip the FMU file and get the unzip directory
		self.unzipdir = extract(self.FMU_PATH)
		model_description = read_model_description(self.FMU_PATH)
		# Collect value references
		vrs = {}

		# Add to list of variable names
		var_model = []
		for variable in model_description.modelVariables:
		vrs[variable.name] = variable.valueReference
		var_model.append(variable.name)

		outputs = get_matching_variables(var_model, r'.*(\.summary\.|^summary).*')

		# Get the value references for the variables we want to get/set
		self.inputs = [v for v in model_description.modelVariables if v.causality == 'input']
		self.outputs = [v for v in model_description.modelVariables if v.causality == 'output']

		# Dynamically determine the FMU Output Keys
		self.fmu_output_keys = self.generate_fmu_output_keys()
		self.outputs = [v for v in model_description.modelVariables if v.name in outputs]

		# Instantiate and initialize the FMU
		self.fmu = FMU2Slave(guid=model_description.guid,
		@@ -142,46 +130,37 @@ class ThermoFluidsModel:
		self.fmu.enterInitializationMode()
		self.fmu.exitInitializationMode()

		def generate_fmu_output_keys(self):
		"""
		Generates the fmu output keys dynamically based on FMU's output variable names,
		preserving the order in which they appear.

		Returns
		-------
		output_keys : list of str
		A list of unique base names of the output variables in their order of appearance.
		"""
		seen_keys = OrderedDict()
		for output in self.outputs:
		# Split the name at the first '[' and take the base part
		base_name = output.name.split('[')[0]
		if base_name not in seen_keys:
		seen_keys[base_name] = None

		# Return the keys as a list
		return list(seen_keys.keys())

		def generate_runtime_values(self, cdu_power):
		def generate_runtime_values(self, cdu_power, sc) -> dict:
		"""
		Generate the runtime values for the FMU inputs dynamically.

		Parameters:
		cdu_power (array): The array of CDU powers.
		wetbulb_temp (float): The wetbulb temperature.
		sc (Scheduler Object): The current instance of a Scheduler.

		Returns:
		dict: A dictionary with the runtime values for the FMU inputs.
		"""
		runtime_values = {}

		# Dynamically generate the power inputs
		for i in range(NUM_CDUS):
		key = f"power[{i+1}]"
		runtime_values[key] = cdu_power[i] * COOLING_EFFICIENCY
		runtime_values = {
		f"simulator_1_datacenter_1_computeBlock_{i+1}_cabinet_1_sources_Q_flow_total": cdu_power[i] * COOLING_EFFICIENCY / RACKS_PER_CDU
		for i in range(NUM_CDUS)
		}

		# Default temperature is from the config
		temperature = WET_BULB_TEMP

		# If replay mode is on and weather data is available
		if sc.replay and self.weather and self.weather.start is not None and self.weather.has_coords:
		# Convert total seconds to timedelta object
		delta = timedelta(seconds=sc.current_time)
		target_datetime = self.weather.start + delta

		# Add the wetbulb temperature
		runtime_values["Towb"] = WET_BULB_TEMP
		# Get temperature from weather data
		temperature = self.weather.get_temperature(target_datetime) or WET_BULB_TEMP

		# Set the temperature value
		runtime_values[TEMPERATURE_KEY] = temperature

		return runtime_values

		@@ -189,123 +168,137 @@ class ThermoFluidsModel:
		"""
		Convert the runtime values based on the cooling model's inputs to a list suitable for FMU inputs.
		Raises an error if any input key is missing in runtime values.

		Parameters
		----------
		runtime_values : dict
		A dictionary containing runtime values for FMU inputs.
		uncertainties : bool, optional
		If True, processes the values to strip uncertainties for certain inputs.

		Returns
		-------
		fmu_inputs : list
		A list of input values suitable for FMU.
		"""
		# Initialize an empty list for FMU inputs
		fmu_inputs = []

		# Helper function to process uncertainty
		def process_uncertainty(value):
		"""Strip uncertainty if present, otherwise return the value as-is."""
		# Convert to nominal value if it's an AffineScalarFunc and uncertainties flag is set
		return unumpy.nominal_values(value) if uncertainties and isinstance(value, AffineScalarFunc) else value

		# Iterate through the cooling model's inputs
		for input_var in self.inputs:
		input_name = input_var.name # Get the name of the input variable
		# Check if the input name matches any key in the runtime values
		if input_name in runtime_values:
		# Append the value from runtime values to fmu_inputs
		if uncertainties:
		# Strip only the power values of the uncertainty, others should not be a ufloat
		# #Alternative uncomment line below and remove pattern match:
		# #fmu_inputs.append(unumpy.nominal_values(runtime_values[input_name]))
		pattern = re.compile(r"power", re.IGNORECASE)
		if bool(pattern.search(input_name)):
		fmu_inputs.append(unumpy.nominal_values(runtime_values[input_name]))
		else:
		fmu_inputs.append(runtime_values[input_name])
		else:
		fmu_inputs.append(runtime_values[input_name])
		else:
		# If you have additional values that the fmu isn't expecting
		# nothing will happen. However, an error will be raised
		# if a value for an expected key is missing in runtime values

		# Fetch the runtime value for the input name
		try:
		value = runtime_values[input_name]
		except KeyError:
		raise KeyError(f"Missing value for key '{input_name}' in runtime values.")

		# Process the value based on uncertainty and append
		fmu_inputs.append(process_uncertainty(value))

		return fmu_inputs

		def step(self, current_time, fmu_inputs, step_size):

		def calculate_pue(self, cooling_input, datacenter_output, cep_output):
		"""
		Executes a simulation step with the given inputs and step size.
		Calculate the Power Usage Effectiveness (PUE) of the data center.

		Parameters
		----------
		current_time : float
		The current simulation time.
		fmu_inputs : list
		A list of input values to set in the FMU.
		step_size : float
		The size of the simulation step.
		cooling_input : dict
		A dictionary containing input power values for cooling.
		datacenter_output : dict
		A dictionary containing output power values for the datacenter.
		cep_output : dict
		A dictionary containing output power values for the central energy plant.

		Returns
		-------
		data_array : numpy.ndarray
		A numpy array containing the simulation results for the current step.
		pue : float
		The calculated Power Usage Effectiveness (PUE).
		"""
		# Simulation Loop
		for index, v in enumerate(self.inputs):
		self.fmu.setReal([v.valueReference], [fmu_inputs[index]])
		# Utility function to convert kW to Watts
		def convert_to_watts(value_in_kw):
		"""Convert a value in kilowatts to Watts."""
		return np.array(value_in_kw) * 1e3 if value_in_kw is not None else 0.0

		# Perform one step
		self.fmu.doStep(currentCommunicationPoint=current_time, communicationStepSize=step_size)
		# Convert values from kW to Watts using the utility function
		W_HTWPs = convert_to_watts(cep_output.get(W_HTWPs_KEY))
		W_CTWPs = convert_to_watts(cep_output.get(W_CTWPs_KEY))
		W_CTs = convert_to_watts(cep_output.get(W_CTs_KEY))

		# Get the sum of the work done by all CDU pumps
		W_CDUPs = sum(
		convert_to_watts(datacenter_output.get(f'simulator[1].datacenter[1].computeBlock[{idx+1}].cdu[1].summary.W_flow_CDUP_kW'))
		for idx in range(NUM_CDUS)
		)

		# Get the values for 'inputs' and 'outputs'
		val_inputs = {}
		for v in self.inputs:
		val_inputs[v.name] = self.fmu.getReal([v.valueReference])[0]
		# Sum all values in the cooling_input dictionary
		total_cooling_input_power = np.sum(list(cooling_input.values()))

		val_outputs = {}
		for v in self.outputs:
		val_outputs[v.name] = self.fmu.getReal([v.valueReference])[0]
		# Ensure a non-zero value for total input power to avoid division by zero
		total_input_power = np.maximum(total_cooling_input_power, 1e-3)

		val_time = {'time': current_time}
		# Append the results
		rows_dict = merge_dicts(merge_dicts(val_time, val_inputs), val_outputs)
		self.fmu_history.append(rows_dict)
		data_array = self.convert_dict_to_array(val_outputs)
		self.current_result = data_array # Store the current fmu results for this timestep
		# Calculate PUE
		pue = (total_input_power + np.sum(W_CDUPs) + np.sum(W_HTWPs) + np.sum(W_CTWPs) + np.sum(W_CTs)) / total_input_power

		return data_array
		return pue

		def convert_dict_to_array(self, data):
		def step(self, current_time, fmu_inputs, step_size):
		"""
		Converts the row dictionary data to a numpy array.
		Executes a simulation step with the given inputs and step size.

		Parameters
		----------
		data : dict
		A dictionary containing the row data.
		current_time : float
		The current simulation time.
		fmu_inputs : list
		A list of input values to set in the FMU.
		step_size : float
		The size of the simulation step.

		Returns
		-------
		data_array : numpy.ndarray
		A numpy array with the extracted data values.
		cooling_input : dict
		A dictionary containing the input values for cooling.
		datacenter_output : dict
		A dictionary containing the output values for the datacenter.
		cep_output : dict
		A dictionary containing the output values for the central energy plant.
		pue : float
		The Power Usage Effectiveness (PUE) calculated from the outputs.
		"""
		data_array = np.zeros((NUM_CDUS, len(self.fmu_output_keys)))

		keys_to_extract = [f'{base}[{i}]' for base in self.fmu_output_keys for i in range(1, NUM_CDUS + 1)]
		# Iterate through the keys in data and extract relevant values to fill the array
		for key, value in data.items():
		if key in keys_to_extract:
		# Extract the unit number from the key, e.g.:
		#('cdu_coolingsubsystem_0_liquidoutlet_0_
		# liquidflow_secondary[1]' -> 1)
		parts = key.split('[')
		base_key = parts[0]
		unit_number = int(parts[1].split(']')[0])
		# Set FMU inputs
		for index, v in enumerate(self.inputs):
		self.fmu.setReal([v.valueReference], [fmu_inputs[index]])

		# Adjust the unit_number to be 1-based (Python uses 0-based indexing)
		unit_number -= 1
		# Perform one step in the FMU
		self.fmu.doStep(currentCommunicationPoint=current_time, communicationStepSize=step_size)

		# Find the index of base_key in self.fmu_output_keys
		base_index = self.fmu_output_keys.index(base_key)
		# Initialize dictionaries for cooling input, datacenter output, and CEP output
		cooling_input = {v.name: self.fmu.getReal([v.valueReference])[0] for v in self.inputs}
		datacenter_output = {v.name: self.fmu.getReal([v.valueReference])[0] for v in self.outputs if "datacenter" in v.name}
		cep_output = {v.name: self.fmu.getReal([v.valueReference])[0] for v in self.outputs if "centralEnergyPlant" in v.name}

		# Fill the corresponding element in the array
		data_array[unit_number, base_index] = value
		return data_array
		# Calculate PUE
		pue = self.calculate_pue(cooling_input, datacenter_output, cep_output)

		def get_cooling_df(self):
		# Initialize the columns for cooling_df
		cooling_columns = self.fmu_output_keys
		# Append time to each dictionary
		cooling_input['time'] = current_time
		datacenter_output['time'] = current_time
		cep_output['time'] = current_time

		# Generate cooling_df
		cooling_df = pd.DataFrame(self.current_result, columns=cooling_columns)
		# Append the combined results to the history
		self.fmu_history.append({**cooling_input, **datacenter_output, **cep_output})

		return cooling_df
		return cooling_input, datacenter_output, cep_output, pue

		def terminate(self):
		"""