From 05cdcc122dbc76c91d32fcbfabdc4cd7c9c9bb39 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 18 Sep 2025 09:33:19 -0400 Subject: [PATCH 01/17] Add top-level exports --- raps/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/raps/__init__.py b/raps/__init__.py index e69de29..a15939f 100644 --- a/raps/__init__.py +++ b/raps/__init__.py @@ -0,0 +1,8 @@ +from .sim_config import SimConfig, SingleSimConfig, MultiPartSimConfig +from .system_config import ( + SystemConfig, SystemCoolingConfig, SystemNetworkConfig, SystemPowerConfig, SystemSchedulerConfig, + SystemSystemConfig, SystemUqConfig, +) +from raps.schedulers.default import PolicyType, BackfillType +from .engine import Engine +from .multi_part_engine import MultiPartEngine -- GitLab From e97d87b9f35c65568b448924da8f454c6868c33f Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 18 Sep 2025 10:49:07 -0400 Subject: [PATCH 02/17] Interpret paths relative to config files --- config/adastraMI250.yaml | 2 +- config/frontier.yaml | 2 +- config/lassen.yaml | 2 +- config/marconi100.yaml | 2 +- config/summit.yaml | 2 +- raps/raps_config.py | 4 ++-- raps/sim_config.py | 8 ++++---- raps/system_config.py | 28 ++++++++++++++-------------- raps/telemetry.py | 4 ++-- raps/utils.py | 25 ++++++++++++++++++++++--- 10 files changed, 49 insertions(+), 30 deletions(-) diff --git a/config/adastraMI250.yaml b/config/adastraMI250.yaml index c7b95b8..bff85c1 100644 --- a/config/adastraMI250.yaml +++ b/config/adastraMI250.yaml @@ -90,7 +90,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_path: "../models/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/frontier.yaml b/config/frontier.yaml index 3102f31..884c941 100644 --- a/config/frontier.yaml +++ b/config/frontier.yaml @@ -63,7 +63,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' 
country_code: US - fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_path: "../models/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/lassen.yaml b/config/lassen.yaml index 640c55e..594479d 100644 --- a/config/lassen.yaml +++ b/config/lassen.yaml @@ -56,7 +56,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '94550' country_code: US - fmu_path: "models/POWER9CSM/fmus/lassen.fmu" + fmu_path: "../models/POWER9CSM/fmus/lassen.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/marconi100.yaml b/config/marconi100.yaml index 797153e..0e66a7e 100644 --- a/config/marconi100.yaml +++ b/config/marconi100.yaml @@ -52,7 +52,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '30170' country_code: IT - fmu_path: "models/POWER9CSM/fmus/marconi100.fmu" + fmu_path: "../models/POWER9CSM/fmus/marconi100.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/summit.yaml b/config/summit.yaml index 8dc6fe3..7b2b5fe 100644 --- a/config/summit.yaml +++ b/config/summit.yaml @@ -52,7 +52,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/POWER9CSM/fmus/summit.fmu" + fmu_path: "../models/POWER9CSM/fmus/summit.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/raps/raps_config.py b/raps/raps_config.py index 6eddca8..d1e1385 100644 --- a/raps/raps_config.py +++ b/raps/raps_config.py @@ -1,5 +1,5 @@ from pathlib import Path -from raps.utils import ExpandedPath +from raps.utils import ResolvedPath from pydantic_settings import BaseSettings, SettingsConfigDict, YamlConfigSettingsSource ROOT_DIR = Path(__file__).parent.parent @@ -13,7 +13,7 @@ class RapsConfig(BaseSettings): # We'll be using SimConfig in the simulation server and those settings aren't 
applicable there, # so it makes sense to keep SimConfig scoped to the logical operation of the sim. - system_config_dir: ExpandedPath = ROOT_DIR / 'config' + system_config_dir: ResolvedPath = ROOT_DIR / 'config' """ Directory containing system configuration files """ model_config = SettingsConfigDict( diff --git a/raps/sim_config.py b/raps/sim_config.py index c27a2ab..7afb2f0 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -8,7 +8,7 @@ from annotated_types import Len import importlib from raps.schedulers.default import PolicyType, BackfillType from raps.utils import ( - parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, create_casename, + parse_time_unit, convert_to_time_unit, infer_time_unit, ResolvedPath, create_casename, RAPSBaseModel, AutoAwareDatetime, SmartTimedelta, yaml_dump, ) from raps.system_config import SystemConfig, get_partition_configs, get_system_config @@ -79,7 +79,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): seed: int | None = None """ Set RNG seed for deterministic simulation """ - output: ExpandedPath | Literal['none'] | None = None + output: ResolvedPath | Literal['none'] | None = None """ Where to output power, cooling, and loss models for later analysis. If omitted it will output to raps-output- by default. 
@@ -112,7 +112,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): imtype: Literal["png", "svg", "jpg", "pdf", "eps"] = "png" """ Plot image type """ - replay: list[ExpandedPath] | None = None + replay: list[ResolvedPath] | None = None """ Either: path/to/joblive path/to/jobprofile OR filename.npz """ encrypt: bool = False @@ -214,7 +214,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): # Accounts accounts: bool = False - accounts_json: ExpandedPath | None = None + accounts_json: ResolvedPath | None = None """ Path to accounts JSON file from previous run """ # Downtime diff --git a/raps/system_config.py b/raps/system_config.py index bd405be..9f8d6a9 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -7,9 +7,9 @@ from functools import cached_property import yaml from pydantic import ( model_validator, field_validator, model_serializer, SerializationInfo, - SerializerFunctionWrapHandler, + SerializerFunctionWrapHandler, ValidationInfo, ) -from raps.utils import RAPSBaseModel, deep_merge, deep_subtract_dicts +from raps.utils import RAPSBaseModel, deep_merge, deep_subtract_dicts, is_yaml_file, ResolvedPath, resolve_path from raps.raps_config import raps_config # Define Pydantic models for the config to handle parsing and validation @@ -130,7 +130,7 @@ class SystemCoolingConfig(RAPSBaseModel): wet_bulb_temp: float zip_code: str | None = None country_code: str | None = None - fmu_path: str + fmu_path: ResolvedPath fmu_column_mapping: dict[str, str] w_htwps_key: str w_ctwps_key: str @@ -179,9 +179,12 @@ class SystemConfig(RAPSBaseModel): network: SystemNetworkConfig | None = None @model_validator(mode="before") - def _load_base(cls, data): + def _load_base(cls, data, info: ValidationInfo): if isinstance(data, dict) and data.get("base"): - base = get_system_config(data['base']) + if is_yaml_file(data['base']): + base = get_system_config(str(resolve_path(data['base'], info))) + else: + base = get_system_config(data['base']) data = 
deep_merge(base.model_dump(mode='json'), data) return data @@ -263,13 +266,12 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: """ if isinstance(system, SystemConfig): # Just pass system through if its already parsed return system - - if system in list_systems(): + elif is_yaml_file(system): + config_path = Path(system).expanduser().resolve() + system_name = config_path.stem + else: config_path = raps_config.system_config_dir / f"{system}.yaml" system_name = system - else: - config_path = Path(system).resolve() - system_name = config_path.stem if not config_path.is_file(): raise FileNotFoundError(f'"{system}" not found. Valid systems are: {list_systems()}') @@ -277,10 +279,8 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: "system_name": system_name, # You can override system_name in the yaml as well **yaml.safe_load(config_path.read_text()), } - base = str(config.get('base', '')) - if base.endswith(".yaml"): - config['base'] = str(config_path.parent / base) # path relative to yaml - return SystemConfig.model_validate(config) + # Pass context so paths in the SystemConfig can be resolved relative to the yaml file + return SystemConfig.model_validate(config, context = {'base_path': config_path.parent}) def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitionSystemConfig: diff --git a/raps/telemetry.py b/raps/telemetry.py index 915fc97..b7f29b7 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -28,7 +28,7 @@ from raps.plotting import ( plot_network_histogram ) from raps.utils import ( - next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadData, RAPSBaseModel, + next_arrival_byconfargs, pydantic_add_args, SubParsers, ResolvedPath, WorkloadData, RAPSBaseModel, ) @@ -36,7 +36,7 @@ from raps.utils import ( class TelemetryArgs(RAPSBaseModel): jid: str = '*' """ Replay job id """ - replay: list[ExpandedPath] | None = None + replay: list[ResolvedPath] | None = None """ 
path/to/joblive path/to/jobprofile -or- filename.npz (overrides --workload option) """ plot: list[Literal["jobs", "nodes"]] | None = None is_results_file: bool = False diff --git a/raps/utils.py b/raps/utils.py index 4414fd9..0eec0ea 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -22,7 +22,8 @@ import argparse from pathlib import Path from typing import Annotated as A, TypeVar, Callable, TypeAlias from pydantic import ( - BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, AwareDatetime, ValidationError + BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, AwareDatetime, ValidationError, + ValidationInfo, ) from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource import yaml @@ -683,8 +684,21 @@ def normalize_tz(d: datetime): return d.astimezone(timezone.utc) -ExpandedPath = A[Path, AfterValidator(lambda v: Path(v).expanduser().resolve())] -""" Type that that expands ~ and environment variables in a path string """ +def resolve_path(path: str | Path, info: ValidationInfo): + path = Path(path).expanduser() + if isinstance(info.context, dict) and info.context.get("base_path"): + base_path = Path(info.context["base_path"]).expanduser().resolve() + else: + base_path = Path.cwd() + return (base_path / path).resolve() + + +ResolvedPath = A[Path, AfterValidator(resolve_path)] +""" +Resolve a path, and expand ~ in the path string. +Paths can be resolved relative to a specific path instead of cwd by passing +`context={"base_path": "my/path"}` in model_validate(). +""" AutoAwareDatetime = A[datetime, AfterValidator(normalize_tz)] """ Datetime type wrapper, makes sure timezone is set """ @@ -799,6 +813,11 @@ def read_yaml(config_file: str): return {} +def is_yaml_file(path: str | Path): + """ Return true if the path is .yaml, .yml, or .json """ + return Path(path).suffix in ['.yaml', '.yml', '.json'] + + class WorkloadData(RAPSBaseModel): """ Represents a workload, a list of jobs with some metadata. 
Returned by dataloaders load_data() -- GitLab From 4c68e9f96742af11158df0c84959ce7ecbebf44c Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Fri, 19 Sep 2025 12:50:36 -0400 Subject: [PATCH 03/17] Floor dates --- raps/sim_config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/raps/sim_config.py b/raps/sim_config.py index 7afb2f0..5a8fcce 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -1,6 +1,7 @@ import argparse import abc from pathlib import Path +import pandas as pd from functools import cached_property from datetime import timedelta from typing import Literal, Annotated as A @@ -283,6 +284,11 @@ class SimConfig(RAPSBaseModel, abc.ABC): if self.start and self.fastforward: raise ValueError("start and fastforward are mutually exclusive") + if self.start: + self.start = pd.Timestamp(self.start).floor(self.time_unit).to_pydatetime() + if self.end: + self.end = pd.Timestamp(self.end).floor(self.time_unit).to_pydatetime() + if self.end: if not self.start: raise ValueError("end requires start to be set") -- GitLab From c1bd6a30967759b56b3b6901fb95c0dc49b9f7f8 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Fri, 19 Sep 2025 13:58:30 -0400 Subject: [PATCH 04/17] Add script to fetch fmu models --- .gitignore | 2 +- Makefile | 8 ++++++++ config/adastraMI250.yaml | 2 +- config/frontier.yaml | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 5f7f2b5..ed10fab 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,5 @@ venv *.npz *.prof simulation_results/ -models/*.fmu +models/fmu-models .shell-completion-cache diff --git a/Makefile b/Makefile index a2f4211..d66f02c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,6 @@ #Makefile +SHELL := /bin/bash + .PHONY: pip run docker_build docker_run all: pip @@ -19,3 +21,9 @@ docker_build: docker_run: docker run --platform linux/amd64 -it $(IMAGE_NAME) +fetch-fmu-models: + if [ ! 
-d ./models/fmu-models ]; then \ + git clone git@code.ornl.gov:exadigit/fmu-models.git ./models/fmu-models; \ + else \ + git -C ./models/fmu-models pull; \ + fi diff --git a/config/adastraMI250.yaml b/config/adastraMI250.yaml index bff85c1..88f68e9 100644 --- a/config/adastraMI250.yaml +++ b/config/adastraMI250.yaml @@ -90,7 +90,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "../models/Simulator_olcf5_base.fmu" + fmu_path: "../models/fmu-models/Frontier/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/frontier.yaml b/config/frontier.yaml index 884c941..84891c1 100644 --- a/config/frontier.yaml +++ b/config/frontier.yaml @@ -63,7 +63,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "../models/Simulator_olcf5_base.fmu" + fmu_path: "../models/fmu-models/Frontier/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" -- GitLab From 808c7cae531d0da7e51bfbdaeb9c4a0a76dc3a5c Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Mon, 22 Sep 2025 11:50:46 -0400 Subject: [PATCH 05/17] Add convenience method for get_stats --- raps/engine.py | 9 +++++++++ raps/stats.py | 10 +++++----- tests/util.py | 16 ++-------------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 67cd999..7075fef 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -40,6 +40,7 @@ from raps.account import Accounts from raps.downtime import Downtime from raps.weather import Weather from raps.sim_config import SimConfig +import raps.stats as engine_stats from bisect import bisect_right @@ -860,3 +861,11 @@ class Engine: self.power_manager.loss_history.append((self.current_timestep, total_loss_kw)) # engine self.sys_power = total_power_kw + + def get_stats(self): + return { + 'engine': engine_stats.get_engine_stats(self), + 
'job': engine_stats.get_job_stats(self), + 'scheduler': engine_stats.get_scheduler_stats(self), + 'network': engine_stats.get_network_stats(self) if self.simulate_network else {}, + } diff --git a/raps/stats.py b/raps/stats.py index a420151..1703f7c 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -10,10 +10,10 @@ try to keep statistics consolidated in this file. import sys from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss -from .engine import Engine +from . import engine -def get_engine_stats(engine: Engine): +def get_engine_stats(engine: "engine.Engine"): """ Return engine statistics """ timesteps = engine.current_timestep - engine.timestep_start num_samples = len(engine.power_manager.history) if engine.power_manager else 0 @@ -99,7 +99,7 @@ def min_max_sum(value, min, max, sum): return min, max, sum -def get_scheduler_stats(engine: Engine): +def get_scheduler_stats(engine: "engine.Engine"): if len(engine.scheduler_queue_history) != 0: average_queue = sum(engine.scheduler_queue_history) / len(engine.scheduler_queue_history) else: @@ -116,7 +116,7 @@ def get_scheduler_stats(engine: Engine): return stats -def get_network_stats(engine: Engine): +def get_network_stats(engine: "engine.Engine"): stats = {} if engine.net_util_history: @@ -141,7 +141,7 @@ def get_network_stats(engine: Engine): return stats -def get_job_stats(engine: Engine): +def get_job_stats(engine: "engine.Engine"): """ Return job statistics processed over the engine execution""" # Information on Job-Mix min_job_size, max_job_size, sum_job_size = sys.maxsize, -sys.maxsize - 1, 0 diff --git a/tests/util.py b/tests/util.py index 46736b3..7481564 100644 --- a/tests/util.py +++ b/tests/util.py @@ -6,9 +6,6 @@ import json from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine from raps.sim_config import SingleSimConfig, MultiPartSimConfig -from raps.stats import ( - get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats, -) def 
find_project_root(): @@ -41,15 +38,6 @@ def _get_cmd(config, sub_cmd): return f"echo {shlex.quote(json.dumps(config))} | python main.py {sub_cmd} - -o none" -def _get_stats(engine: Engine): - return { - 'engine': get_engine_stats(engine), - 'job': get_job_stats(engine), - 'scheduler': get_scheduler_stats(engine), - 'network': get_network_stats(engine) if engine.simulate_network else None, - } - - def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]]: """ Run a simulation to completion. Returns the completed Engine and a dict containing the engine @@ -73,7 +61,7 @@ def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]] if include_ticks: stats['tick_datas'].append(tick) - stats.update(_get_stats(engine)) + stats.update(engine.get_stats()) return engine, stats @@ -103,6 +91,6 @@ def run_multi_part_engine(sim_config, include_ticks=False) -> tuple[MultiPartEng stats['tick_datas'].append(tick) for partition, engine in multi_engine.engines.items(): - stats['partitions'][partition] = _get_stats(engine) + stats['partitions'][partition] = engine.get_stats() return multi_engine, stats -- GitLab From 66008f99d932cb6e4314d1076f3c8b789df472ed Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 23 Sep 2025 10:43:44 -0400 Subject: [PATCH 06/17] Formatting --- main.py | 6 +++--- raps/__init__.py | 8 ++++++++ raps/constants.py | 1 - raps/system_config.py | 2 +- tests/systems/test_main_fastforward_run.py | 2 -- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index ab464b0..18ecd9a 100755 --- a/main.py +++ b/main.py @@ -16,9 +16,9 @@ import argcomplete # Importing all of raps' dependencies like pandas etc can be rather slow, often taking 1-2 seconds. So for snappy shell # completion we need avoid imports on the shell completion path. We could do this by shuffling the code around to # create the parser without importing any heavy-weight libraries. 
But that would be a pain to maintain and track that -# pandas or scipy aren't accidentally imported transitively. Pandas can also be convenient to use in validating SimConfig -# etc, which is needed to build the argparser. So instead, we cache the generated argparser object so that shell -# completion can run without importing the rest of raps. +# pandas or scipy aren't accidentally imported transitively. Pandas can also be convenient to use in validating +# SimConfig etc, which is needed to build the argparser. So instead, we cache the generated argparser object so that +# shell completion can run without importing the rest of raps. PARSER_CACHE = Path(__file__).parent / '.shell-completion-cache' diff --git a/raps/__init__.py b/raps/__init__.py index a15939f..a7f3523 100644 --- a/raps/__init__.py +++ b/raps/__init__.py @@ -6,3 +6,11 @@ from .system_config import ( from raps.schedulers.default import PolicyType, BackfillType from .engine import Engine from .multi_part_engine import MultiPartEngine + +__all__ = [ + "SimConfig", "SingleSimConfig", "MultiPartSimConfig", + "SystemConfig", "SystemCoolingConfig", "SystemNetworkConfig", "SystemPowerConfig", "SystemSchedulerConfig", + "SystemSystemConfig", "SystemUqConfig", + "PolicyType", "BackfillType", + "Engine", "MultiPartEngine", +] diff --git a/raps/constants.py b/raps/constants.py index 85b5e23..53711e1 100644 --- a/raps/constants.py +++ b/raps/constants.py @@ -2,7 +2,6 @@ RAPS Constants """ from pathlib import Path -from datetime import datetime ELLIPSES = '\u2026' OUTPUT_PATH = Path('simulation_results') diff --git a/raps/system_config.py b/raps/system_config.py index 9f8d6a9..8d8c7f5 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -280,7 +280,7 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: **yaml.safe_load(config_path.read_text()), } # Pass context so paths in the SystemConfig can be resolved relative to the yaml file - return SystemConfig.model_validate(config, context 
= {'base_path': config_path.parent}) + return SystemConfig.model_validate(config, context={'base_path': config_path.parent}) def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitionSystemConfig: diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 9fe3216..5a3b38f 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -1,5 +1,3 @@ -import os -import subprocess import pytest from ..util import run_engine -- GitLab From 1a25f38ba9ec068301beafdfebf2874d14561a7d Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 23 Sep 2025 13:21:40 -0400 Subject: [PATCH 07/17] Minor fix to system_config serialization Prevent all the unspecified fields showing up in the yaml unless you set show-defaults True --- raps/system_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/raps/system_config.py b/raps/system_config.py index 8d8c7f5..c1e795d 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -185,7 +185,8 @@ class SystemConfig(RAPSBaseModel): base = get_system_config(str(resolve_path(data['base'], info))) else: base = get_system_config(data['base']) - data = deep_merge(base.model_dump(mode='json'), data) + base_data = base.model_dump(mode='json', exclude_unset=True) + data = deep_merge(base_data, data) return data @model_serializer(mode='wrap') -- GitLab From 376478306d8aa3cd4d9f15de6109fb41c049d46e Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 23 Sep 2025 13:52:32 -0400 Subject: [PATCH 08/17] Add fast option to stats --- raps/stats.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/raps/stats.py b/raps/stats.py index 1703f7c..832ad35 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -13,8 +13,11 @@ from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss from . 
import engine -def get_engine_stats(engine: "engine.Engine"): - """ Return engine statistics """ +def get_engine_stats(engine: "engine.Engine", *, fast = False): + """ + Return engine statistics + Setting `fast = True` excludes some stats that are more expensive to calculate. + """ timesteps = engine.current_timestep - engine.timestep_start num_samples = len(engine.power_manager.history) if engine.power_manager else 0 time_simulated = convert_seconds_to_hhmmss(timesteps / engine.downscale) @@ -51,7 +54,7 @@ def get_engine_stats(engine: "engine.Engine"): else: stats['jobs_completed_percentage'] = 0 - if engine.node_occupancy_history: + if not fast and engine.node_occupancy_history: # Calculate average concurrent jobs per node (average density across all nodes and timesteps) total_jobs_running_timesteps = 0 max_concurrent_jobs_per_node = 0 @@ -72,12 +75,12 @@ def get_engine_stats(engine: "engine.Engine"): sum_jobs_per_active_node += sum(active_nodes_in_timestep) / len(active_nodes_in_timestep) count_active_timesteps_for_avg_active += 1 - # Average jobs per *active* node (user's desired "1" type) - avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \ - if count_active_timesteps_for_avg_active > 0 else 0 + # Average jobs per *active* node (user's desired "1" type) + avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \ + if count_active_timesteps_for_avg_active > 0 else 0 - stats['avg_concurrent_jobs_per_active_node'] = avg_jobs_per_active_node - stats['max_concurrent_jobs_per_node'] = max_concurrent_jobs_per_node + stats['avg_concurrent_jobs_per_active_node'] = avg_jobs_per_active_node + stats['max_concurrent_jobs_per_node'] = max_concurrent_jobs_per_node else: stats['avg_concurrent_jobs_per_node'] = None stats['max_concurrent_jobs_per_node'] = None -- GitLab From aeea2bc403adba31cbc60f794eadeb712b32f3fc Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 23 Sep 2025 14:56:43 -0400 
Subject: [PATCH 09/17] Better --system help message --- raps/sim_config.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/raps/sim_config.py b/raps/sim_config.py index 5a8fcce..48103f0 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -12,7 +12,7 @@ from raps.utils import ( parse_time_unit, convert_to_time_unit, infer_time_unit, ResolvedPath, create_casename, RAPSBaseModel, AutoAwareDatetime, SmartTimedelta, yaml_dump, ) -from raps.system_config import SystemConfig, get_partition_configs, get_system_config +from raps.system_config import SystemConfig, get_partition_configs, get_system_config, list_systems from pydantic import model_validator, Field Distribution = Literal['uniform', 'weibull', 'normal'] @@ -425,12 +425,15 @@ class SimConfig(RAPSBaseModel, abc.ABC): class SingleSimConfig(SimConfig, abc.ABC): - system: SystemConfig | str = "frontier" - """ - Name of the system to simulate, e.g "frontier". Can also be a path to a yaml file containing - the SystemConfig. You can also make modificiations to the SystemConfig on the CLI using - `--system.base`, e.g. `--system.base frontier --system.cooling.fmu-path path/to/my.fmu` - """ + # Dynamic help string + system: A[SystemConfig | str, Field(description=f""" + Name of the system to simulate or a path to a yaml file containing the SystemConfig. + + You can also make modifications to the SystemConfig on the CLI using `--system.base`, e.g + `--system.base frontier --system.cooling.fmu-path path/to/my.fmu`. + + Built-in systems: {', '.join(list_systems())} + """)] = "frontier" @property def system_name(self) -> str: -- GitLab From 4c6b575172573d9e9cc97a979b35fac5b07881e6 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 23 Sep 2025 15:17:39 -0400 Subject: [PATCH 10/17] Add get_gauge_limits back This was removed in 19f0189 when the scheduler was refactored. Add it back as an Engine method. 
--- raps/engine.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/raps/engine.py b/raps/engine.py index 7075fef..7359f9b 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -869,3 +869,15 @@ class Engine: 'scheduler': engine_stats.get_scheduler_stats(self), 'network': engine_stats.get_network_stats(self) if self.simulate_network else {}, } + + def get_gauge_limits(self): + """For setting max values in dashboard gauges""" + peak_flops = self.flops_manager.get_rpeak() + peak_power = self.power_manager.get_peak_power() + gflops_per_watt_max = peak_flops / 1E9 / peak_power + + return { + 'peak_flops': peak_flops, + 'peak_power': peak_power, + 'g_flops_w_peak': gflops_per_watt_max + } -- GitLab From dea2243ef7be0209e8b5aa9df26e9751b0101180 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 24 Sep 2025 09:48:21 -0400 Subject: [PATCH 11/17] Resolve paths in sim_config yaml relative to yaml --- raps/run_sim.py | 8 ++++---- raps/train_rl.py | 4 ++-- raps/utils.py | 46 +++++++++++++++++++++++++++++++++++----------- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/raps/run_sim.py b/raps/run_sim.py index 51bf6f5..aa2d9d9 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -11,7 +11,7 @@ from raps.ui import LayoutManager from raps.plotting import Plotter from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine -from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml +from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml_parsed from raps.stats import ( get_engine_stats, get_job_stats, @@ -38,7 +38,7 @@ def run_sim_add_parser(subparsers: SubParsers): "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( - impl=lambda args: run_sim(model_validate(args, read_yaml(args.config_file))) + impl=lambda args: run_sim(model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file))) ) @@ -209,7 +209,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): 
"cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( - impl=lambda args: run_parts_sim(model_validate(args, read_yaml(args.config_file))) + impl=lambda args: run_parts_sim(model_validate(args, read_yaml_parsed(MultiPartSimConfig, args.config_file))) ) @@ -293,7 +293,7 @@ def show_add_parser(subparsers: SubParsers): }) def impl(args): - sim_config = model_validate(args, read_yaml(args.config_file)) + sim_config = model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file)) show(sim_config, show_defaults=args.show_defaults) parser.set_defaults(impl=impl) diff --git a/raps/train_rl.py b/raps/train_rl.py index eac4172..d6ddd42 100644 --- a/raps/train_rl.py +++ b/raps/train_rl.py @@ -1,5 +1,5 @@ from raps.sim_config import SingleSimConfig, SIM_SHORTCUTS -from raps.utils import SubParsers, pydantic_add_args, read_yaml +from raps.utils import SubParsers, pydantic_add_args, read_yaml_parsed def train_rl_add_parser(subparsers: SubParsers): @@ -16,7 +16,7 @@ def train_rl_add_parser(subparsers: SubParsers): }) def impl(args): - model = model_validate(args, read_yaml(args.config_file)) + model = model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file)) model.scheduler = "rl" train_rl(model) parser.set_defaults(impl=impl) diff --git a/raps/utils.py b/raps/utils.py index 0eec0ea..8f84188 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -20,12 +20,12 @@ import uuid import json import argparse from pathlib import Path -from typing import Annotated as A, TypeVar, Callable, TypeAlias +from typing import Annotated as A, TypeVar, TypeAlias, Protocol from pydantic import ( BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, AwareDatetime, ValidationError, ValidationInfo, ) -from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource +from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource, SettingsError import yaml from raps.job import Job @@ -714,13 +714,18 @@ class 
RAPSBaseModel(BaseModel): ) -T = TypeVar("T", bound=BaseModel) +T = TypeVar("T", bound=BaseModel, covariant=True) + + +class ModelArgsValidator(Protocol[T]): + def __call__(self, args: argparse.Namespace, init_data: dict | None = None) -> T: + ... def pydantic_add_args( parser: argparse.ArgumentParser, model_cls: type[T], model_config: SettingsConfigDict | None = None, -) -> Callable[[argparse.Namespace, dict | None], T]: +) -> ModelArgsValidator[T]: """ Add arguments to the parser from the model. Returns a function that can be used to parse the model from the argparse args. @@ -749,20 +754,20 @@ def pydantic_add_args( cli_settings_source = CliSettingsSource(SettingsModel, root_parser=parser) - def model_validate_args(args: argparse.Namespace, data: dict | None = None): + def model_args_validator(args: argparse.Namespace, init_data: dict | None = None): try: model = CliApp.run(SettingsModel, cli_args=args, cli_settings_source=cli_settings_source, - **(data or {}), + **(init_data or {}), ) # Recreate model so we don't return the SettingsModel subclass # use exclude_unset so that model_field_set is preserved as well return model_cls.model_validate(model.model_dump(exclude_unset=True)) - except ValidationError as err: + except (ValidationError, SettingsError) as err: print(err) sys.exit(1) - return model_validate_args + return model_args_validator SubParsers: TypeAlias = "argparse._SubParsersAction[argparse.ArgumentParser]" @@ -798,7 +803,7 @@ def yaml_dump(data, header_comment=''): ) -def read_yaml(config_file: str): +def read_yaml(config_file: str | None) -> dict: """ Parses yaml file. 
Pass "-" to read from stdin """ # Assume stdin if not terminal if config_file == "-" or (not config_file and not sys.stdin.isatty()): @@ -808,9 +813,28 @@ def read_yaml(config_file: str): else: data = "" if data.strip(): - return yaml.safe_load(data) + result = yaml.safe_load(data) else: - return {} + result = {} + if not isinstance(result, dict): + result = {} + return result + + +def read_yaml_parsed(cls: type[T], config_file = None) -> dict: + """ Like read_yaml, but parses the input to resolve paths etc. """ + yaml_data = read_yaml(config_file) + if yaml_data: + # Resolve paths in yaml relative to yaml + base_path = Path(config_file).parent if config_file and config_file != "-" else None + try: + model = cls.model_validate(yaml_data, context={"base_path": base_path}) + except ValidationError as err: + print(f'Failed to parse yaml "{config_file}"') + print(err) + sys.exit(1) + yaml_data = model.model_dump(mode='json', exclude_unset=True) + return yaml_data def is_yaml_file(path: str | Path): -- GitLab From c0115d47a839f28510f7baafe25c1ec14e7762ef Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 25 Sep 2025 10:06:58 -0400 Subject: [PATCH 12/17] Improvements to error handling --- raps/utils.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index 8f84188..43f6e8b 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -27,6 +27,7 @@ from pydantic import ( ) from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource, SettingsError import yaml +from yaml import YAMLError from raps.job import Job @@ -817,23 +818,26 @@ def read_yaml(config_file: str | None) -> dict: else: result = {} if not isinstance(result, dict): - result = {} + raise ValueError("Expected yaml document to contain a top-level mapping") return result def read_yaml_parsed(cls: type[T], config_file = None) -> dict: - """ Like read_yaml, but parses the input to resolve paths etc. 
""" - yaml_data = read_yaml(config_file) - if yaml_data: - # Resolve paths in yaml relative to yaml - base_path = Path(config_file).parent if config_file and config_file != "-" else None - try: + """ + Like read_yaml, but parses the input to resolve paths etc. + Exits on error after printing message (for use in the CLI) + """ + try: + yaml_data = read_yaml(config_file) + if yaml_data: + # Resolve paths in yaml relative to the yaml file + base_path = Path(config_file).parent if config_file and config_file != "-" else None model = cls.model_validate(yaml_data, context={"base_path": base_path}) - except ValidationError as err: - print(f'Failed to parse yaml "{config_file}"') - print(err) - sys.exit(1) - yaml_data = model.model_dump(mode='json', exclude_unset=True) + yaml_data = model.model_dump(mode='json', exclude_unset=True) + except (ValidationError, ValueError, YAMLError) as err: + print(f'Failed to parse yaml "{config_file}"') + print(err) + sys.exit(1) return yaml_data -- GitLab From dbfd8bfbe00ae00da7bd80eff0ef5b799e004686 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 25 Sep 2025 10:15:29 -0400 Subject: [PATCH 13/17] More fixes to path resolution --- raps/system_config.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/raps/system_config.py b/raps/system_config.py index c1e795d..f35b2cb 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -182,10 +182,9 @@ class SystemConfig(RAPSBaseModel): def _load_base(cls, data, info: ValidationInfo): if isinstance(data, dict) and data.get("base"): if is_yaml_file(data['base']): - base = get_system_config(str(resolve_path(data['base'], info))) - else: - base = get_system_config(data['base']) - base_data = base.model_dump(mode='json', exclude_unset=True) + data['base'] = str(resolve_path(data['base'], info)) + base_model = get_system_config(data['base']) + base_data = base_model.model_dump(mode='json', exclude_unset=True) data = deep_merge(base_data, data) return data -- GitLab 
From 7379bbdde4e774bce9e46cbe540db69a018761e3 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 25 Sep 2025 16:47:12 -0400 Subject: [PATCH 14/17] Validate system reference immediately --- raps/sim_config.py | 30 +++++++++++++++++++++++------- raps/system_config.py | 19 ++++++++++++++----- raps/utils.py | 7 ++++--- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/raps/sim_config.py b/raps/sim_config.py index 48103f0..2f9bc44 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -12,7 +12,9 @@ from raps.utils import ( parse_time_unit, convert_to_time_unit, infer_time_unit, ResolvedPath, create_casename, RAPSBaseModel, AutoAwareDatetime, SmartTimedelta, yaml_dump, ) -from raps.system_config import SystemConfig, get_partition_configs, get_system_config, list_systems +from raps.system_config import ( + SystemConfig, get_partition_configs, get_system_config, list_systems, resolve_system_reference, +) from pydantic import model_validator, Field Distribution = Literal['uniform', 'weibull', 'normal'] @@ -435,13 +437,22 @@ class SingleSimConfig(SimConfig, abc.ABC): Built-in systems: {', '.join(list_systems())} """)] = "frontier" + @model_validator(mode="after") + def _validate_system(self, info): + self.system = resolve_system_reference(self.system, info) + try: + self._system_configs = [get_system_config(self.system)] + except FileNotFoundError as e: + raise ValueError(str(e)) + return self + @property def system_name(self) -> str: return self.system_configs[0].system_name - @cached_property + @property def system_configs(self) -> list[SystemConfig]: - return [get_system_config(self.system)] + return self._system_configs class MultiPartSimConfig(SimConfig): @@ -451,6 +462,15 @@ class MultiPartSimConfig(SimConfig): to custom SystemConfig yaml files. 
""" + @model_validator(mode="after") + def _validate_partitions(self, info): + self.partitions = [resolve_system_reference(p, info) for p in self.partitions] + try: + self._multi_partition_system_config = get_partition_configs(self.partitions) + except FileNotFoundError as e: + raise ValueError(str(e)) + return self + @property def system_name(self) -> str: return self._multi_partition_system_config.system_name @@ -459,10 +479,6 @@ class MultiPartSimConfig(SimConfig): def system_configs(self) -> list[SystemConfig]: return self._multi_partition_system_config.partitions - @cached_property - def _multi_partition_system_config(self): - return get_partition_configs(self.partitions) - SIM_SHORTCUTS = { "partitions": "x", diff --git a/raps/system_config.py b/raps/system_config.py index f35b2cb..5253e0b 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -9,7 +9,9 @@ from pydantic import ( model_validator, field_validator, model_serializer, SerializationInfo, SerializerFunctionWrapHandler, ValidationInfo, ) -from raps.utils import RAPSBaseModel, deep_merge, deep_subtract_dicts, is_yaml_file, ResolvedPath, resolve_path +from raps.utils import ( + RAPSBaseModel, deep_merge, deep_subtract_dicts, is_yaml_file, ResolvedPath, validate_resolved_path, +) from raps.raps_config import raps_config # Define Pydantic models for the config to handle parsing and validation @@ -181,8 +183,7 @@ class SystemConfig(RAPSBaseModel): @model_validator(mode="before") def _load_base(cls, data, info: ValidationInfo): if isinstance(data, dict) and data.get("base"): - if is_yaml_file(data['base']): - data['base'] = str(resolve_path(data['base'], info)) + data['base'] = resolve_system_reference(data['base'], info) base_model = get_system_config(data['base']) base_data = base_model.model_dump(mode='json', exclude_unset=True) data = deep_merge(base_data, data) @@ -267,7 +268,7 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: if isinstance(system, SystemConfig): # Just 
pass system through if its already parsed return system elif is_yaml_file(system): - config_path = Path(system).expanduser().resolve() + config_path = Path(system) system_name = config_path.stem else: config_path = raps_config.system_config_dir / f"{system}.yaml" @@ -304,7 +305,7 @@ def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitio matched_systems = fnmatch.filter(systems, pat) combined_system_name.extend(s.split("/")[0] for s in matched_systems) elif Path(pat).is_dir(): - matched_systems = sorted(Path(pat).glob("*.yaml")) + matched_systems = sorted([str(s) for s in Path(pat).glob("*.yaml")]) combined_system_name.append(Path(pat).name) else: matched_systems = sorted(glob.glob(pat)) @@ -322,3 +323,11 @@ def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitio system_name=combined_system_name, partitions=parsed_configs, ) + + +def resolve_system_reference(system: str | SystemConfig, info: ValidationInfo): + """ If system is a yaml path, resolve it as a path. Otherwise leave it as a string """ + if isinstance(system, str) and is_yaml_file(system): + return str(validate_resolved_path(system, info)) + else: + return system diff --git a/raps/utils.py b/raps/utils.py index 43f6e8b..f8eea90 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -685,22 +685,23 @@ def normalize_tz(d: datetime): return d.astimezone(timezone.utc) -def resolve_path(path: str | Path, info: ValidationInfo): +def validate_resolved_path(path: str | Path, info: ValidationInfo): path = Path(path).expanduser() if isinstance(info.context, dict) and info.context.get("base_path"): - base_path = Path(info.context["base_path"]).expanduser().resolve() + base_path = Path(info.context["base_path"]).expanduser() else: base_path = Path.cwd() return (base_path / path).resolve() -ResolvedPath = A[Path, AfterValidator(resolve_path)] +ResolvedPath = A[Path, AfterValidator(validate_resolved_path)] """ Resolve a path, and expand ~ in the path string. 
Paths can be resolved relative to specific path instead of cwd by passing `context={"base_path": "my/path"}` in model_validate(). """ + AutoAwareDatetime = A[datetime, AfterValidator(normalize_tz)] """ Datetime type wrapper, makes sure timezone is set """ -- GitLab From 0c3fce718ae53b9da75a612326713ca243eca10c Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 25 Sep 2025 17:08:16 -0400 Subject: [PATCH 15/17] Add check for paths on server --- raps/utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index f8eea90..de565d4 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -686,13 +686,18 @@ def normalize_tz(d: datetime): def validate_resolved_path(path: str | Path, info: ValidationInfo): + context = info.context or {} path = Path(path).expanduser() - if isinstance(info.context, dict) and info.context.get("base_path"): - base_path = Path(info.context["base_path"]).expanduser() + if context.get('base_path'): + base_path = Path(context["base_path"]).expanduser().resolve() else: base_path = Path.cwd() - return (base_path / path).resolve() - + path = (base_path / path).resolve() + # This is used on the simulation server to block reading arbitrary files + if context.get("force_under_base_path"): + if not path.is_relative_to(base_path): + raise ValueError(f"{path} is not under {base_path}") + return path ResolvedPath = A[Path, AfterValidator(validate_resolved_path)] """ -- GitLab From 1a53642237d1a8ce43391ab1778b3880d27cb4c9 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 30 Sep 2025 15:15:42 -0400 Subject: [PATCH 16/17] Move get_stats --- raps/engine.py | 8 -------- raps/stats.py | 19 ++++++++++++++----- tests/util.py | 3 ++- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 7359f9b..d0981cc 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -40,7 +40,6 @@ from raps.account import Accounts from raps.downtime import Downtime from raps.weather 
import Weather from raps.sim_config import SimConfig -import raps.stats as engine_stats from bisect import bisect_right @@ -862,13 +861,6 @@ class Engine: # engine self.sys_power = total_power_kw - def get_stats(self): - return { - 'engine': engine_stats.get_engine_stats(self), - 'job': engine_stats.get_job_stats(self), - 'scheduler': engine_stats.get_scheduler_stats(self), - 'network': engine_stats.get_network_stats(self) if self.simulate_network else {}, - } def get_gauge_limits(self): """For setting max values in dashboard gauges""" diff --git a/raps/stats.py b/raps/stats.py index 832ad35..a45140c 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -10,10 +10,10 @@ try to keep statistics consolidated in this file. import sys from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss -from . import engine +from .engine import Engine -def get_engine_stats(engine: "engine.Engine", *, fast = False): +def get_engine_stats(engine: Engine, *, fast = False): """ Return engine statistics Setting `fast = False` excludes some stats that are more expensive to calculate. 
@@ -102,7 +102,7 @@ def min_max_sum(value, min, max, sum): return min, max, sum -def get_scheduler_stats(engine: "engine.Engine"): +def get_scheduler_stats(engine: Engine): if len(engine.scheduler_queue_history) != 0: average_queue = sum(engine.scheduler_queue_history) / len(engine.scheduler_queue_history) else: @@ -119,7 +119,7 @@ def get_scheduler_stats(engine: "engine.Engine"): return stats -def get_network_stats(engine: "engine.Engine"): +def get_network_stats(engine: Engine): stats = {} if engine.net_util_history: @@ -144,7 +144,7 @@ def get_network_stats(engine: "engine.Engine"): return stats -def get_job_stats(engine: "engine.Engine"): +def get_job_stats(engine: Engine): """ Return job statistics processed over the engine execution""" # Information on Job-Mix min_job_size, max_job_size, sum_job_size = sys.maxsize, -sys.maxsize - 1, 0 @@ -361,6 +361,15 @@ def get_job_stats(engine: "engine.Engine"): return job_stats +def get_stats(engine: Engine, *, fast = False): + return { + 'engine': get_engine_stats(engine, fast = fast), + 'job': get_job_stats(engine), + 'scheduler': get_scheduler_stats(engine), + 'network': get_network_stats(engine) if engine.simulate_network else {}, + } + + def print_formatted_report(engine_stats=None, job_stats=None, scheduler_stats=None, diff --git a/tests/util.py b/tests/util.py index 7481564..20c9054 100644 --- a/tests/util.py +++ b/tests/util.py @@ -4,6 +4,7 @@ from pathlib import Path import shlex import json from raps.engine import Engine +from raps.stats import get_stats from raps.multi_part_engine import MultiPartEngine from raps.sim_config import SingleSimConfig, MultiPartSimConfig @@ -61,7 +62,7 @@ def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]] if include_ticks: stats['tick_datas'].append(tick) - stats.update(engine.get_stats()) + stats.update(get_stats(engine)) return engine, stats -- GitLab From 33b4c133daa3dc01b7ad31d294d29d795bb81e1a Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 
30 Sep 2025 15:20:24 -0400 Subject: [PATCH 17/17] Move get_guage_limits --- raps/engine.py | 13 ------------- raps/stats.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index d0981cc..67cd999 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -860,16 +860,3 @@ class Engine: self.power_manager.loss_history.append((self.current_timestep, total_loss_kw)) # engine self.sys_power = total_power_kw - - - def get_gauge_limits(self): - """For setting max values in dashboard gauges""" - peak_flops = self.flops_manager.get_rpeak() - peak_power = self.power_manager.get_peak_power() - gflops_per_watt_max = peak_flops / 1E9 / peak_power - - return { - 'peak_flops': peak_flops, - 'peak_power': peak_power, - 'g_flops_w_peak': gflops_per_watt_max - } diff --git a/raps/stats.py b/raps/stats.py index a45140c..924a696 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -416,3 +416,16 @@ def print_formatted_report(engine_stats=None, "avg_per_job_slowdown": "{:.2f}x", "max_per_job_slowdown": "{:.2f}x", }) + + +def get_gauge_limits(engine: Engine): + """For setting max values in dashboard gauges""" + peak_flops = engine.flops_manager.get_rpeak() + peak_power = engine.power_manager.get_peak_power() + gflops_per_watt_max = peak_flops / 1E9 / peak_power + + return { + 'peak_flops': peak_flops, + 'peak_power': peak_power, + 'g_flops_w_peak': gflops_per_watt_max + } -- GitLab