diff --git a/.gitignore b/.gitignore index 5f7f2b5f827601e2c9e09c22b38c1210bd9b201c..ed10fabfb9ebac419c6c83f1721ac53bf7e81a9f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,5 @@ venv *.npz *.prof simulation_results/ -models/*.fmu +models/fmu-models .shell-completion-cache diff --git a/Makefile b/Makefile index a2f4211d4f9ed39068f925e2cced5884c0d4b749..d66f02c7000b5a3640a60bb4491df6d84600727b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,6 @@ #Makefile +SHELL := /bin/bash + .PHONY: pip run docker_build docker_run all: pip @@ -19,3 +21,9 @@ docker_build: docker_run: docker run --platform linux/amd64 -it $(IMAGE_NAME) +fetch-fmu-models: + if [ ! -d ./models/fmu-models ]; then \ + git clone git@code.ornl.gov:exadigit/fmu-models.git ./models/fmu-models; \ + else \ + git -C ./models/fmu-models pull; \ + fi diff --git a/config/adastraMI250.yaml b/config/adastraMI250.yaml index c7b95b850f447e22b11f649ca3f51a57faa38381..88f68e9bca9b30ed2311321fcaf9cc2009589d00 100644 --- a/config/adastraMI250.yaml +++ b/config/adastraMI250.yaml @@ -90,7 +90,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_path: "../models/fmu-models/Frontier/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/frontier.yaml b/config/frontier.yaml index 3102f31ce8cd2cfe65a697f5ea33da79d5eb8cee..84891c1179ccb90e8032bf749717eefe1f4c63ef 100644 --- a/config/frontier.yaml +++ b/config/frontier.yaml @@ -63,7 +63,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_path: "../models/fmu-models/Frontier/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/lassen.yaml b/config/lassen.yaml index 640c55e2704c7362aaf29bdbc46351c3d1c44370..594479d221d08f5606574682001138aa99281de9 
100644 --- a/config/lassen.yaml +++ b/config/lassen.yaml @@ -56,7 +56,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '94550' country_code: US - fmu_path: "models/POWER9CSM/fmus/lassen.fmu" + fmu_path: "../models/POWER9CSM/fmus/lassen.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/marconi100.yaml b/config/marconi100.yaml index 797153e108f39fd4ed343fbec892d3f09e4fea24..0e66a7e8c20ba117ea4f566aed60a172c43a4310 100644 --- a/config/marconi100.yaml +++ b/config/marconi100.yaml @@ -52,7 +52,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '30170' country_code: IT - fmu_path: "models/POWER9CSM/fmus/marconi100.fmu" + fmu_path: "../models/POWER9CSM/fmus/marconi100.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/summit.yaml b/config/summit.yaml index 8dc6fe318d11dff7549b1e7317e60647da8a52f4..7b2b5fe301692ff48082c82dbe92de9b1ab0d444 100644 --- a/config/summit.yaml +++ b/config/summit.yaml @@ -52,7 +52,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/POWER9CSM/fmus/summit.fmu" + fmu_path: "../models/POWER9CSM/fmus/summit.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/main.py b/main.py index ab464b058f3b616c9bfe37a553f350883148eeda..18ecd9a50688ec12705315c3ad6365a688727ada 100755 --- a/main.py +++ b/main.py @@ -16,9 +16,9 @@ import argcomplete # Importing all of raps' dependencies like pandas etc can be rather slow, often taking 1-2 seconds. So for snappy shell # completion we need avoid imports on the shell completion path. We could do this by shuffling the code around to # create the parser without importing any heavy-weight libraries. But that would be a pain to maintain and track that -# pandas or scipy aren't accidentally imported transitively. 
Pandas can also be convenient to use in validating SimConfig -# etc, which is needed to build the argparser. So instead, we cache the generated argparser object so that shell -# completion can run without importing the rest of raps. +# pandas or scipy aren't accidentally imported transitively. Pandas can also be convenient to use in validating +# SimConfig etc, which is needed to build the argparser. So instead, we cache the generated argparser object so that +# shell completion can run without importing the rest of raps. PARSER_CACHE = Path(__file__).parent / '.shell-completion-cache' diff --git a/raps/__init__.py b/raps/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a7f352320df24d3559a8d5556877faa40e132c5d 100644 --- a/raps/__init__.py +++ b/raps/__init__.py @@ -0,0 +1,16 @@ +from .sim_config import SimConfig, SingleSimConfig, MultiPartSimConfig +from .system_config import ( + SystemConfig, SystemCoolingConfig, SystemNetworkConfig, SystemPowerConfig, SystemSchedulerConfig, + SystemSystemConfig, SystemUqConfig, +) +from raps.schedulers.default import PolicyType, BackfillType +from .engine import Engine +from .multi_part_engine import MultiPartEngine + +__all__ = [ + "SimConfig", "SingleSimConfig", "MultiPartSimConfig", + "SystemConfig", "SystemCoolingConfig", "SystemNetworkConfig", "SystemPowerConfig", "SystemSchedulerConfig", + "SystemSystemConfig", "SystemUqConfig", + "PolicyType", "BackfillType", + "Engine", "MultiPartEngine", +] diff --git a/raps/constants.py b/raps/constants.py index 85b5e232012a0ac3e269895bef5e380cd87a6d3e..53711e1c0bfc38df15662219864c81d7974b9fef 100644 --- a/raps/constants.py +++ b/raps/constants.py @@ -2,7 +2,6 @@ RAPS Constants """ from pathlib import Path -from datetime import datetime ELLIPSES = '\u2026' OUTPUT_PATH = Path('simulation_results') diff --git a/raps/raps_config.py b/raps/raps_config.py index 6eddca80a7507f3fdc782fedee4d522d5d582319..d1e1385cf72bc99e5badda31353fc35b9e45d678 100644 --- a/raps/raps_config.py +++ 
b/raps/raps_config.py @@ -1,5 +1,5 @@ from pathlib import Path -from raps.utils import ExpandedPath +from raps.utils import ResolvedPath from pydantic_settings import BaseSettings, SettingsConfigDict, YamlConfigSettingsSource ROOT_DIR = Path(__file__).parent.parent @@ -13,7 +13,7 @@ class RapsConfig(BaseSettings): # We'll be using SimConfig in the simulation server and those settings aren't applicable there, # so it makes sense to keep SimConfig scoped to the logical operation of the sim. - system_config_dir: ExpandedPath = ROOT_DIR / 'config' + system_config_dir: ResolvedPath = ROOT_DIR / 'config' """ Directory containing system configuration files """ model_config = SettingsConfigDict( diff --git a/raps/run_sim.py b/raps/run_sim.py index 51bf6f5e6ce01b0e41744cae2f224f7ba5799d0a..aa2d9d951c491d383c90504abb2c78f648d1da44 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -11,7 +11,7 @@ from raps.ui import LayoutManager from raps.plotting import Plotter from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine -from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml +from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml_parsed from raps.stats import ( get_engine_stats, get_job_stats, @@ -38,7 +38,7 @@ def run_sim_add_parser(subparsers: SubParsers): "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( - impl=lambda args: run_sim(model_validate(args, read_yaml(args.config_file))) + impl=lambda args: run_sim(model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file))) ) @@ -209,7 +209,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( - impl=lambda args: run_parts_sim(model_validate(args, read_yaml(args.config_file))) + impl=lambda args: run_parts_sim(model_validate(args, read_yaml_parsed(MultiPartSimConfig, args.config_file))) ) @@ -293,7 +293,7 @@ def show_add_parser(subparsers: SubParsers): }) def 
impl(args): - sim_config = model_validate(args, read_yaml(args.config_file)) + sim_config = model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file)) show(sim_config, show_defaults=args.show_defaults) parser.set_defaults(impl=impl) diff --git a/raps/sim_config.py b/raps/sim_config.py index c27a2aba21acf6ad592613254663a37e5c44b7f9..2f9bc4480b04f570822c3f9d666e6948c9ffd3c5 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -1,6 +1,7 @@ import argparse import abc from pathlib import Path +import pandas as pd from functools import cached_property from datetime import timedelta from typing import Literal, Annotated as A @@ -8,10 +9,12 @@ from annotated_types import Len import importlib from raps.schedulers.default import PolicyType, BackfillType from raps.utils import ( - parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, create_casename, + parse_time_unit, convert_to_time_unit, infer_time_unit, ResolvedPath, create_casename, RAPSBaseModel, AutoAwareDatetime, SmartTimedelta, yaml_dump, ) -from raps.system_config import SystemConfig, get_partition_configs, get_system_config +from raps.system_config import ( + SystemConfig, get_partition_configs, get_system_config, list_systems, resolve_system_reference, +) from pydantic import model_validator, Field Distribution = Literal['uniform', 'weibull', 'normal'] @@ -79,7 +82,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): seed: int | None = None """ Set RNG seed for deterministic simulation """ - output: ExpandedPath | Literal['none'] | None = None + output: ResolvedPath | Literal['none'] | None = None """ Where to output power, cooling, and loss models for later analysis. If omitted it will output to raps-output- by default. 
@@ -112,7 +115,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): imtype: Literal["png", "svg", "jpg", "pdf", "eps"] = "png" """ Plot image type """ - replay: list[ExpandedPath] | None = None + replay: list[ResolvedPath] | None = None """ Either: path/to/joblive path/to/jobprofile OR filename.npz """ encrypt: bool = False @@ -214,7 +217,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): # Accounts accounts: bool = False - accounts_json: ExpandedPath | None = None + accounts_json: ResolvedPath | None = None """ Path to accounts JSON file from previous run """ # Downtime @@ -283,6 +286,11 @@ class SimConfig(RAPSBaseModel, abc.ABC): if self.start and self.fastforward: raise ValueError("start and fastforward are mutually exclusive") + if self.start: + self.start = pd.Timestamp(self.start).floor(self.time_unit).to_pydatetime() + if self.end: + self.end = pd.Timestamp(self.end).floor(self.time_unit).to_pydatetime() + if self.end: if not self.start: raise ValueError("end requires start to be set") @@ -419,20 +427,32 @@ class SimConfig(RAPSBaseModel, abc.ABC): class SingleSimConfig(SimConfig, abc.ABC): - system: SystemConfig | str = "frontier" - """ - Name of the system to simulate, e.g "frontier". Can also be a path to a yaml file containing - the SystemConfig. You can also make modificiations to the SystemConfig on the CLI using - `--system.base`, e.g. `--system.base frontier --system.cooling.fmu-path path/to/my.fmu` - """ + # Dynamic help string + system: A[SystemConfig | str, Field(description=f""" + Name of the system to simulate or a path to a yaml file containing the SystemConfig. + + You can also make modifications to the SystemConfig on the CLI using `--system.base`, e.g + `--system.base frontier --system.cooling.fmu-path path/to/my.fmu`. 
+ + Built-in systems: {', '.join(list_systems())} + """)] = "frontier" + + @model_validator(mode="after") + def _validate_system(self, info): + self.system = resolve_system_reference(self.system, info) + try: + self._system_configs = [get_system_config(self.system)] + except FileNotFoundError as e: + raise ValueError(str(e)) + return self @property def system_name(self) -> str: return self.system_configs[0].system_name - @cached_property + @property def system_configs(self) -> list[SystemConfig]: - return [get_system_config(self.system)] + return self._system_configs class MultiPartSimConfig(SimConfig): @@ -442,6 +462,15 @@ class MultiPartSimConfig(SimConfig): to custom SystemConfig yaml files. """ + @model_validator(mode="after") + def _validate_partitions(self, info): + self.partitions = [resolve_system_reference(p, info) for p in self.partitions] + try: + self._multi_partition_system_config = get_partition_configs(self.partitions) + except FileNotFoundError as e: + raise ValueError(str(e)) + return self @property def system_name(self) -> str: return self._multi_partition_system_config.system_name @@ -450,10 +479,6 @@ class MultiPartSimConfig(SimConfig): def system_configs(self) -> list[SystemConfig]: return self._multi_partition_system_config.partitions - @cached_property - def _multi_partition_system_config(self): - return get_partition_configs(self.partitions) - SIM_SHORTCUTS = { "partitions": "x", diff --git a/raps/stats.py b/raps/stats.py index a42015155d156ccb1f7164c4bc4cb370a91428ac..924a69685a74fb66b287d259c1be5602b491dc14 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -13,8 +13,11 @@ from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss from .engine import Engine -def get_engine_stats(engine: Engine): - """ Return engine statistics """ +def get_engine_stats(engine: Engine, *, fast = False): + """ + Return engine statistics + Setting `fast = True` excludes some stats that are more expensive to calculate. 
+ """ timesteps = engine.current_timestep - engine.timestep_start num_samples = len(engine.power_manager.history) if engine.power_manager else 0 time_simulated = convert_seconds_to_hhmmss(timesteps / engine.downscale) @@ -51,7 +54,7 @@ def get_engine_stats(engine: Engine): else: stats['jobs_completed_percentage'] = 0 - if engine.node_occupancy_history: + if not fast and engine.node_occupancy_history: # Calculate average concurrent jobs per node (average density across all nodes and timesteps) total_jobs_running_timesteps = 0 max_concurrent_jobs_per_node = 0 @@ -72,12 +75,12 @@ def get_engine_stats(engine: Engine): sum_jobs_per_active_node += sum(active_nodes_in_timestep) / len(active_nodes_in_timestep) count_active_timesteps_for_avg_active += 1 - # Average jobs per *active* node (user's desired "1" type) - avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \ - if count_active_timesteps_for_avg_active > 0 else 0 + # Average jobs per *active* node (user's desired "1" type) + avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \ + if count_active_timesteps_for_avg_active > 0 else 0 - stats['avg_concurrent_jobs_per_active_node'] = avg_jobs_per_active_node - stats['max_concurrent_jobs_per_node'] = max_concurrent_jobs_per_node + stats['avg_concurrent_jobs_per_active_node'] = avg_jobs_per_active_node + stats['max_concurrent_jobs_per_node'] = max_concurrent_jobs_per_node else: stats['avg_concurrent_jobs_per_node'] = None stats['max_concurrent_jobs_per_node'] = None @@ -358,6 +361,15 @@ def get_job_stats(engine: Engine): return job_stats +def get_stats(engine: Engine, *, fast = False): + return { + 'engine': get_engine_stats(engine, fast = fast), + 'job': get_job_stats(engine), + 'scheduler': get_scheduler_stats(engine), + 'network': get_network_stats(engine) if engine.simulate_network else {}, + } + + def print_formatted_report(engine_stats=None, job_stats=None, scheduler_stats=None, @@ 
-404,3 +416,16 @@ def print_formatted_report(engine_stats=None, "avg_per_job_slowdown": "{:.2f}x", "max_per_job_slowdown": "{:.2f}x", }) + + +def get_gauge_limits(engine: Engine): + """For setting max values in dashboard gauges""" + peak_flops = engine.flops_manager.get_rpeak() + peak_power = engine.power_manager.get_peak_power() + gflops_per_watt_max = peak_flops / 1E9 / peak_power + + return { + 'peak_flops': peak_flops, + 'peak_power': peak_power, + 'g_flops_w_peak': gflops_per_watt_max + } diff --git a/raps/system_config.py b/raps/system_config.py index bd405beab618a846adf7d1474ea985f003ee3424..5253e0b1e58d5697e7f3786fed3c3afa0ad0c5c7 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -7,9 +7,11 @@ from functools import cached_property import yaml from pydantic import ( model_validator, field_validator, model_serializer, SerializationInfo, - SerializerFunctionWrapHandler, + SerializerFunctionWrapHandler, ValidationInfo, +) +from raps.utils import ( + RAPSBaseModel, deep_merge, deep_subtract_dicts, is_yaml_file, ResolvedPath, validate_resolved_path, ) -from raps.utils import RAPSBaseModel, deep_merge, deep_subtract_dicts from raps.raps_config import raps_config # Define Pydantic models for the config to handle parsing and validation @@ -130,7 +132,7 @@ class SystemCoolingConfig(RAPSBaseModel): wet_bulb_temp: float zip_code: str | None = None country_code: str | None = None - fmu_path: str + fmu_path: ResolvedPath fmu_column_mapping: dict[str, str] w_htwps_key: str w_ctwps_key: str @@ -179,10 +181,12 @@ class SystemConfig(RAPSBaseModel): network: SystemNetworkConfig | None = None @model_validator(mode="before") - def _load_base(cls, data): + def _load_base(cls, data, info: ValidationInfo): if isinstance(data, dict) and data.get("base"): - base = get_system_config(data['base']) - data = deep_merge(base.model_dump(mode='json'), data) + data['base'] = resolve_system_reference(data['base'], info) + base_model = get_system_config(data['base']) + 
base_data = base_model.model_dump(mode='json', exclude_unset=True) + data = deep_merge(base_data, data) return data @model_serializer(mode='wrap') @@ -263,13 +267,12 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: """ if isinstance(system, SystemConfig): # Just pass system through if its already parsed return system - - if system in list_systems(): + elif is_yaml_file(system): + config_path = Path(system) + system_name = config_path.stem + else: config_path = raps_config.system_config_dir / f"{system}.yaml" system_name = system - else: - config_path = Path(system).resolve() - system_name = config_path.stem if not config_path.is_file(): raise FileNotFoundError(f'"{system}" not found. Valid systems are: {list_systems()}') @@ -277,10 +280,8 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: "system_name": system_name, # You can override system_name in the yaml as well **yaml.safe_load(config_path.read_text()), } - base = str(config.get('base', '')) - if base.endswith(".yaml"): - config['base'] = str(config_path.parent / base) # path relative to yaml - return SystemConfig.model_validate(config) + # Pass context so paths in the SystemConfig can be resolved relative to the yaml file + return SystemConfig.model_validate(config, context={'base_path': config_path.parent}) def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitionSystemConfig: @@ -304,7 +305,7 @@ def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitio matched_systems = fnmatch.filter(systems, pat) combined_system_name.extend(s.split("/")[0] for s in matched_systems) elif Path(pat).is_dir(): - matched_systems = sorted(Path(pat).glob("*.yaml")) + matched_systems = sorted([str(s) for s in Path(pat).glob("*.yaml")]) combined_system_name.append(Path(pat).name) else: matched_systems = sorted(glob.glob(pat)) @@ -322,3 +323,11 @@ def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitio 
system_name=combined_system_name, partitions=parsed_configs, ) + + +def resolve_system_reference(system: str | SystemConfig, info: ValidationInfo): + """ If system is a yaml path, resolve it as a path. Otherwise leave it as a string """ + if isinstance(system, str) and is_yaml_file(system): + return str(validate_resolved_path(system, info)) + else: + return system diff --git a/raps/telemetry.py b/raps/telemetry.py index 915fc975a7fc021ced2c95146b046d6566534d9e..b7f29b79e2c983b22e422c334a0d50dfeaba88ba 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -28,7 +28,7 @@ from raps.plotting import ( plot_network_histogram ) from raps.utils import ( - next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadData, RAPSBaseModel, + next_arrival_byconfargs, pydantic_add_args, SubParsers, ResolvedPath, WorkloadData, RAPSBaseModel, ) @@ -36,7 +36,7 @@ from raps.utils import ( class TelemetryArgs(RAPSBaseModel): jid: str = '*' """ Replay job id """ - replay: list[ExpandedPath] | None = None + replay: list[ResolvedPath] | None = None """ path/to/joblive path/to/jobprofile -or- filename.npz (overrides --workload option) """ plot: list[Literal["jobs", "nodes"]] | None = None is_results_file: bool = False diff --git a/raps/train_rl.py b/raps/train_rl.py index eac41724610ddd021c4c1650cf22ffaf42b4287e..d6ddd429f8ee33f3cba2a8930097c6ac9eca2d37 100644 --- a/raps/train_rl.py +++ b/raps/train_rl.py @@ -1,5 +1,5 @@ from raps.sim_config import SingleSimConfig, SIM_SHORTCUTS -from raps.utils import SubParsers, pydantic_add_args, read_yaml +from raps.utils import SubParsers, pydantic_add_args, read_yaml_parsed def train_rl_add_parser(subparsers: SubParsers): @@ -16,7 +16,7 @@ def train_rl_add_parser(subparsers: SubParsers): }) def impl(args): - model = model_validate(args, read_yaml(args.config_file)) + model = model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file)) model.scheduler = "rl" train_rl(model) parser.set_defaults(impl=impl) diff --git 
a/raps/utils.py b/raps/utils.py index 4414fd9c3899087420a1baa5ecb371103d605fe3..de565d48d9999ea312e1899f0c5c6c505b1bab19 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -20,12 +20,14 @@ import uuid import json import argparse from pathlib import Path -from typing import Annotated as A, TypeVar, Callable, TypeAlias +from typing import Annotated as A, TypeVar, TypeAlias, Protocol from pydantic import ( - BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, AwareDatetime, ValidationError + BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, AwareDatetime, ValidationError, + ValidationInfo, ) -from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource +from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource, SettingsError import yaml +from yaml import YAMLError from raps.job import Job @@ -683,8 +685,27 @@ def normalize_tz(d: datetime): return d.astimezone(timezone.utc) -ExpandedPath = A[Path, AfterValidator(lambda v: Path(v).expanduser().resolve())] -""" Type that that expands ~ and environment variables in a path string """ +def validate_resolved_path(path: str | Path, info: ValidationInfo): + context = info.context or {} + path = Path(path).expanduser() + if context.get('base_path'): + base_path = Path(context["base_path"]).expanduser().resolve() + else: + base_path = Path.cwd() + path = (base_path / path).resolve() + # This is used on the simulation server to block reading arbitrary files + if context.get("force_under_base_path"): + if not path.is_relative_to(base_path): + raise ValueError(f"{path} is not under {base_path}") + return path + +ResolvedPath = A[Path, AfterValidator(validate_resolved_path)] +""" +Resolve a path, and expand ~ in the path string. +Paths can be resolved relative to specific path instead of cwd by passing +`context={"base_path": "my/path"}` in model_validate(). 
+""" + AutoAwareDatetime = A[datetime, AfterValidator(normalize_tz)] """ Datetime type wrapper, makes sure timezone is set """ @@ -700,13 +721,18 @@ class RAPSBaseModel(BaseModel): ) -T = TypeVar("T", bound=BaseModel) +T = TypeVar("T", bound=BaseModel, covariant=True) + + +class ModelArgsValidator(Protocol[T]): + def __call__(self, args: argparse.Namespace, init_data: dict | None = None) -> T: + ... def pydantic_add_args( parser: argparse.ArgumentParser, model_cls: type[T], model_config: SettingsConfigDict | None = None, -) -> Callable[[argparse.Namespace, dict | None], T]: +) -> ModelArgsValidator[T]: """ Add arguments to the parser from the model. Returns a function that can be used to parse the model from the argparse args. @@ -735,20 +761,20 @@ def pydantic_add_args( cli_settings_source = CliSettingsSource(SettingsModel, root_parser=parser) - def model_validate_args(args: argparse.Namespace, data: dict | None = None): + def model_args_validator(args: argparse.Namespace, init_data: dict | None = None): try: model = CliApp.run(SettingsModel, cli_args=args, cli_settings_source=cli_settings_source, - **(data or {}), + **(init_data or {}), ) # Recreate model so we don't return the SettingsModel subclass # use exclude_unset so that model_field_set is preserved as well return model_cls.model_validate(model.model_dump(exclude_unset=True)) - except ValidationError as err: + except (ValidationError, SettingsError) as err: print(err) sys.exit(1) - return model_validate_args + return model_args_validator SubParsers: TypeAlias = "argparse._SubParsersAction[argparse.ArgumentParser]" @@ -784,7 +810,7 @@ def yaml_dump(data, header_comment=''): ) -def read_yaml(config_file: str): +def read_yaml(config_file: str | None) -> dict: """ Parses yaml file. 
Pass "-" to read from stdin """ # Assume stdin if not terminal if config_file == "-" or (not config_file and not sys.stdin.isatty()): @@ -794,9 +820,36 @@ def read_yaml(config_file: str): else: data = "" if data.strip(): - return yaml.safe_load(data) + result = yaml.safe_load(data) else: - return {} + result = {} + if not isinstance(result, dict): + raise ValueError("Expected yaml document to contain a top-level mapping") + return result + + +def read_yaml_parsed(cls: type[T], config_file = None) -> dict: + """ + Like read_yaml, but parses the input to resolve paths etc. + Exits on error after printing message (for use in the CLI) + """ + try: + yaml_data = read_yaml(config_file) + if yaml_data: + # Resolve paths in yaml relative to the yaml file + base_path = Path(config_file).parent if config_file and config_file != "-" else None + model = cls.model_validate(yaml_data, context={"base_path": base_path}) + yaml_data = model.model_dump(mode='json', exclude_unset=True) + except (ValidationError, ValueError, YAMLError) as err: + print(f'Failed to parse yaml "{config_file}"') + print(err) + sys.exit(1) + return yaml_data + + +def is_yaml_file(path: str | Path): + """ Return true if the path is .yaml, .yml, or .json """ + return Path(path).suffix in ['.yaml', '.yml', '.json'] class WorkloadData(RAPSBaseModel): diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 9fe3216da154c74f730feea3b6d4faf5c84a3dbb..5a3b38fc77681b20ad2a064548b8ac64e0060d42 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -1,5 +1,3 @@ -import os -import subprocess import pytest from ..util import run_engine diff --git a/tests/util.py b/tests/util.py index 46736b31a8f970df1db78863bc54e33a80c64251..20c90544630a8a77324dfd45408c17b1f4f3515c 100644 --- a/tests/util.py +++ b/tests/util.py @@ -4,11 +4,9 @@ from pathlib import Path import shlex import json from raps.engine import Engine +from 
raps.stats import get_stats from raps.multi_part_engine import MultiPartEngine from raps.sim_config import SingleSimConfig, MultiPartSimConfig -from raps.stats import ( - get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats, -) def find_project_root(): @@ -41,15 +39,6 @@ def _get_cmd(config, sub_cmd): return f"echo {shlex.quote(json.dumps(config))} | python main.py {sub_cmd} - -o none" -def _get_stats(engine: Engine): - return { - 'engine': get_engine_stats(engine), - 'job': get_job_stats(engine), - 'scheduler': get_scheduler_stats(engine), - 'network': get_network_stats(engine) if engine.simulate_network else None, - } - - def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]]: """ Run a simulation to completion. Returns the completed Engine and a dict containing the engine @@ -73,7 +62,7 @@ def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]] if include_ticks: stats['tick_datas'].append(tick) - stats.update(_get_stats(engine)) + stats.update(get_stats(engine)) return engine, stats @@ -103,6 +92,6 @@ def run_multi_part_engine(sim_config, include_ticks=False) -> tuple[MultiPartEng stats['tick_datas'].append(tick) for partition, engine in multi_engine.engines.items(): - stats['partitions'][partition] = _get_stats(engine) + stats['partitions'][partition] = get_stats(engine)