Loading raps/dataloaders/lassen.py +2 −1 Original line number Diff line number Diff line Loading @@ -245,7 +245,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): return WorkloadData( jobs=job_list, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, start_date=telemetry_start_timestamp, # TODO: Confirm whether lassen timestamps are UTC or PDT start_date=telemetry_start_timestamp.tz_localize("UTC"), ) Loading raps/dataloaders/mit_supercloud/loader.py +1 −2 Original line number Diff line number Diff line Loading @@ -119,7 +119,7 @@ from collections import Counter from datetime import datetime, timezone from raps.job import job_dict, Job from raps.utils import summarize_ranges, next_arrival, WorkloadData from raps.utils import summarize_ranges, WorkloadData from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END Loading Loading @@ -211,7 +211,6 @@ def load_data(local_dataset_path, **kwargs): """ debug = kwargs.get("debug") config = kwargs.get("config") arrival = kwargs.get("arrival") NL_PATH = os.path.dirname(__file__) skip_counts = Counter() Loading raps/engine.py +1 −7 Original line number Diff line number Diff line Loading @@ -161,7 +161,6 @@ class Engine: self.flops_manager = flops_manager self.debug = sim_config.debug self.continuous_workload = continuous_workload self.output = sim_config.output self.replay = sim_config.replay self.downscale = sim_config.downscale # Factor to downscale the 1s timesteps (power of 10) self.simulate_network = sim_config.simulate_network Loading Loading @@ -215,10 +214,7 @@ class Engine: @staticmethod def from_sim_config(sim_config: SimConfig, partition: str | None = None): if partition: system_config_by_name = {s.system_name: s for s in sim_config.system_configs} system_config = system_config_by_name.get(partition) if not system_config: raise ValueError(f"Partition {partition} isn't in SimConfig") system_config = sim_config.get_system_config_by_name(partition) elif len(sim_config.system_configs) > 1: raise ValueError( "Engine can only run single-partition simulations. Use MultiPartEngine for " + Loading @@ -232,8 +228,6 @@ class Engine: sim_config_args = sim_config.get_legacy_args() sim_config_dict = sim_config.get_legacy_args_dict() sim_config_dict['config'] = system_config_dict if partition: sim_config_dict["system"] = sim_config.system_name if sim_config.seed: random.seed(sim_config.seed) Loading raps/multi_part_engine.py +2 −2 Original line number Diff line number Diff line from collections.abc import Iterable from raps.engine import Engine, TickData from raps.sim_config import SimConfig from raps.sim_config import MultiPartSimConfig from raps.utils import WorkloadData Loading @@ -11,7 +11,7 @@ class MultiPartEngine: self.jobs = jobs @staticmethod def from_sim_config(sim_config: SimConfig): def from_sim_config(sim_config: MultiPartSimConfig): if sim_config.replay: root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs) # TODO should consider how to pass separate replay values for separate systems Loading raps/run_sim.py +20 −13 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ from raps.stats import ( print_formatted_report ) from raps.sim_config import SimConfig from raps.sim_config import SingleSimConfig, MultiPartSimConfig def read_yaml(config_file: str): Loading Loading @@ -62,7 +62,7 @@ def run_sim_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. """) model_validate = pydantic_add_args(parser, SimConfig, model_config={ model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": shortcuts, }) parser.set_defaults( Loading @@ -70,23 +70,26 @@ def run_sim_add_parser(subparsers: SubParsers): ) def run_sim(sim_config: SimConfig): def run_sim(sim_config: SingleSimConfig): if sim_config.verbose or sim_config.debug: print(f"SimConfig: {sim_config.model_dump_json(indent=4)}") print(f"SingleSimConfig: {sim_config.model_dump_json(indent=4)}") if len(sim_config.system_configs) > 1: print("Use run-parts to run multi-partition simulations") sys.exit(1) engine, workload_data, time_delta = Engine.from_sim_config(sim_config) out = sim_config.output out = sim_config.get_output() if out: out.mkdir(parents=True) engine.telemetry.save_snapshot( dest=str(out), dest=str(out / 'snapshot.npz'), result=workload_data, args=sim_config, ) config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) (out / 'sim_config.yaml').write_text(config_yaml) jobs = workload_data.jobs timestep_start, timestep_end = workload_data.telemetry_start, workload_data.telemetry_end total_timesteps = timestep_end - timestep_start Loading Loading @@ -234,7 +237,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. """) model_validate = pydantic_add_args(parser, SimConfig, model_config={ model_validate = pydantic_add_args(parser, MultiPartSimConfig, model_config={ "cli_shortcuts": shortcuts, }) parser.set_defaults( Loading @@ -242,8 +245,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): ) def run_parts_sim(sim_config: SimConfig): def run_parts_sim(sim_config: MultiPartSimConfig): if len(sim_config.system_configs) == 1: warnings.warn( "run_parts_sim is usually for multiple partitions. Did you mean to run with one?", Loading @@ -253,13 +255,18 @@ def run_parts_sim(sim_config: SimConfig): multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ MultiPartEngine.from_sim_config(sim_config) if sim_config.output: out = sim_config.get_output() if out: out.mkdir(parents=True) for part, engine in multi_engine.engines.items(): engine.telemetry.save_snapshot( dest=str(sim_config.output / part.split('/')[-1]), dest=str(out / part.split('/')[-1]), result=workload_results[part], args=sim_config, ) config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) (out / 'sim_config.yaml').write_text(config_yaml) jobs = {p: w.jobs for p, w in workload_results.items()} ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq Loading Loading @@ -317,7 +324,7 @@ def show_add_parser(subparsers: SubParsers): parser.add_argument("--show-defaults", default=False, help=""" If true, include defaults in the output YAML """) model_validate = pydantic_add_args(parser, SimConfig, model_config={ model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": shortcuts, }) Loading @@ -328,6 +335,6 @@ def show_add_parser(subparsers: SubParsers): parser.set_defaults(impl=impl) def show(sim_config: SimConfig, show_defaults=False): def show(sim_config: SingleSimConfig, show_defaults=False): data = sim_config.model_dump(mode="json", exclude_defaults=not show_defaults) print(yaml_dump(data), end="") Loading
raps/dataloaders/lassen.py +2 −1 Original line number Diff line number Diff line Loading @@ -245,7 +245,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): return WorkloadData( jobs=job_list, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, start_date=telemetry_start_timestamp, # TODO: Confirm whether lassen timestamps are UTC or PDT start_date=telemetry_start_timestamp.tz_localize("UTC"), ) Loading
raps/dataloaders/mit_supercloud/loader.py +1 −2 Original line number Diff line number Diff line Loading @@ -119,7 +119,7 @@ from collections import Counter from datetime import datetime, timezone from raps.job import job_dict, Job from raps.utils import summarize_ranges, next_arrival, WorkloadData from raps.utils import summarize_ranges, WorkloadData from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END Loading Loading @@ -211,7 +211,6 @@ def load_data(local_dataset_path, **kwargs): """ debug = kwargs.get("debug") config = kwargs.get("config") arrival = kwargs.get("arrival") NL_PATH = os.path.dirname(__file__) skip_counts = Counter() Loading
raps/engine.py +1 −7 Original line number Diff line number Diff line Loading @@ -161,7 +161,6 @@ class Engine: self.flops_manager = flops_manager self.debug = sim_config.debug self.continuous_workload = continuous_workload self.output = sim_config.output self.replay = sim_config.replay self.downscale = sim_config.downscale # Factor to downscale the 1s timesteps (power of 10) self.simulate_network = sim_config.simulate_network Loading Loading @@ -215,10 +214,7 @@ class Engine: @staticmethod def from_sim_config(sim_config: SimConfig, partition: str | None = None): if partition: system_config_by_name = {s.system_name: s for s in sim_config.system_configs} system_config = system_config_by_name.get(partition) if not system_config: raise ValueError(f"Partition {partition} isn't in SimConfig") system_config = sim_config.get_system_config_by_name(partition) elif len(sim_config.system_configs) > 1: raise ValueError( "Engine can only run single-partition simulations. Use MultiPartEngine for " + Loading @@ -232,8 +228,6 @@ class Engine: sim_config_args = sim_config.get_legacy_args() sim_config_dict = sim_config.get_legacy_args_dict() sim_config_dict['config'] = system_config_dict if partition: sim_config_dict["system"] = sim_config.system_name if sim_config.seed: random.seed(sim_config.seed) Loading
raps/multi_part_engine.py +2 −2 Original line number Diff line number Diff line from collections.abc import Iterable from raps.engine import Engine, TickData from raps.sim_config import SimConfig from raps.sim_config import MultiPartSimConfig from raps.utils import WorkloadData Loading @@ -11,7 +11,7 @@ class MultiPartEngine: self.jobs = jobs @staticmethod def from_sim_config(sim_config: SimConfig): def from_sim_config(sim_config: MultiPartSimConfig): if sim_config.replay: root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs) # TODO should consider how to pass separate replay values for separate systems Loading
raps/run_sim.py +20 −13 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ from raps.stats import ( print_formatted_report ) from raps.sim_config import SimConfig from raps.sim_config import SingleSimConfig, MultiPartSimConfig def read_yaml(config_file: str): Loading Loading @@ -62,7 +62,7 @@ def run_sim_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. """) model_validate = pydantic_add_args(parser, SimConfig, model_config={ model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": shortcuts, }) parser.set_defaults( Loading @@ -70,23 +70,26 @@ def run_sim_add_parser(subparsers: SubParsers): ) def run_sim(sim_config: SimConfig): def run_sim(sim_config: SingleSimConfig): if sim_config.verbose or sim_config.debug: print(f"SimConfig: {sim_config.model_dump_json(indent=4)}") print(f"SingleSimConfig: {sim_config.model_dump_json(indent=4)}") if len(sim_config.system_configs) > 1: print("Use run-parts to run multi-partition simulations") sys.exit(1) engine, workload_data, time_delta = Engine.from_sim_config(sim_config) out = sim_config.output out = sim_config.get_output() if out: out.mkdir(parents=True) engine.telemetry.save_snapshot( dest=str(out), dest=str(out / 'snapshot.npz'), result=workload_data, args=sim_config, ) config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) (out / 'sim_config.yaml').write_text(config_yaml) jobs = workload_data.jobs timestep_start, timestep_end = workload_data.telemetry_start, workload_data.telemetry_end total_timesteps = timestep_end - timestep_start Loading Loading @@ -234,7 +237,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. """) model_validate = pydantic_add_args(parser, SimConfig, model_config={ model_validate = pydantic_add_args(parser, MultiPartSimConfig, model_config={ "cli_shortcuts": shortcuts, }) parser.set_defaults( Loading @@ -242,8 +245,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): ) def run_parts_sim(sim_config: SimConfig): def run_parts_sim(sim_config: MultiPartSimConfig): if len(sim_config.system_configs) == 1: warnings.warn( "run_parts_sim is usually for multiple partitions. Did you mean to run with one?", Loading @@ -253,13 +255,18 @@ def run_parts_sim(sim_config: SimConfig): multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ MultiPartEngine.from_sim_config(sim_config) if sim_config.output: out = sim_config.get_output() if out: out.mkdir(parents=True) for part, engine in multi_engine.engines.items(): engine.telemetry.save_snapshot( dest=str(sim_config.output / part.split('/')[-1]), dest=str(out / part.split('/')[-1]), result=workload_results[part], args=sim_config, ) config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) (out / 'sim_config.yaml').write_text(config_yaml) jobs = {p: w.jobs for p, w in workload_results.items()} ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq Loading Loading @@ -317,7 +324,7 @@ def show_add_parser(subparsers: SubParsers): parser.add_argument("--show-defaults", default=False, help=""" If true, include defaults in the output YAML """) model_validate = pydantic_add_args(parser, SimConfig, model_config={ model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": shortcuts, }) Loading @@ -328,6 +335,6 @@ def show_add_parser(subparsers: SubParsers): parser.set_defaults(impl=impl) def show(sim_config: SimConfig, show_defaults=False): def show(sim_config: SingleSimConfig, show_defaults=False): data = sim_config.model_dump(mode="json", exclude_defaults=not show_defaults) print(yaml_dump(data), end="")