From ad281f680b932efbd1baac16c3f4f3804c6b4b10 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Mon, 25 Aug 2025 15:52:34 -0400
Subject: [PATCH 01/27] Update config computed properties

Add helper to get system config, and use cached_property to avoid
recomputation
---
 raps/sim_config.py    | 16 ++++++++++++++--
 raps/system_config.py | 13 +++++++------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/raps/sim_config.py b/raps/sim_config.py
index 127cec3..16b163f 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -1,6 +1,7 @@
 import argparse
 import sys
 import yaml
+from functools import cached_property
 from datetime import timedelta
 from pathlib import Path
 from typing import Literal
@@ -10,7 +11,7 @@ from raps.utils import (
     parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath,
     pydantic_add_args, yaml_dump, parse_td,
 )
-
+from raps.system_config import SystemConfig, get_partition_configs, get_system_config
 from pydantic import BaseModel, model_validator, computed_field
 from pydantic_settings import SettingsConfigDict
@@ -50,7 +51,7 @@ class SimConfig(BaseModel):
     """
 
     @computed_field
-    @property
+    @cached_property
     def downscale(self) -> int:
         return int(timedelta(seconds=1) / self.time_unit)
@@ -250,6 +251,17 @@ class SimConfig(BaseModel):
 
         return self
 
+    @cached_property
+    def system_configs(self) -> list[SystemConfig]:
+        """
+        Return the SystemConfigs for the selected systems.
+        Will be a single element array unless multiple `partitions` are selected.
+        """
+        if self.partitions:
+            return get_partition_configs(self.partitions).partitions
+        else:
+            return [get_system_config(self.system)]
+
     def get_legacy_args(self):
         """
         Return as an argparse.Namespace object for backwards compatability
diff --git a/raps/system_config.py b/raps/system_config.py
index e458c68..35f718c 100644
--- a/raps/system_config.py
+++ b/raps/system_config.py
@@ -3,6 +3,7 @@ import glob
 import fnmatch
 from typing import Any, Literal
 from pathlib import Path
+from functools import cached_property
 import yaml
 from pydantic import BaseModel, computed_field, model_validator, field_validator
 from raps.raps_config import raps_config
@@ -41,27 +42,27 @@ class SystemSystemConfig(BaseModel):
         return self
 
     @computed_field
-    @property
+    @cached_property
     def num_racks(self) -> int:
         return self.num_cdus * self.racks_per_cdu - len(self.missing_racks)
 
     @computed_field
-    @property
+    @cached_property
     def sc_shape(self) -> list[int]:
         return [self.num_cdus, self.racks_per_cdu, self.nodes_per_rack]
 
     @computed_field
-    @property
+    @cached_property
     def total_nodes(self) -> int:
         return self.num_cdus * self.racks_per_cdu * self.nodes_per_rack
 
     @computed_field
-    @property
+    @cached_property
     def blades_per_chassis(self) -> int:
         return int(self.nodes_per_rack / self.chassis_per_rack / self.nodes_per_blade)
 
     @computed_field
-    @property
+    @cached_property
     def power_df_header(self) -> list[str]:
         power_df_header = ["CDU"]
         for i in range(1, self.racks_per_cdu + 1):
@@ -73,7 +74,7 @@ class SystemSystemConfig(BaseModel):
         return power_df_header
 
     @computed_field
-    @property
+    @cached_property
     def available_nodes(self) -> int:
         return self.total_nodes - len(self.down_nodes)
-- 
GitLab
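
Patch 01's core trick is stacking pydantic's @computed_field on top of functools.cached_property, so a derived value is computed once per instance yet still appears in serialized output. A minimal, self-contained sketch of the idea (the model and field names below are illustrative, not RAPS code):

    from functools import cached_property
    from pydantic import BaseModel, computed_field


    class RackLayout(BaseModel):
        num_cdus: int
        racks_per_cdu: int

        @computed_field
        @cached_property
        def num_racks(self) -> int:
            # Computed on first access, then cached on the instance.
            return self.num_cdus * self.racks_per_cdu


    layout = RackLayout(num_cdus=25, racks_per_cdu=3)
    assert layout.num_racks == 75
    assert layout.model_dump()["num_racks"] == 75  # still serialized

The trade-off versus a plain @property is that the cache is never invalidated, which is only safe because these config models are effectively frozen after validation.
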
From 1d1f1d6e3948d70c04f2232735b1ba6eeaf05090 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Mon, 25 Aug 2025 16:16:09 -0400
Subject: [PATCH 02/27] Rename validation methods

---
 raps/sim_config.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/raps/sim_config.py b/raps/sim_config.py
index 16b163f..eb80385 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -210,7 +210,10 @@ class SimConfig(BaseModel):
     """ Specify the max queue length for continuous job generation """
 
     @model_validator(mode="before")
-    def _parse_times(cls, data):
+    def _validate_before(cls, data):
+        # This is called with the raw input, before Pydantic parses it, so data is just a dict and
+        # can contain any data types.
+
         time_fields = [
             "time_delta", "time", "fastforward",
             "downtime_first", "downtime_interval", "downtime_length",
@@ -237,7 +240,8 @@ class SimConfig(BaseModel):
         return data
 
     @model_validator(mode="after")
-    def _validate(self):
+    def _validate_after(self):
+        # This is called after Pydantic has parsed everything into the model
         if self.system and self.partitions:
             raise ValueError("system and partitions are mutually exclusive")
         elif not self.system and not self.partitions:
-- 
GitLab
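
The new names also document pydantic v2's two validator phases: mode="before" sees the raw, uncoerced input, while mode="after" sees the fully constructed model. The real before-hook here normalizes time fields; the sketch below substitutes a simpler normalization (splitting a comma-separated partitions string) to stay self-contained, while the after-check mirrors the mutual-exclusion rule above. Cut-down model, illustrative only:

    from pydantic import BaseModel, model_validator


    class MiniSimConfig(BaseModel):
        system: str | None = None
        partitions: list[str] | None = None

        @model_validator(mode="before")
        def _validate_before(cls, data):
            # Raw input: normalize loose types before field parsing runs.
            if isinstance(data, dict) and isinstance(data.get("partitions"), str):
                data["partitions"] = data["partitions"].split(",")
            return data

        @model_validator(mode="after")
        def _validate_after(self):
            # Parsed model: cross-field constraints belong here.
            if self.system and self.partitions:
                raise ValueError("system and partitions are mutually exclusive")
            return self


    MiniSimConfig(partitions="a,b")  # the before-hook splits the string
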
From db904f3c99bb86fb1b9714b6b70d6fd6fdf0f763 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Mon, 25 Aug 2025 17:58:47 -0400
Subject: [PATCH 03/27] Don't output if output isn't set

---
 main.py            | 71 ++++++++++++++--------------
 raps/sim_config.py |  3 ++
 raps/telemetry.py  | 24 ++++------
 3 files changed, 39 insertions(+), 59 deletions(-)

diff --git a/main.py b/main.py
index c3ba946..e64db2f 100644
--- a/main.py
+++ b/main.py
@@ -13,7 +13,7 @@ import pandas as pd
 import os
 import time
 import math
-#
+from pathlib import Path
 from raps.helpers import check_python_version
 #
 from raps.system_config import get_system_config
@@ -88,17 +88,11 @@ def main():
         td = Telemetry(**args_dict)
         jobs, timestep_start, timestep_end = \
             td.load_jobs_times_args_from_live_system()
-        if args.output is not None:
-            td.save_snapshot(jobs=jobs, timestep_start=timestep_start,
-                             timestep_end=timestep_end, args=args, filename=td.dirname)
-
     elif args.replay:
         td = Telemetry(**args_dict)
         jobs, timestep_start, timestep_end, args_from_file = \
             td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config)
-        # TODO: Merge args and args_from_files? see telemetry.py:97
-
     else:  # Synthetic jobs
         wl = Workload(args, config)
         jobs = wl.generate_jobs()
@@ -118,9 +112,13 @@ def main():
             timestep_end = 88200  # 24 hours
 
     td = Telemetry(**args_dict)
-    td.save_snapshot(jobs=jobs, timestep_start=timestep_start,
-                     timestep_end=timestep_end, args=args, filename=td.dirname)
-
+    if args.output:
+        td.save_snapshot(
+            jobs=jobs,
+            timestep_start=timestep_start,
+            timestep_end=timestep_end,
+            args=args, filename=args.output,
+        )
 
     if args.fastforward is not None:
         timestep_start = timestep_start + args.fastforward
@@ -146,11 +144,6 @@ def main():
         **args_dict,
     )
 
-    DIR_NAME = td.dirname
-    OPATH = OUTPUT_PATH / DIR_NAME
-    print("Output directory is: ", OPATH)
-    sc.opath = OPATH
-
     if args.accounts:
         job_accounts = Accounts(jobs)
@@ -160,11 +153,9 @@ def main():
             accounts = job_accounts
         sc.accounts = accounts
 
-    if args.plot or args.output is not None:
-        try:
-            os.makedirs(OPATH)
-        except OSError as error:
-            print(f"Error creating directory: {error}")
+    out = Path(args.output) if args.output else None
+    if out:
+        out.mkdir(parents = True)
 
     if args.verbose:
         print(jobs)
@@ -200,28 +191,29 @@ def main():
         downscale_str = "1" + downscale_str
 
     if args.plot:
+        assert out  # SimConfig validation should check this
         if 'power' in args.plot:
             pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)',
                          'Power History',
-                         OPATH / f'power.{args.imtype}',
+                         out / f'power.{args.imtype}',
                          uncertainties=args.uncertainties)
             x, y = zip(*power_manager.history)
             pl.plot_history(x, y)
 
         if 'util' in args.plot:
             pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)',
-                         'System Utilization History', OPATH / f'util.{args.imtype}')
+                         'System Utilization History', out / f'util.{args.imtype}')
             x, y = zip(*sc.sys_util_history)
             pl.plot_history(x, y)
 
         if 'loss' in args.plot:
             pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)',
                          'Power Loss History',
-                         OPATH / f'loss.{args.imtype}',
+                         out / f'loss.{args.imtype}',
                          uncertainties=args.uncertainties)
             x, y = zip(*power_manager.loss_history)
             pl.plot_history(x, y)
 
             pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)',
                          'Power Loss History',
-                         OPATH / f'loss_pct.{args.imtype}',
+                         out / f'loss_pct.{args.imtype}',
                          uncertainties=args.uncertainties)
             x, y = zip(*power_manager.loss_history_percentage)
             pl.plot_history(x, y)
@@ -230,7 +222,7 @@ def main():
         if cooling_model:
             ylabel = 'pue'
             title = 'FMU ' + ylabel + 'History'
-            pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / f'pue.{args.imtype}',
+            pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, out / f'pue.{args.imtype}',
                          uncertainties=args.uncertainties)
             df = pd.DataFrame(cooling_model.fmu_history)
             df.to_parquet('cooling_model.parquet', engine='pyarrow')
@@ -249,50 +241,49 @@ def main():
         else:
             print('Cooling model not enabled... skipping output of plot')
 
-    if args.output is not None:
-
+    if out:
         if args.uncertainties:
             # Parquet cannot handle annotated ufloat format AFAIK
             print('Data dump not implemented using uncertainties!')
         else:
             if cooling_model:
                 df = pd.DataFrame(cooling_model.fmu_history)
-                df.to_parquet(OPATH / 'cooling_model.parquet', engine='pyarrow')
+                df.to_parquet(out / 'cooling_model.parquet', engine='pyarrow')
 
             df = pd.DataFrame(power_manager.history)
-            df.to_parquet(OPATH / 'power_history.parquet', engine='pyarrow')
+            df.to_parquet(out / 'power_history.parquet', engine='pyarrow')
             df = pd.DataFrame(power_manager.loss_history)
-            df.to_parquet(OPATH / 'loss_history.parquet', engine='pyarrow')
+            df.to_parquet(out / 'loss_history.parquet', engine='pyarrow')
             df = pd.DataFrame(sc.sys_util_history)
-            df.to_parquet(OPATH / 'util.parquet', engine='pyarrow')
+            df.to_parquet(out / 'util.parquet', engine='pyarrow')
 
         # Schedule history
         job_history = pd.DataFrame(sc.get_job_history_dict())
-        job_history.to_csv(OPATH / "job_history.csv", index=False)
+        job_history.to_csv(out / "job_history.csv", index=False)
         scheduler_running_history = pd.DataFrame(sc.get_scheduler_running_history())
-        scheduler_running_history.to_csv(OPATH / "running_history.csv", index=False)
+        scheduler_running_history.to_csv(out / "running_history.csv", index=False)
         scheduler_queue_history = pd.DataFrame(sc.get_scheduler_running_history())
-        scheduler_queue_history.to_csv(OPATH / "queue_history.csv", index=False)
+        scheduler_queue_history.to_csv(out / "queue_history.csv", index=False)
 
         try:
-            with open(OPATH / 'stats.out', 'w') as f:
+            with open(out / 'stats.out', 'w') as f:
                 json.dump(engine_stats, f, indent=4)
                 json.dump(job_stats, f, indent=4)
         except TypeError:  # Is this the correct error code?
-            write_dict_to_file(engine_stats, OPATH / 'stats.out')
-            write_dict_to_file(job_stats, OPATH / 'stats.out')
+            write_dict_to_file(engine_stats, out / 'stats.out')
+            write_dict_to_file(job_stats, out / 'stats.out')
 
         if args.accounts:
             try:
-                with open(OPATH / 'accounts.json', 'w') as f:
+                with open(out / 'accounts.json', 'w') as f:
                     json_string = json.dumps(sc.accounts.to_dict())
                     f.write(json_string)
             except TypeError:
-                write_dict_to_file(sc.accounts.to_dict(), OPATH / 'accounts.json')
-        print("Output directory is: ", OPATH)
+                write_dict_to_file(sc.accounts.to_dict(), out / 'accounts.json')
+        print("Output directory is: ", out)  # If output is enabled, the user wants this information as last output
 
 
 if __name__ == "__main__":
diff --git a/raps/sim_config.py b/raps/sim_config.py
index eb80385..165e6c8 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -252,6 +252,9 @@ class SimConfig(BaseModel):
 
         if self.jobsize_is_power_of is not None and self.jobsize_is_of_degree is not None:
             raise ValueError("jobsize_is_power_of and jobsize_is_of_degree are mutually exclusive")
+
+        if self.plot and not self.output:
+            raise ValueError("plot requires an output directory to be set")
 
         return self
diff --git a/raps/telemetry.py b/raps/telemetry.py
index f485daa..c3cfbec 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -69,18 +69,6 @@ class Telemetry:
         self.kwargs = kwargs
         self.system = kwargs.get('system')
         self.config = kwargs.get('config')
-        outname = kwargs.get('output')
-        if outname:
-            self.dirname = outname
-        elif kwargs.get("replay"):
-            # Try to extract date from given name to use as case directory
-            matched_date = re.search(r"\d{4}-\d{2}-\d{2}", kwargs['replay'][0])
-            if matched_date:
-                self.dirname = f"sim={matched_date.group(0)}"
-            else:
-                self.dirname = create_casename()
-        else:
-            self.dirname = create_casename()
 
         try:
             self.dataloader = importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__)
@@ -302,10 +290,6 @@ class Telemetry:
                 raise ValueError("Forgot --is-results-file ?")
             timestep_start = min(timestep_start, timestep_start_from_data)
             timestep_end = max(timestep_end, timestep_end_from_data)
-            self.save_snapshot(jobs=jobs,
-                               timestep_start=timestep_start,
-                               timestep_end=timestep_end,
-                               args=args, filename=self.dirname)
         if args.time:
             timestep_end = timestep_start + convert_to_time_unit(args.time)
         elif not timestep_end:
@@ -324,8 +308,10 @@ def run_telemetry():
         jobs, timestep_start, timestep_end = \
             td.load_jobs_times_args_from_live_system()
         if args.output:
-            td.save_snapshot(jobs=jobs, timestep_start=timestep_start,
-                             timestep_end=timestep_end, args=args, filename=td.dirname)
+            td.save_snapshot(
+                jobs=jobs, timestep_start=timestep_start,
+                timestep_end=timestep_end, args=args, filename=args.output,
+            )
 
     elif args.replay:
         jobs, timestep_start, timestep_end, _ = \
@@ -416,7 +402,7 @@ def run_telemetry():
             plot_network_histogram(ax=ax, data=net_means)
     if args.output is not None:
         if args.output == "":
-            filename = f"{td.dirname}.svg"
+            filename = f"{args.output}.svg"
         else:
            filename = args.output
     plt.savefig(f'{filename}')
-- 
GitLab
""" import json -import numpy as np -import random import pandas as pd -import os -import time -import math -from pathlib import Path from raps.helpers import check_python_version -# -from raps.system_config import get_system_config -from raps.constants import OUTPUT_PATH, SEED -from raps.cooling import ThermoFluidsModel from raps.ui import LayoutManager -from raps.flops import FLOPSManager from raps.plotting import Plotter -from raps.power import ( - PowerManager, - compute_node_power, - compute_node_power_validate -) -from raps.power import ( - compute_node_power_uncertainties, - compute_node_power_validate_uncertainties -) from raps.engine import Engine -from raps.telemetry import Telemetry -from raps.workload import Workload -from raps.account import Accounts -from raps.weather import Weather from raps.utils import write_dict_to_file from raps.stats import ( get_engine_stats, @@ -45,227 +21,140 @@ from raps.stats import ( print_formatted_report ) -from raps.sim_config import args, args_dict +from raps.sim_config import sim_config check_python_version() def main(): - if args.verbose or args.debug: - print(args) - - config = get_system_config(args.system).get_legacy() - - if args.seed: - random.seed(SEED) - np.random.seed(SEED) - - if args.cooling: - cooling_model = ThermoFluidsModel(**config) - cooling_model.initialize() - args.layout = "layout2" - - if args_dict['start']: - cooling_model.weather = Weather(args_dict['start'], config=config) - else: - cooling_model = None - - if args.validate: - if args.uncertainties: - power_manager = PowerManager(compute_node_power_validate_uncertainties, **config) - else: - power_manager = PowerManager(compute_node_power_validate, **config) - else: - if args.uncertainties: - power_manager = PowerManager(compute_node_power_uncertainties, **config) - else: - power_manager = PowerManager(compute_node_power, **config) - args_dict['config'] = config - flops_manager = FLOPSManager(**args_dict) - - if args.live and not args.replay: - assert args.time is not None, {"--time must be set, specifing how long we want to predict"} - td = Telemetry(**args_dict) - jobs, timestep_start, timestep_end = \ - td.load_jobs_times_args_from_live_system() - elif args.replay: + if sim_config.verbose or sim_config.debug: + print(f"SimConfig: {sim_config.model_dump_json(indent=4)}") - td = Telemetry(**args_dict) - jobs, timestep_start, timestep_end, args_from_file = \ - td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config) - else: # Synthetic jobs - wl = Workload(args, config) - jobs = wl.generate_jobs() + engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config) - if args.verbose: - for job in jobs: - print('jobid:', job.id, '\tlen(gpu_trace):', - len(job.gpu_trace) if isinstance(job.gpu_trace, list) - else job.gpu_trace, '\twall_time(s):', - job.wall_time) - time.sleep(2) - - timestep_start = 0 - if hasattr(jobs[0], 'end_time'): - timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) - else: - timestep_end = 88200 # 24 hours - - td = Telemetry(**args_dict) - if args.output: - td.save_snapshot( + out = sim_config.output + if out: + out.mkdir(parents=True) + engine.telemetry.save_snapshot( jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, - args=args, filename=args.output, - ) - if args.fastforward is not None: - timestep_start = timestep_start + args.fastforward - - if args.time is not None: - timestep_end = timestep_start + args.time - - if args.time_delta is not None: - time_delta = 
args.time_delta - else: - time_delta = 1 - - if args.continuous_job_generation: - continuous_workload = wl - else: - continuous_workload = None - - sc = Engine( - power_manager=power_manager, - flops_manager=flops_manager, - cooling_model=cooling_model, - continuous_workload=continuous_workload, - jobs=jobs, - **args_dict, - ) - - if args.accounts: - job_accounts = Accounts(jobs) - if args.accounts_json: - loaded_accounts = Accounts.from_json_filename(args.accounts_json) - accounts = Accounts.merge(loaded_accounts, job_accounts) - else: - accounts = job_accounts - sc.accounts = accounts - - out = Path(args.output) if args.output else None - if out: - out.mkdir(parents = True) - - if args.verbose: - print(jobs) + args=sim_config.get_legacy_args(), filename=str(out), + ) total_timesteps = timestep_end - timestep_start - downscale = args.downscale + downscale = sim_config.downscale downscale_str = ""if downscale == 1 else f"/{downscale}" print(f"Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str}" f" seconds from {timestep_start} to {timestep_end}.") print(f"Simulation time delta: {time_delta}{downscale_str} s," f"Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.") - layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, - total_timesteps=total_timesteps, - args_dict=args_dict, **config) - layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) + layout_manager = LayoutManager( + sim_config.layout, engine=engine, + debug=sim_config.debug, total_timesteps=total_timesteps, + args_dict=sim_config.get_legacy_args_dict(), **sim_config.system_configs[0].get_legacy(), + ) + layout_manager.run( + jobs, + timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta, + ) - engine_stats = get_engine_stats(sc) - job_stats = get_job_stats(sc) - scheduler_stats = get_scheduler_stats(sc) - if sc.simulate_network: - network_stats = get_network_stats(sc) + engine_stats = get_engine_stats(engine) + job_stats = get_job_stats(engine) + scheduler_stats = get_scheduler_stats(engine) + if engine.simulate_network: + network_stats = get_network_stats(engine) else: network_stats = None - print_formatted_report(engine_stats=engine_stats, - job_stats=job_stats, - scheduler_stats=scheduler_stats, - network_stats=network_stats - ) + print_formatted_report( + engine_stats=engine_stats, + job_stats=job_stats, + scheduler_stats=scheduler_stats, + network_stats=network_stats, + ) if downscale_str: downscale_str = "1" + downscale_str - if args.plot: - assert out # SimConfig validation should check this - if 'power' in args.plot: + if sim_config.plot: + assert out # SimConfig validation should check this + if 'power' in sim_config.plot: pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History', - out / f'power.{args.imtype}', - uncertainties=args.uncertainties) - x, y = zip(*power_manager.history) + out / f'power.{sim_config.imtype}', + uncertainties=sim_config.uncertainties) + x, y = zip(*engine.power_manager.history) pl.plot_history(x, y) - if 'util' in args.plot: + if 'util' in sim_config.plot: pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)', - 'System Utilization History', out / f'util.{args.imtype}') - x, y = zip(*sc.sys_util_history) + 'System Utilization History', out / f'util.{sim_config.imtype}') + x, y = zip(*engine.sys_util_history) pl.plot_history(x, y) - if 'loss' in args.plot: + if 'loss' in sim_config.plot: pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 
'Power Loss History', - out / f'loss.{args.imtype}', - uncertainties=args.uncertainties) - x, y = zip(*power_manager.loss_history) + out / f'loss.{sim_config.imtype}', + uncertainties=sim_config.uncertainties) + x, y = zip(*engine.power_manager.loss_history) pl.plot_history(x, y) pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History', - out / f'loss_pct.{args.imtype}', - uncertainties=args.uncertainties) - x, y = zip(*power_manager.loss_history_percentage) + out / f'loss_pct.{sim_config.imtype}', + uncertainties=sim_config.uncertainties) + x, y = zip(*engine.power_manager.loss_history_percentage) pl.plot_history(x, y) - if 'pue' in args.plot: - if cooling_model: + if 'pue' in sim_config.plot: + if engine.cooling_model: ylabel = 'pue' title = 'FMU ' + ylabel + 'History' - pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, out / f'pue.{args.imtype}', - uncertainties=args.uncertainties) - df = pd.DataFrame(cooling_model.fmu_history) + pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, + out / f'pue.{sim_config.imtype}', + uncertainties=sim_config.uncertainties) + df = pd.DataFrame(engine.cooling_model.fmu_history) df.to_parquet('cooling_model.parquet', engine='pyarrow') pl.plot_history(df['time'], df[ylabel]) else: print('Cooling model not enabled... skipping output of plot') - if 'temp' in args.plot: - if cooling_model: + if 'temp' in sim_config.plot: + if engine.cooling_model: ylabel = 'Tr_pri_Out[1]' title = 'FMU ' + ylabel + 'History' - pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / 'temp.svg') - df = pd.DataFrame(cooling_model.fmu_history) + pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, out / 'temp.svg') + df = pd.DataFrame(engine.cooling_model.fmu_history) df.to_parquet('cooling_model.parquet', engine='pyarrow') pl.plot_compare(df['time'], df[ylabel]) else: print('Cooling model not enabled... 
skipping output of plot') if out: - if args.uncertainties: + if sim_config.uncertainties: # Parquet cannot handle annotated ufloat format AFAIK print('Data dump not implemented using uncertainties!') else: - if cooling_model: - df = pd.DataFrame(cooling_model.fmu_history) + if engine.cooling_model: + df = pd.DataFrame(engine.cooling_model.fmu_history) df.to_parquet(out / 'cooling_model.parquet', engine='pyarrow') - df = pd.DataFrame(power_manager.history) + df = pd.DataFrame(engine.power_manager.history) df.to_parquet(out / 'power_history.parquet', engine='pyarrow') - df = pd.DataFrame(power_manager.loss_history) + df = pd.DataFrame(engine.power_manager.loss_history) df.to_parquet(out / 'loss_history.parquet', engine='pyarrow') - df = pd.DataFrame(sc.sys_util_history) + df = pd.DataFrame(engine.sys_util_history) df.to_parquet(out / 'util.parquet', engine='pyarrow') # Schedule history - job_history = pd.DataFrame(sc.get_job_history_dict()) + job_history = pd.DataFrame(engine.get_job_history_dict()) job_history.to_csv(out / "job_history.csv", index=False) - scheduler_running_history = pd.DataFrame(sc.get_scheduler_running_history()) + scheduler_running_history = pd.DataFrame(engine.get_scheduler_running_history()) scheduler_running_history.to_csv(out / "running_history.csv", index=False) - scheduler_queue_history = pd.DataFrame(sc.get_scheduler_running_history()) + scheduler_queue_history = pd.DataFrame(engine.get_scheduler_running_history()) scheduler_queue_history.to_csv(out / "queue_history.csv", index=False) try: @@ -276,13 +165,13 @@ def main(): write_dict_to_file(engine_stats, out / 'stats.out') write_dict_to_file(job_stats, out / 'stats.out') - if args.accounts: + if sim_config.accounts: try: with open(out / 'accounts.json', 'w') as f: - json_string = json.dumps(sc.accounts.to_dict()) + json_string = json.dumps(engine.accounts.to_dict()) f.write(json_string) except TypeError: - write_dict_to_file(sc.accounts.to_dict(), out / 'accounts.json') + write_dict_to_file(engine.accounts.to_dict(), out / 'accounts.json') print("Output directory is: ", out) # If output is enabled, the user wants this information as last output diff --git a/raps/constants.py b/raps/constants.py index 0cdd2fd..53711e1 100644 --- a/raps/constants.py +++ b/raps/constants.py @@ -5,4 +5,3 @@ from pathlib import Path ELLIPSES = '\u2026' OUTPUT_PATH = Path('simulation_results') -SEED = 42 diff --git a/raps/engine.py b/raps/engine.py index f79b140..667e904 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -1,6 +1,7 @@ from typing import Optional, List import dataclasses import pandas as pd +import numpy as np import threading import sys import tty @@ -8,7 +9,8 @@ import termios import os import select import time - +import random +import math from raps.job import Job, JobState from raps.policy import PolicyType from raps.utils import ( @@ -17,14 +19,27 @@ from raps.utils import ( ) from raps.resmgr import ResourceManager from raps.schedulers import load_scheduler -from raps.power import record_power_stats_foreach_job +from raps.power import ( + PowerManager, + compute_node_power, + compute_node_power_validate, + record_power_stats_foreach_job, + compute_node_power_uncertainties, + compute_node_power_validate_uncertainties, +) from raps.network import ( NetworkModel, apply_job_slowdown, compute_system_network_stats ) -from raps.workload import continuous_job_generation +from raps.telemetry import Telemetry +from raps.cooling import ThermoFluidsModel +from raps.flops import FLOPSManager +from raps.workload import 
Workload, continuous_job_generation +from raps.account import Accounts from raps.downtime import Downtime +from raps.weather import Weather +from raps.sim_config import SimConfig @dataclasses.dataclass @@ -109,15 +124,20 @@ def keyboard_listener(state): class Engine: """Job scheduling simulation engine.""" - def __init__(self, *, power_manager, - flops_manager, - cooling_model=None, - config, - jobs=None, - total_initial_jobs=0, - continuous_workload=None, # Workload class to generate from for continuous generation - **kwargs): - self.config = config + def __init__(self, *, + power_manager: PowerManager, + flops_manager: FLOPSManager, + telemetry: Telemetry, + cooling_model: ThermoFluidsModel | None = None, + jobs=None, + total_initial_jobs=0, + # Workload class to generate from for continuous generation + continuous_workload: Workload | None = None, + accounts=None, + sim_config: SimConfig, + ): + # TODO: multi-partition? + self.config = sim_config.system_configs[0].get_legacy() self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) self.resource_manager = ResourceManager( total_nodes=self.config['TOTAL_NODES'], @@ -127,7 +147,8 @@ class Engine: # Initialize running and queue, etc. self.running = [] self.queue = [] - self.accounts = None + self.accounts = accounts + self.telemetry = telemetry self.job_history_dict = [] self.jobs_completed = 0 self.jobs_killed = 0 @@ -137,12 +158,12 @@ class Engine: self.sys_power = 0 self.power_manager = power_manager self.flops_manager = flops_manager - self.debug = kwargs.get('debug') + self.debug = sim_config.debug self.continuous_workload = continuous_workload - self.output = kwargs.get('output') - self.replay = kwargs.get('replay') - self.downscale = kwargs.get('downscale', 1) # Factor to downscale the 1s timesteps (power of 10) - self.simulate_network = kwargs.get('simulate_network') + self.output = sim_config.output + self.replay = sim_config.replay + self.downscale = sim_config.downscale # Factor to downscale the 1s timesteps (power of 10) + self.simulate_network = sim_config.simulate_network self.sys_util_history = [] self.scheduler_queue_history = [] self.scheduler_running_history = [] @@ -152,18 +173,18 @@ class Engine: self.avg_slowdown_history = [] self.max_slowdown_history = [] self.node_occupancy_history = [] - self.downtime = Downtime(first_downtime=kwargs.get('downtime_first'), - downtime_interval=kwargs.get('downtime_interval'), - downtime_length=kwargs.get('downtime_length')) + self.downtime = Downtime(first_downtime=sim_config.downtime_first, + downtime_interval=sim_config.downtime_interval, + downtime_length=sim_config.downtime_length) # Set scheduler type - either based on config or command-line args - defaults to 'default' if self.config['multitenant']: scheduler_type = 'multitenant' else: - scheduler_type = kwargs.get('scheduler', 'default') + scheduler_type = sim_config.scheduler - policy_type = kwargs.get('policy') - backfill_type = kwargs.get('backfill') + policy_type = sim_config.policy + backfill_type = sim_config.backfill self.scheduler = load_scheduler(scheduler_type)( config=self.config, @@ -172,7 +193,7 @@ class Engine: resource_manager=self.resource_manager, jobs=jobs ) - if kwargs.get('live'): + if sim_config.live: assert self.scheduler.policy != PolicyType.REPLAY, \ "Cannot replay from a live system. Choose a scheduling policy!" 
print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}" @@ -181,10 +202,109 @@ class Engine: if self.simulate_network: available_nodes = self.resource_manager.available_nodes - self.network_model = NetworkModel(available_nodes=available_nodes, config=config, kwargs=kwargs) + self.network_model = NetworkModel( + available_nodes=available_nodes, + config=self.config, + ) else: self.network_model = None + @staticmethod + def from_sim_config(sim_config: SimConfig): + if len(sim_config.system_configs) > 1: + raise ValueError("from_sim_config does not support multipartition simulations yet") + system_config_dict = sim_config.system_configs[0].get_legacy() + sim_config_args = sim_config.get_legacy_args() + sim_config_dict = sim_config.get_legacy_args_dict() + sim_config_dict['config'] = system_config_dict + + if sim_config.seed: + random.seed(sim_config.seed) + np.random.seed(sim_config.seed + 1) + + if sim_config.cooling: + cooling_model = ThermoFluidsModel(**system_config_dict) + cooling_model.initialize() + if sim_config.start: + cooling_model.weather = Weather(sim_config.start, config=system_config_dict) + else: + cooling_model = None + + if sim_config.power_scope == 'node': + if sim_config.uncertainties: + power_manager = PowerManager(compute_node_power_validate_uncertainties, **system_config_dict) + else: + power_manager = PowerManager(compute_node_power_validate, **system_config_dict) + else: + if sim_config.uncertainties: + power_manager = PowerManager(compute_node_power_uncertainties, **system_config_dict) + else: + power_manager = PowerManager(compute_node_power, **system_config_dict) + + flops_manager = FLOPSManager( + config=system_config_dict, + validate=(sim_config.power_scope == "node"), + ) + + if sim_config.live and not sim_config.replay: + td = Telemetry(**sim_config_dict) + jobs, timestep_start, timestep_end = \ + td.load_jobs_times_args_from_live_system() + elif sim_config.replay: + td = Telemetry(**sim_config_dict) + jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files( + files=sim_config.replay, args=sim_config_args, config=system_config_dict, + ) + else: # Synthetic jobs + wl = Workload(sim_config_args, system_config_dict) + jobs = wl.generate_jobs() + timestep_start = 0 + if hasattr(jobs[0], 'end_time'): + timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) + else: + timestep_end = 88200 # 24 hours + + td = Telemetry(**sim_config_dict) + + # TODO refactor how stat/end/fastforward/time work + if sim_config.fastforward is not None: + timestep_start = timestep_start + sim_config.fastforward + + if sim_config.time is not None: + timestep_end = timestep_start + sim_config.time + + if sim_config.time_delta is not None: + time_delta = sim_config.time_delta + else: + time_delta = 1 + + if sim_config.continuous_job_generation: + continuous_workload = wl + else: + continuous_workload = None + + accounts = None + if sim_config.accounts: + job_accounts = Accounts(jobs) + if sim_config.accounts_json: + loaded_accounts = Accounts.from_json_filename(sim_config.accounts_json) + accounts = Accounts.merge(loaded_accounts, job_accounts) + else: + accounts = job_accounts + + engine = Engine( + power_manager=power_manager, + flops_manager=flops_manager, + cooling_model=cooling_model, + continuous_workload=continuous_workload, + jobs=jobs, + accounts=accounts, + telemetry=td, + sim_config=sim_config, + ) + + return engine, jobs, timestep_start, timestep_end, time_delta + def add_running_jobs_to_queue(self, jobs_to_submit: List): 
""" Modifies jobs_to_submit and self.queue diff --git a/raps/network.py b/raps/network.py index 9c457d4..b4340e4 100644 --- a/raps/network.py +++ b/raps/network.py @@ -8,7 +8,7 @@ from pathlib import Path class NetworkModel: """ """ - def __init__(self, *, available_nodes, config, **kwargs): + def __init__(self, *, available_nodes, config): self.topology = config.get("TOPOLOGY") # if fat-tree, build the graph once if self.topology == "fat-tree": diff --git a/raps/plotting.py b/raps/plotting.py index 61a8f96..606ac7a 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -14,7 +14,7 @@ Plotter """ import itertools - +from pathlib import Path import matplotlib.pyplot as plt import matplotlib.ticker as ticker from matplotlib.ticker import MaxNLocator @@ -95,7 +95,7 @@ class Plotter(BasePlotter): The path to save the plot. """ - def __init__(self, xlabel='', ylabel='', title='', save_path='out.svg', uncertainties=False): + def __init__(self, xlabel='', ylabel='', title='', save_path: Path | str = 'out.svg', uncertainties=False): """ Constructs all the necessary attributes for the Plotter object. diff --git a/raps/sim_config.py b/raps/sim_config.py index 165e6c8..ccc8e28 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -66,7 +66,7 @@ class SimConfig(BaseModel): uncertainties: bool = False """ Use float-with-uncertainties (much slower) """ - seed: bool = False + seed: int | None = None """ Set RNG seed for deterministic simulation """ output: ExpandedPath | None = None """ Output power, cooling, and loss models for later analysis. Argument specifies name. """ @@ -250,12 +250,18 @@ class SimConfig(BaseModel): if not self.replay and not self.workload: self.workload = "random" + if self.cooling: + self.layout = "layout2" + if self.jobsize_is_power_of is not None and self.jobsize_is_of_degree is not None: raise ValueError("jobsize_is_power_of and jobsize_is_of_degree are mutually exclusive") - + if self.plot and not self.output: raise ValueError("plot requires an output directory to be set") + if self.live and not self.replay and self.time is None: + raise ValueError("--time must be set, specifing how long we want to predict") + return self @cached_property diff --git a/raps/telemetry.py b/raps/telemetry.py index c3cfbec..32287be 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -6,7 +6,6 @@ parsing parquet files, and generating job state information. The module defines a `Telemetry` class for managing telemetry data and several helper functions for data encryption and conversion between node name and index formats. 
""" -import re import sys import random import argparse @@ -57,7 +56,7 @@ from raps.plotting import ( plot_nodes_gantt, plot_network_histogram ) -from raps.utils import next_arrival_byconfargs, create_casename, convert_to_time_unit +from raps.utils import next_arrival_byconfargs, convert_to_time_unit # from raps.sim_config import args, args_dict @@ -242,6 +241,7 @@ class Telemetry: jobs = [] trigger_custom_dataloader = False for i, file in enumerate(files): + file = str(file) file = os.path.normpath(file.lstrip('"').rstrip('"')) if hasattr(args, 'is_results_file') and args.is_results_file: if file.endswith(".csv"): -- GitLab From 5ede8b8bb37af4644ed89704073c02dda9a2f915 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 26 Aug 2025 11:47:46 -0400 Subject: [PATCH 05/27] Update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 74a41d8..bf49923 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ venv *.npz *.prof simulation_results/ +models/*.fmu -- GitLab From cb7d2254e36501a70cb2d1c21eddad86e9eb29c5 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 26 Aug 2025 14:57:29 -0400 Subject: [PATCH 06/27] Select partition in Engine --- raps/engine.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 667e904..ddeea5b 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -210,10 +210,22 @@ class Engine: self.network_model = None @staticmethod - def from_sim_config(sim_config: SimConfig): - if len(sim_config.system_configs) > 1: - raise ValueError("from_sim_config does not support multipartition simulations yet") - system_config_dict = sim_config.system_configs[0].get_legacy() + def from_sim_config(sim_config: SimConfig, partition: str | None = None): + if partition: + system_config_by_name = {s.system_name: s for s in sim_config.system_configs} + system_config = system_config_by_name.get(partition) + if not system_config: + raise ValueError(f"Partition {partition} isn't in SimConfig") + elif len(sim_config.system_configs) > 1: + raise ValueError( + "Engine can only run single-partition simulations. Use MultiPartEngine for " + + "multi-partition simulations, or pass partition to select the partition to run." 
+ ) + else: + system_config = sim_config.system_configs[0] + + # Some temporary backwards/compatibility wrappers + system_config_dict = system_config.get_legacy() sim_config_args = sim_config.get_legacy_args() sim_config_dict = sim_config.get_legacy_args_dict() sim_config_dict['config'] = system_config_dict -- GitLab From 831dba5db9b9c7a84c31d64c379cf278a8e75c40 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 26 Aug 2025 16:37:12 -0400 Subject: [PATCH 07/27] Minor tweaks to telemetry.py --- raps/telemetry.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 32287be..43268c7 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -9,9 +9,8 @@ helper functions for data encryption and conversion between node name and index import sys import random import argparse -# import itertools +from pathlib import Path # import json -import os.path from typing import Optional from types import ModuleType @@ -241,8 +240,7 @@ class Telemetry: jobs = [] trigger_custom_dataloader = False for i, file in enumerate(files): - file = str(file) - file = os.path.normpath(file.lstrip('"').rstrip('"')) + file = str(Path(file).resolve()) if hasattr(args, 'is_results_file') and args.is_results_file: if file.endswith(".csv"): jobs, timestep_start, timestep, _ = self.load_csv_results(file) @@ -283,7 +281,6 @@ class Telemetry: break if trigger_custom_dataloader: # custom data loader - print(*args.replay) try: jobs, timestep_start_from_data, timestep_end_from_data = self.load_data(args.replay) except AssertionError: -- GitLab From b118c4e684a7ccdf24c97a88d2b9610a7ae116bf Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 11:02:28 -0400 Subject: [PATCH 08/27] Fix formatting --- raps/engine.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index ddeea5b..cfaa4e0 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -125,18 +125,17 @@ class Engine: """Job scheduling simulation engine.""" def __init__(self, *, - power_manager: PowerManager, - flops_manager: FLOPSManager, - telemetry: Telemetry, - cooling_model: ThermoFluidsModel | None = None, - jobs=None, - total_initial_jobs=0, - # Workload class to generate from for continuous generation - continuous_workload: Workload | None = None, - accounts=None, - sim_config: SimConfig, - ): - # TODO: multi-partition? 
+ power_manager: PowerManager, + flops_manager: FLOPSManager, + telemetry: Telemetry, + cooling_model: ThermoFluidsModel | None = None, + jobs=None, + total_initial_jobs=0, + # Workload class to generate from for continuous generation + continuous_workload: Workload | None = None, + accounts=None, + sim_config: SimConfig, + ): self.config = sim_config.system_configs[0].get_legacy() self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) self.resource_manager = ResourceManager( -- GitLab From f1d687e8e21179469a2ed0638143b8e67279fa2e Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 12:57:48 -0400 Subject: [PATCH 09/27] More helper getters on sim_config --- raps/sim_config.py | 21 +++++++++++++++------ raps/system_config.py | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/raps/sim_config.py b/raps/sim_config.py index ccc8e28..0498c2f 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -11,7 +11,7 @@ from raps.utils import ( parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, pydantic_add_args, yaml_dump, parse_td, ) -from raps.system_config import SystemConfig, get_partition_configs, get_system_config +from raps.system_config import SystemConfig, get_partition_configs from pydantic import BaseModel, model_validator, computed_field from pydantic_settings import SettingsConfigDict @@ -264,16 +264,25 @@ class SimConfig(BaseModel): return self - @cached_property + @property + def system_name(self) -> str: + """ + Name of the system. + Note, this is different than system, as system can be a file or None if partition is set. + """ + return self._multi_partition_system_config.system_name + + @property def system_configs(self) -> list[SystemConfig]: """ Return the SystemConfigs for the selected systems. Will be a single element array unless multiple `partitions` are selected. 
""" - if self.partitions: - return get_partition_configs(self.partitions).partitions - else: - return [get_system_config(self.system)] + return self._multi_partition_system_config.partitions + + @cached_property + def _multi_partition_system_config(self): + return get_partition_configs(self.partitions if self.partitions else [self.system]) def get_legacy_args(self): """ diff --git a/raps/system_config.py b/raps/system_config.py index 35f718c..642bb98 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -121,7 +121,7 @@ class SystemSchedulerConfig(BaseModel): trace_quanta: int min_wall_time: int max_wall_time: int - ui_update_freq: int + ui_update_freq: int # TODO should be moved to raps_config max_nodes_per_job: int job_end_probs: dict[JobEndStates, float] multitenant: bool = False -- GitLab From 10d6279f70b786c87c831b6fe084ee489adff085 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 14:29:53 -0400 Subject: [PATCH 10/27] More fixes to telemetry.py --- raps/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 43268c7..e135bbc 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -240,7 +240,7 @@ class Telemetry: jobs = [] trigger_custom_dataloader = False for i, file in enumerate(files): - file = str(Path(file).resolve()) + file = str(Path(file)) if hasattr(args, 'is_results_file') and args.is_results_file: if file.endswith(".csv"): jobs, timestep_start, timestep, _ = self.load_csv_results(file) -- GitLab From cf04c0e1a4c97ef0c5a9c11b6f0ae79000a25518 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 15:41:57 -0400 Subject: [PATCH 11/27] Fixes to Engine partition selection --- raps/engine.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index cfaa4e0..bcb60ed 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -40,6 +40,7 @@ from raps.account import Accounts from raps.downtime import Downtime from raps.weather import Weather from raps.sim_config import SimConfig +from raps.system_config import SystemConfig @dataclasses.dataclass @@ -135,8 +136,9 @@ class Engine: continuous_workload: Workload | None = None, accounts=None, sim_config: SimConfig, + system_config: SystemConfig, ): - self.config = sim_config.system_configs[0].get_legacy() + self.config = system_config.get_legacy() self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) self.resource_manager = ResourceManager( total_nodes=self.config['TOTAL_NODES'], @@ -228,6 +230,8 @@ class Engine: sim_config_args = sim_config.get_legacy_args() sim_config_dict = sim_config.get_legacy_args_dict() sim_config_dict['config'] = system_config_dict + if partition: + sim_config_dict["system"] = sim_config.system_name if sim_config.seed: random.seed(sim_config.seed) @@ -262,9 +266,26 @@ class Engine: jobs, timestep_start, timestep_end = \ td.load_jobs_times_args_from_live_system() elif sim_config.replay: - td = Telemetry(**sim_config_dict) + # TODO: this will have issues if running separate systems or custom systems + partition_short = partition.split("/")[-1] if partition else None + td = Telemetry( + **sim_config_dict, + partition=partition, + ) + if partition: + snap_map = {p.stem: p for p in sim_config.replay[0].glob("*.npz")} + if len(snap_map) > 0: + if partition_short not in snap_map: + raise RuntimeError(f"Snapshot '{partition_short}.npz' not in {sim_config.replay[0]}") + replay_files = snap_map[partition_short] + else: + replay_files = 
From 10d6279f70b786c87c831b6fe084ee489adff085 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 27 Aug 2025 14:29:53 -0400
Subject: [PATCH 10/27] More fixes to telemetry.py

---
 raps/telemetry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/raps/telemetry.py b/raps/telemetry.py
index 43268c7..e135bbc 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -240,7 +240,7 @@ class Telemetry:
         jobs = []
         trigger_custom_dataloader = False
         for i, file in enumerate(files):
-            file = str(Path(file).resolve())
+            file = str(Path(file))
             if hasattr(args, 'is_results_file') and args.is_results_file:
                 if file.endswith(".csv"):
                     jobs, timestep_start, timestep, _ = self.load_csv_results(file)
-- 
GitLab

From cf04c0e1a4c97ef0c5a9c11b6f0ae79000a25518 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 27 Aug 2025 15:41:57 -0400
Subject: [PATCH 11/27] Fixes to Engine partition selection

---
 raps/engine.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index cfaa4e0..bcb60ed 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -40,6 +40,7 @@ from raps.account import Accounts
 from raps.downtime import Downtime
 from raps.weather import Weather
 from raps.sim_config import SimConfig
+from raps.system_config import SystemConfig
 
 
 @dataclasses.dataclass
@@ -135,8 +136,9 @@ class Engine:
                  continuous_workload: Workload | None = None,
                  accounts=None,
                  sim_config: SimConfig,
+                 system_config: SystemConfig,
                  ):
-        self.config = sim_config.system_configs[0].get_legacy()
+        self.config = system_config.get_legacy()
         self.down_nodes = summarize_ranges(self.config['DOWN_NODES'])
         self.resource_manager = ResourceManager(
             total_nodes=self.config['TOTAL_NODES'],
@@ -228,6 +230,8 @@ class Engine:
         sim_config_args = sim_config.get_legacy_args()
         sim_config_dict = sim_config.get_legacy_args_dict()
         sim_config_dict['config'] = system_config_dict
+        if partition:
+            sim_config_dict["system"] = sim_config.system_name
 
         if sim_config.seed:
             random.seed(sim_config.seed)
@@ -262,9 +266,26 @@ class Engine:
             jobs, timestep_start, timestep_end = \
                 td.load_jobs_times_args_from_live_system()
         elif sim_config.replay:
-            td = Telemetry(**sim_config_dict)
+            # TODO: this will have issues if running separate systems or custom systems
+            partition_short = partition.split("/")[-1] if partition else None
+            td = Telemetry(
+                **sim_config_dict,
+                partition=partition,
+            )
+            if partition:
+                snap_map = {p.stem: p for p in sim_config.replay[0].glob("*.npz")}
+                if len(snap_map) > 0:
+                    if partition_short not in snap_map:
+                        raise RuntimeError(f"Snapshot '{partition_short}.npz' not in {sim_config.replay[0]}")
+                    replay_files = snap_map[partition_short]
+                else:
+                    replay_files = sim_config.replay
+            else:
+                replay_files = sim_config.replay
+
             jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files(
-                files=sim_config.replay, args=sim_config_args, config=system_config_dict,
+                files=replay_files,
+                args=sim_config_args, config=system_config_dict,
             )
         else:  # Synthetic jobs
             wl = Workload(sim_config_args, system_config_dict)
@@ -312,6 +333,7 @@ class Engine:
             accounts=accounts,
             telemetry=td,
             sim_config=sim_config,
+            system_config=system_config,
         )
 
         return engine, jobs, timestep_start, timestep_end, time_delta
-- 
GitLab

From a13a364a3f472db585d75007b5e914ec5899a883 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 27 Aug 2025 16:29:04 -0400
Subject: [PATCH 12/27] Factor multi-partition-sim into reusable class

---
 multi-part-sim-mpi.py     | 170 ---------------------
 multi-part-sim.py         | 238 +++++++++++---------------------
 raps/multi_part_engine.py |  54 +++++++
 raps/ui.py                |   4 -
 4 files changed, 122 insertions(+), 344 deletions(-)
 delete mode 100644 multi-part-sim-mpi.py
 create mode 100644 raps/multi_part_engine.py

diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py
deleted file mode 100644
index eabb19b..0000000
--- a/multi-part-sim-mpi.py
+++ /dev/null
@@ -1,170 +0,0 @@
-"""
-MPI-enabled driver for simulating multi-partition RAPS systems.
-Distributes partitions across ranks with mpi4py for parallel run.
-Supports telemetry replay or synthetic workloads with per-rank
-power, FLOPS, and scheduling models. Outputs debug and summary
-stats for heterogeneous systems (e.g., LUMI, Setonix, Adastra).
-"""
-
-from tqdm import tqdm
-from mpi4py import MPI
-from raps.utils import next_arrival
-from raps.workload import Workload
-from raps.telemetry import Telemetry
-from raps.power import PowerManager, compute_node_power
-from raps.flops import FLOPSManager
-from raps.engine import Engine
-from raps.ui import LayoutManager
-from raps.system_config import get_partition_configs
-from raps.sim_config import args
-import random
-from raps.helpers import check_python_version
-check_python_version()
-
-
-def main():
-    comm = MPI.COMM_WORLD
-    rank = comm.Get_rank()
-    size = comm.Get_size()
-
-    # 3) Load configs for every partition (all ranks do this)
-    multi_config = get_partition_configs(args.partitions)
-    partition_names = multi_config.partition_names
-    configs = [c.get_legacy() for c in multi_config.partitions]
-    args_dicts = [{**vars(args), 'config': cfg} for cfg in configs]
-
-    # 4) Each rank decides which partition‐indices it owns (round-robin):
-    local_partition_indices = [i for i in range(len(partition_names)) if (i % size) == rank]
-    local_partition_names = [partition_names[i] for i in local_partition_indices]
-    # local_configs = [configs[i] for i in local_partition_indices]  # Unused
-    # local_args_dicts = [args_dicts[i] for i in local_partition_indices]  # Unused
-
-    # 5) Rank 0 builds (or loads) the entire job list, assigns partitions, groups by partition,
-    #    then scatters exactly those jobs to each rank. Other ranks just sit in the scatter:
-    if rank == 0:
-        # --- a) "REPLAY" branch?
-        if args.replay:
-            td = Telemetry(**args_dicts[0])
-            print(f"[rank 0] Loading telemetry from {args.replay[0]}…")
-            jobs_full = td.load_snapshot(args.replay[0])
-            available_nodes = [c['AVAILABLE_NODES'] for c in configs]
-            for job in jobs_full:
-                job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0]
-            if args.scale:
-                for job in tqdm(jobs_full, desc="[rank 0] Scaling jobs…"):
-                    job['nodes_required'] = random.randint(1, args.scale)
-                    job['requested_nodes'] = None
-            if args.arrival == 'poisson':
-                for job in tqdm(jobs_full, desc="[rank 0] Rescheduling arrivals…"):
-                    p_name = job['partition']
-                    p_cfg = configs[partition_names.index(p_name)]
-                    job['requested_nodes'] = None
-                    job['submit_time'] = next_arrival(1 / p_cfg['JOB_ARRIVAL_TIME'])
-
-        # --- b) "SYNTHETIC WORKLOAD" branch:
-        else:
-            wl = Workload(*configs)
-            jobs_full = getattr(wl, args.workload)(num_jobs=args.numjobs)
-            available_nodes = [c['AVAILABLE_NODES'] for c in configs]
-            for job in jobs_full:
-                job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0]
-
-        # --- c) Group "jobs_full" by partition name:
-        jobs_by_partition = {p: [] for p in partition_names}
-        for job in jobs_full:
-            jobs_by_partition[job['partition']].append(job)
-
-        # --- d) Build a list-of-lists, one list per rank, containing the union
-        #        of all jobs for that rank's partitions:
-        jobs_for_rank = [[] for _ in range(size)]
-        for p_idx, p_name in enumerate(partition_names):
-            tgt = p_idx % size
-            jobs_for_rank[tgt].extend(jobs_by_partition[p_name])
-    else:
-        jobs_for_rank = None
-
-    # 6) Scatter the per-rank job lists:
-    local_jobs = comm.scatter(jobs_for_rank, root=0)
-
-    # 7) Re‐group each rank's "local_jobs" into a dict keyed by its local_partition_names:
-    local_jobs_by_partition = {p: [] for p in local_partition_names}
-    for job in local_jobs:
-        local_jobs_by_partition[job['partition']].append(job)
-
-    # 8) Build one LayoutManager (and Engine/PowerManager/FLOPSManager) per local partition:
-    layout_managers = {}
-    for idx, p_name in enumerate(local_partition_names):
-        global_idx = local_partition_indices[idx]
-        cfg = configs[global_idx]
-        ad = args_dicts[global_idx]
-
-        pm = PowerManager(compute_node_power, **cfg)
-        fm = FLOPSManager(**ad)
-        sc = Engine(power_manager=pm, flops_manager=fm,
-                    cooling_model=None, **ad)
-
-        layout_managers[p_name] = LayoutManager(args.layout,
-                                                engine=sc,
-                                                debug=args.debug,
-                                                **cfg)
-
-    # 9) Compute timestep_start / timestep_end (all ranks agree):
-    if args.fastforward:
-        fastforward = args.fastforward
-    else:
-        fastforward = 0
-
-    if args.time:
-        timesteps = args.time
-    else:
-        timesteps = 88200  # default 24 hours
-
-    timestep_start = fastforward
-    timestep_end = timestep_start + timesteps
-
-    # 10) Build a generator for each partition that this rank owns:
-    local_generators = {}
-    for p_name in local_partition_names:
-        gen = layout_managers[p_name].run_stepwise(
-            local_jobs_by_partition[p_name],
-            timestep_start=timestep_start,
-            timestep_end=timestep_end
-        )
-        local_generators[p_name] = gen
-
-    # 11) Main simulation loop (every rank steps its own partitions in lockstep):
-    UIF = configs[0]['UI_UPDATE_FREQ']  # assume same for all configs
-    for t in range(timesteps):
-        # --- a) Advance each local partition's generator
-        for gen in local_generators.values():
-            try:
-                next(gen)
-            except StopIteration:
-                pass
-
-        # --- b) Every UI_UPDATE_FREQ, do per-rank prints + one global reduction
-        if (t % UIF) == 0:
-            # 1) sum our local sys_power
-            local_sys_power = sum(lm.engine.sys_power for lm in layout_managers.values())
-
-            # 2) print *our* partition‐level info now (so rank 0 and rank 1 will both print):
-            for p_name, lm in layout_managers.items():
-                sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else 0.0
-                print(f"[DEBUG][rank {rank}] {p_name} – Timestep {t} – "
-                      f"Jobs running: {len(lm.engine.running)} – "
-                      f"Utilization: {sys_util[1]:.2f}% – "
-                      f"Power: {lm.engine.sys_power:.1f}kW")
-
-            # 3) do an MPI reduce so that rank 0 knows the total across all ranks:
-            total_sys_power = comm.reduce(local_sys_power, op=MPI.SUM, root=0)
-            if rank == 0:
-                print(f"[DEBUG][rank {rank}] TOTAL system power (all partitions): {total_sys_power:.1f}kW")
-
-    # 12) Final barrier + exit message on rank 0
-    comm.Barrier()
-    if rank == 0:
-        print("Simulation complete (all ranks).")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/multi-part-sim.py b/multi-part-sim.py
index 587dffb..8943484 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -7,179 +7,77 @@ lockstep. Outputs per-partition performance, utilization, and energy
 statistics for systems such as MIT Supercloud, Setonix, Adastra, and LUMI.
 """
-from tqdm import tqdm
 from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats
-from raps.utils import next_arrival
-from raps.workload import Workload
-from raps.telemetry import Telemetry
-from raps.power import PowerManager, compute_node_power
-from raps.flops import FLOPSManager
-from raps.engine import Engine
-from raps.ui import LayoutManager
-from raps.system_config import get_partition_configs
-from raps.sim_config import args
-import random
-import os
+from raps.multi_part_engine import MultiPartEngine
+from raps.sim_config import sim_config
 from raps.helpers import check_python_version
 check_python_version()
 
-# Load configurations for each partition
-multi_config = get_partition_configs(args.partitions)
-partition_names = multi_config.partition_names
-configs = [c.get_legacy() for c in multi_config.partitions]
-args.system = multi_config.system_name
-
-args_dicts = [
-    {**vars(args), 'config': config, 'partition': partition_names[i]}
-    for i, config in enumerate(configs)
-]
-
-# Initialize Workload
-if args.replay:
-
-    jobs_by_partition = {}
-    t0_by_partition = {}
-    t1_by_partition = {}
-
-    if args.replay[0].endswith('.npz'):
-        # snapshot mode: pick the right .npz for each partition
-        snap_map = {os.path.basename(p): p for p in args.replay}
-        for ad in args_dicts:
-            part = ad['partition']          # e.g. 'mit_supercloud/part-cpu'
-            short = part.split('/')[-1]     # 'part-cpu'
-            snap_file = f"{short}.npz"
-            if snap_file not in snap_map:
-                raise RuntimeError(f"Snapshot '{snap_file}' not in {args.replay}")
-            td = Telemetry(**ad)
-            print(f"[{part}] loading snapshot {snap_file} …")
-            jobs_part, t0, t1, args_from_file = td.load_snapshot(snap_map[snap_file])
-            jobs_by_partition[part] = jobs_part
-    else:
-        # raw load_data mode
-        for ad in args_dicts:
-            part = ad['partition']
-            td = Telemetry(**ad)
-            print(f"\n[{part}] loading traces from {args.replay[0]} …")
-            jobs_part, t0, t1 = td.load_data(args.replay)
-            jobs_by_partition[part] = jobs_part
-            # td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1])
-            # Check if args need to be extracted or merged! Not implemented yet!
-            td.save_snapshot(jobs=jobs_part, timestep_start=t0, timestep_end=t1,
-                             filename=part.split('/')[-1], args=args)
-
-    # --- report how many jobs per partition ---
-    for part, jl in jobs_by_partition.items():
-        print(f"[INFO] Partition '{part}': {len(jl)} jobs loaded")
-
-    # now flatten into a single job list (or keep separate for your engine)
-    all_jobs_flat = []
-    for part in partition_names:
-        for job in jobs_by_partition[part]:
-            job.partition = part
-            all_jobs_flat.append(job)
-
-    total_initial_jobs = len(all_jobs_flat)
-    jobs = all_jobs_flat
-
-    if args.scale:
-        for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"):
-            job.nodes_required = random.randint(1, args.scale)
-
-    if args.arrival == 'poisson':
-        for job in tqdm(jobs, desc="Adjusting job submission time"):
-            partition = job.partition
-            partition_config = configs[partition_names.index(partition)]
-            job.submit_time = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME'])
-
-else:  # Synthetic workload
-    wl = Workload(args, *configs)
-
-    total_initial_jobs = args.numjobs
-
-    # Generate jobs based on workload type
-    jobs = getattr(wl, args.workload)(args=args)
-
-# Group jobs by partition
-jobs_by_partition = {partition: [] for partition in partition_names}
-for job in jobs:
-    jobs_by_partition[job.partition].append(job)
-
-# Initialize layout managers for each partition
-layout_managers = {}
-for i, (config, ad) in enumerate(zip(configs, args_dicts)):
-    pm = PowerManager(compute_node_power, **configs[i])
-    fm = FLOPSManager(**args_dicts[i])
-    sc = Engine(power_manager=pm, flops_manager=fm, cooling_model=None,
-                jobs=jobs_by_partition[config['system_name']], total_initial_jobs=total_initial_jobs, **args_dicts[i])
-    layout_managers[config['system_name']] = LayoutManager(
-        args.layout, engine=sc, debug=args.debug, args_dict=ad, **config)
-
-# Set simulation timesteps
-if args.fastforward:
-    fastfoward = args.fastforward
-else:
-    fastforward = 0
-if args.time:
-    timesteps = args.time
-else:
-    timesteps = 88200  # Default to 24 hours
-
-timestep_start = fastforward
-timestep_end = timestep_start + timesteps
-
-if args.time_delta:
-    time_delta = args.time_delta
-else:
-    time_delta = config['TRACE_QUANTA']
-
-# Create generators for each layout manager
-generators = {name: lm.run_stepwise(jobs_by_partition[name],
-                                    timestep_start=timestep_start,
-                                    timestep_end=timestep_end,
-                                    time_delta=time_delta)
-              for name, lm in layout_managers.items()}
-
-# Step through all generators in lockstep
-for timestep in range(timesteps):
-    for name, gen in generators.items():
-        next(gen)  # Advance each generator
-
-    # Print debug info every UI_UPDATE_FREQ
-    if timestep % configs[0]['UI_UPDATE_FREQ'] == 0:  # Assuming same frequency for all partitions
-        sys_power = 0
-        for name, lm in layout_managers.items():
-            sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else (0, 0.0)
-            if hasattr(lm.engine.resource_manager, 'allocated_cpu_cores'):
-                allocated_cores = lm.engine.resource_manager.allocated_cpu_cores
-                print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} -",
-                      f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ",
-                      f"Power: {lm.engine.sys_power:.1f}kW", flush=True)
-            sys_power += lm.engine.sys_power
-        print(f"system power: {sys_power:.1f}kW", flush=True)
-
-print("Simulation complete.", flush=True)
-
-# Print statistics for each partition
-for name, lm in layout_managers.items():
-    print(f"\n=== Partition: {name} ===")
-
-    engine_stats = get_engine_stats(lm.engine)
-    job_stats = get_job_stats(lm.engine)
-    scheduler_stats = get_scheduler_stats(lm.engine)
-    if args.simulate_network:
-        network_stats = get_network_stats(lm.engine)
-
-    # Print a formatted report
-    print("\n--- Simulation Report ---")
-    for key, value in engine_stats.items():
-        print(f"{key.replace('_', ' ').title()}: {value}")
-    print("-------------------------\n")
-    print("\n--- Job Stat Report ---")
-    for key, value in job_stats.items():
-        print(f"{key.replace('_', ' ').title()}: {value}")
-    print("-------------------------\n")
-    print("\n--- Scheduler Report ---")
-    for key, value in scheduler_stats.items():
-        print(f"{key.replace('_', ' ').title()}: {value}")
-    print("-------------------------")
+
+def print_report(name: str, report: dict):
+    print(f"--- {name} ---")
+    for key, value in report.items():
+        print(f"{str(key).replace('_', ' ').title()}: {value}")
+    print("-------------------------\n")
+
+
+def main():
+    multi_engine, jobs, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config)
+
+    timestep_end = timestep_end - timestep_start
+    timestep_start = 0
+
+    if sim_config.output:
+        for part, engine in multi_engine.engines.items():
+            engine.telemetry.save_snapshot(
+                jobs=jobs[part],
+                timestep_start=timestep_start, timestep_end=timestep_end,
+                filename=part.split('/')[-1],
+                args=sim_config.get_legacy_args(),
+            )
+
+    ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq
+    gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)
+
+    for tick_datas in gen:
+        sys_power = 0
+        tick_datas = {k: v for k, v in tick_datas.items() if v}  # Filter nones
+        timestep = list(tick_datas.values())[0].current_timestep if tick_datas else None
+
+        if timestep and timestep % ui_update_freq == 0:
+            for part, tick_data in tick_datas.items():
+                engine = multi_engine.engines[part]
+
+                sys_util = engine.sys_util_history[-1] if engine.sys_util_history else (0, 0.0)
+                if hasattr(engine.resource_manager, 'allocated_cpu_cores'):
+                    allocated_cores = engine.resource_manager.allocated_cpu_cores
+                    print(
+                        f"[DEBUG] {part} - Timestep {timestep} - Jobs running: {len(engine.running)} -",
+                        f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ",
+                        f"Power: {engine.sys_power:.1f}kW",
+                        flush=True,
+                    )
+                sys_power += engine.sys_power
+            print(f"system power: {sys_power:.1f}kW", flush=True)
+
+    print("Simulation complete.", flush=True)
+
+    # Print statistics for each partition
+    for part, engine in multi_engine.engines.items():
+        print(f"\n=== Partition: {part} ===")
+
+        engine_stats = get_engine_stats(engine)
+        job_stats = get_job_stats(engine)
+        scheduler_stats = get_scheduler_stats(engine)
+        network_stats = get_network_stats(engine) if sim_config.simulate_network else {}
+
+        # Print a formatted report
+        print_report("Simulation Report", engine_stats)
+        print_report("Job Stat Report", job_stats)
+        print_report("Scheduler Report", scheduler_stats)
+        if network_stats:
+            print_report("Network Report", network_stats)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/raps/multi_part_engine.py b/raps/multi_part_engine.py
new file mode 100644
index 0000000..461425b
--- /dev/null
+++ b/raps/multi_part_engine.py
@@ -0,0 +1,54 @@
+from collections.abc import Iterable
+from raps.engine import Engine, TickData
+from raps.sim_config import SimConfig
+
+
+class MultiPartEngine:
+    def __init__(self, engines: dict[str, Engine], jobs: dict[str, list]):
+        self.partition_names = sorted(engines.keys())
+        self.engines = engines
+        self.jobs = jobs
+
+    @staticmethod
+    def from_sim_config(sim_config: SimConfig):
+        if sim_config.replay:
+            root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs)
+            # TODO should consider how to pass separate replay values for separate systems
+            if len(root_systems) > 1:
+                raise ValueError("Replay for multi-system runs is not supported")
+
+        jobs_by_partition = {}
+        engines: dict[str, Engine] = {}
+
+        timestep_start, timestep_end, time_delta = 0, 0, 0
+        for partition in sim_config.system_configs:
+            name = partition.system_name
+            engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(
+                sim_config, partition=name,
+            )
+            for job in jobs:
+                job.partition = name
+            jobs_by_partition[name] = jobs
+            engines[name] = engine
+
+        total_initial_jobs = sum(len(j) for j in jobs_by_partition.values())
+        for engine in engines.values():
+            engine.total_initial_jobs = total_initial_jobs
+
+        multi_engine = MultiPartEngine(
+            engines=engines,
+            jobs=jobs_by_partition,
+        )
+
+        return multi_engine, jobs_by_partition, timestep_start, timestep_end, time_delta
+
+    def run_simulation(self, jobs: dict, timestep_start, timestep_end, time_delta=1
+                       ) -> Iterable[dict[str, TickData | None]]:
+        generators = []
+        for part in self.partition_names:
+            generators.append(self.engines[part].run_simulation(
+                jobs[part], timestep_start, timestep_end, time_delta,
+            ))
+        for tick_datas in zip(*generators, strict=True):
+            yield dict(zip(self.partition_names, tick_datas))
+
+    # TODO need to add a mode to run the partitions in parallel
diff --git a/raps/ui.py b/raps/ui.py
index d9c3bbe..5be3523 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -576,7 +576,3 @@ class LayoutManager:
                 self.update_progress_bar(1)
         finally:
             os.system("stty sane")
-
-    def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta):
-        """ Prepares the UI and returns a generator for the simulation """
-        return self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)
-- 
GitLab
from_sim_config(sim_config: SimConfig): + if sim_config.replay: + root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs) + # TODO should consider how to pass separate replay values for separate systems + if len(root_systems) > 1: + raise ValueError("Replay for multi-system runs is not supported") + + jobs_by_partition = {} + engines: dict[str, Engine] = {} + + timestep_start, timestep_end, time_delta = 0, 0, 0 + for partition in sim_config.system_configs: + name = partition.system_name + engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config( + sim_config, partition=name, + ) + for job in jobs: + job.partition = name + jobs_by_partition[name] = jobs + engines[name] = engine + total_initial_jobs = sum(len(j) for j in jobs_by_partition.values()) + for engine in engines.values(): + engine.total_initial_jobs = total_initial_jobs + + multi_engine = MultiPartEngine( + engines=engines, + jobs=jobs_by_partition, + ) + + return multi_engine, jobs_by_partition, timestep_start, timestep_end, time_delta + + def run_simulation(self, jobs: dict, timestep_start, timestep_end, time_delta=1 + ) -> Iterable[dict[str, TickData | None]]: + generators = [] + for part in self.partition_names: + generators.append(self.engines[part].run_simulation( + jobs[part], timestep_start, timestep_end, time_delta, + )) + for tick_datas in zip(*generators, strict=True): + yield dict(zip(self.partition_names, tick_datas)) + + # TODO need to add a mode to run the partitions in parallel diff --git a/raps/ui.py b/raps/ui.py index d9c3bbe..5be3523 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -576,7 +576,3 @@ class LayoutManager: self.update_progress_bar(1) finally: os.system("stty sane") - - def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta): - """ Prepares the UI and returns a generator for the simulation """ - return self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) -- GitLab From 4e12f976a2284e80f90cd2ac6a95d409c1a4f4c3 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 16:31:49 -0400 Subject: [PATCH 13/27] Update .flake8 --- .flake8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.flake8 b/.flake8 index ce4ab0a..ffffb5c 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] -exclude = .git, __pycache__, venv*, simulation_results, third_party, models +exclude = .git, __pycache__, venv*, simulation_results, third_party, models, .venv max-line-length = 120 -- GitLab From e4e09123d4be4944ee2f4201cfd1bc5c135ebd0d Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 16:39:11 -0400 Subject: [PATCH 14/27] Remove default export of args and args_dict --- raps/downtime.py | 7 ++++--- raps/engine.py | 4 +++- raps/sim_config.py | 2 -- raps/telemetry.py | 2 -- raps/workload.py | 22 ++++++++++++++-------- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/raps/downtime.py b/raps/downtime.py index 97c9139..ae8b82d 100644 --- a/raps/downtime.py +++ b/raps/downtime.py @@ -1,7 +1,6 @@ from __future__ import annotations from typing import TYPE_CHECKING from raps.job import JobState -from raps.sim_config import args, sim_config import numpy as np @@ -15,6 +14,7 @@ class Downtime: first_downtime, downtime_interval, downtime_length, + debug=False ): self.skip = False if downtime_length == 0 or downtime_interval == 0 or \ @@ -25,6 +25,7 @@ class Downtime: self.start: int = first_downtime self.end: int = 0 self.down: bool = False + self.debug = debug def check_and_trigger(self, *, timestep: 
int,
@@ -46,7 +47,7 @@ class Downtime:
     def simulate_down(self, *,
                       engine: Engine
                       ):
-        if args.debug:
+        if self.debug:
             print("Simulated downtime: before downtime start")
             print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")
@@ -66,7 +67,7 @@ class Downtime:
             engine.queue += engine.running
             engine.running = []
-        if args.debug:
+        if self.debug:
             print("Simulated downtime: after downtime start")
             print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")
         self.down = True
diff --git a/raps/engine.py b/raps/engine.py
index bcb60ed..f6da02c 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -176,7 +176,9 @@ class Engine:
         self.node_occupancy_history = []
         self.downtime = Downtime(first_downtime=sim_config.downtime_first,
                                  downtime_interval=sim_config.downtime_interval,
-                                 downtime_length=sim_config.downtime_length)
+                                 downtime_length=sim_config.downtime_length,
+                                 debug=sim_config.debug,
+                                 )
 
         # Set scheduler type - either based on config or command-line args - defaults to 'default'
         if self.config['multitenant']:
diff --git a/raps/sim_config.py b/raps/sim_config.py
index 0498c2f..9237b92 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -358,8 +358,6 @@ def parse_args(cli_args=None) -> SimConfig:
 
 
 sim_config = parse_args()
-args = sim_config.get_legacy_args()
-args_dict = sim_config.get_legacy_args_dict()
 
 if __name__ == "__main__":
     print(yaml_dump(sim_config.model_dump(mode="json")))
diff --git a/raps/telemetry.py b/raps/telemetry.py
index e135bbc..fd271a2 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -16,7 +16,6 @@ from types import ModuleType
 
 
 if __name__ == "__main__":
-    # from raps.sim_config import args, args_dict
     parser = argparse.ArgumentParser(description='Telemetry data validator')
     parser.add_argument('--jid', type=str, default='*', help='Replay job id')
     parser.add_argument('-f', '--replay', nargs='+', type=str,
@@ -56,7 +55,6 @@ from raps.plotting import (
     plot_network_histogram
 )
 from raps.utils import next_arrival_byconfargs, convert_to_time_unit
-# from raps.sim_config import args, args_dict
 
 
 class Telemetry:
diff --git a/raps/workload.py b/raps/workload.py
index 151e2c3..be0b639 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -800,17 +800,23 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False):
 
 
 def run_workload():
-    from raps.sim_config import args, args_dict
-    from raps.system_config import get_system_config
-    config = get_system_config(args.system).get_legacy()
-    if args.replay:
+    from raps.sim_config import sim_config
+    args = sim_config.get_legacy_args()
+    args_dict = sim_config.get_legacy_args_dict()
+    config = sim_config.system_configs[0].get_legacy()
+
+    if sim_config.replay:
         td = Telemetry(**args_dict)
-        jobs, _, _, _ = td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config)
+        jobs, _, _, _ = td.load_jobs_times_args_from_files(files=sim_config.replay, args=args, config=config)
     else:
         workload = Workload(args, config)
-        jobs = getattr(workload, args.workload)(args=args)
-    plot_job_hist(jobs, config=config, dist_split=args.multimodal, gantt_nodes=args.gantt_nodes)
-    if args.output:
+        jobs = getattr(workload, sim_config.workload)(args=args)
+    plot_job_hist(jobs,
+                  config=config,
+                  dist_split=sim_config.multimodal,
+                  gantt_nodes=sim_config.gantt_nodes)
+
+    if sim_config.output:
         timestep_start = min([x.submit_time for x in jobs])
         timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs]))
         filename = create_file_indexed('wl',
create=False, ending="npz").split(".npz")[0] -- GitLab From 525bc49cb3635e0ea653185c70c980818200c58c Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 18:37:17 -0400 Subject: [PATCH 15/27] Combine entrypoint scripts into one with subcommands --- main.py | 305 +++++++++++++++++++++------------------------ multi-part-sim.py | 83 ------------ raps/run_sim.py | 239 +++++++++++++++++++++++++++++++++++ raps/sim_config.py | 65 +--------- raps/workload.py | 7 +- 5 files changed, 385 insertions(+), 314 deletions(-) delete mode 100644 multi-part-sim.py create mode 100644 raps/run_sim.py diff --git a/main.py b/main.py index 97487e3..00a5cb5 100644 --- a/main.py +++ b/main.py @@ -1,178 +1,153 @@ """ -Main driver for simulating the RAPS single-partition (homogeneous) -system in the ExaDigiT digital twin. Supports synthetic workload -generation or telemetry replay, dynamic power modeling (including -conversion losses), and optional coupling to a thermo-fluids cooling -model. Produces performance, utilization, and energy metrics, with -optional plots and output files for analysis and validation. +ExaDigiT Resource Allocator & Power Simulator (RAPS) """ -import json -import pandas as pd +import yaml +import argparse +import sys +from pathlib import Path from raps.helpers import check_python_version -from raps.ui import LayoutManager -from raps.plotting import Plotter -from raps.engine import Engine -from raps.utils import write_dict_to_file -from raps.stats import ( - get_engine_stats, - get_job_stats, - get_scheduler_stats, - get_network_stats, - print_formatted_report -) - -from raps.sim_config import sim_config +from raps.sim_config import SimConfig +from raps.run_sim import run_sim, run_multi_part_sim +from raps.workload import run_workload +from raps.utils import pydantic_add_args, yaml_dump +from pydantic_settings import SettingsConfigDict check_python_version() -def main(): - if sim_config.verbose or sim_config.debug: - print(f"SimConfig: {sim_config.model_dump_json(indent=4)}") - - engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config) - - out = sim_config.output - if out: - out.mkdir(parents=True) - engine.telemetry.save_snapshot( - jobs=jobs, - timestep_start=timestep_start, - timestep_end=timestep_end, - args=sim_config.get_legacy_args(), filename=str(out), - ) - - total_timesteps = timestep_end - timestep_start - - downscale = sim_config.downscale - downscale_str = ""if downscale == 1 else f"/{downscale}" - print(f"Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str}" - f" seconds from {timestep_start} to {timestep_end}.") - print(f"Simulation time delta: {time_delta}{downscale_str} s," - f"Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.") - layout_manager = LayoutManager( - sim_config.layout, engine=engine, - debug=sim_config.debug, total_timesteps=total_timesteps, - args_dict=sim_config.get_legacy_args_dict(), **sim_config.system_configs[0].get_legacy(), - ) - layout_manager.run( - jobs, - timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta, - ) - - engine_stats = get_engine_stats(engine) - job_stats = get_job_stats(engine) - scheduler_stats = get_scheduler_stats(engine) - if engine.simulate_network: - network_stats = get_network_stats(engine) +def read_sim_yaml(config_file: str): + if config_file == "-": + return yaml.safe_load(sys.stdin.read()) + elif config_file: + return yaml.safe_load(Path(config_file).read_text()) else: - network_stats = None + return {} - 
print_formatted_report( - engine_stats=engine_stats, - job_stats=job_stats, - scheduler_stats=scheduler_stats, - network_stats=network_stats, - ) - if downscale_str: - downscale_str = "1" + downscale_str - - if sim_config.plot: - assert out # SimConfig validation should check this - if 'power' in sim_config.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History', - out / f'power.{sim_config.imtype}', - uncertainties=sim_config.uncertainties) - x, y = zip(*engine.power_manager.history) - pl.plot_history(x, y) - - if 'util' in sim_config.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)', - 'System Utilization History', out / f'util.{sim_config.imtype}') - x, y = zip(*engine.sys_util_history) - pl.plot_history(x, y) - - if 'loss' in sim_config.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History', - out / f'loss.{sim_config.imtype}', - uncertainties=sim_config.uncertainties) - x, y = zip(*engine.power_manager.loss_history) - pl.plot_history(x, y) - - pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History', - out / f'loss_pct.{sim_config.imtype}', - uncertainties=sim_config.uncertainties) - x, y = zip(*engine.power_manager.loss_history_percentage) - pl.plot_history(x, y) - - if 'pue' in sim_config.plot: - if engine.cooling_model: - ylabel = 'pue' - title = 'FMU ' + ylabel + 'History' - pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, - out / f'pue.{sim_config.imtype}', - uncertainties=sim_config.uncertainties) - df = pd.DataFrame(engine.cooling_model.fmu_history) - df.to_parquet('cooling_model.parquet', engine='pyarrow') - pl.plot_history(df['time'], df[ylabel]) - else: - print('Cooling model not enabled... skipping output of plot') - - if 'temp' in sim_config.plot: - if engine.cooling_model: - ylabel = 'Tr_pri_Out[1]' - title = 'FMU ' + ylabel + 'History' - pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, out / 'temp.svg') - df = pd.DataFrame(engine.cooling_model.fmu_history) - df.to_parquet('cooling_model.parquet', engine='pyarrow') - pl.plot_compare(df['time'], df[ylabel]) - else: - print('Cooling model not enabled... skipping output of plot') - - if out: - if sim_config.uncertainties: - # Parquet cannot handle annotated ufloat format AFAIK - print('Data dump not implemented using uncertainties!') - else: - if engine.cooling_model: - df = pd.DataFrame(engine.cooling_model.fmu_history) - df.to_parquet(out / 'cooling_model.parquet', engine='pyarrow') - - df = pd.DataFrame(engine.power_manager.history) - df.to_parquet(out / 'power_history.parquet', engine='pyarrow') - - df = pd.DataFrame(engine.power_manager.loss_history) - df.to_parquet(out / 'loss_history.parquet', engine='pyarrow') - - df = pd.DataFrame(engine.sys_util_history) - df.to_parquet(out / 'util.parquet', engine='pyarrow') - - # Schedule history - job_history = pd.DataFrame(engine.get_job_history_dict()) - job_history.to_csv(out / "job_history.csv", index=False) - - scheduler_running_history = pd.DataFrame(engine.get_scheduler_running_history()) - scheduler_running_history.to_csv(out / "running_history.csv", index=False) - scheduler_queue_history = pd.DataFrame(engine.get_scheduler_running_history()) - scheduler_queue_history.to_csv(out / "queue_history.csv", index=False) - - try: - with open(out / 'stats.out', 'w') as f: - json.dump(engine_stats, f, indent=4) - json.dump(job_stats, f, indent=4) - except TypeError: # Is this the correct error code? 
- write_dict_to_file(engine_stats, out / 'stats.out') - write_dict_to_file(job_stats, out / 'stats.out') - - if sim_config.accounts: - try: - with open(out / 'accounts.json', 'w') as f: - json_string = json.dumps(engine.accounts.to_dict()) - f.write(json_string) - except TypeError: - write_dict_to_file(engine.accounts.to_dict(), out / 'accounts.json') - print("Output directory is: ", out) # If output is enabled, the user wants this information as last output +CLI_CONFIG = SettingsConfigDict( + cli_implicit_flags=True, + cli_kebab_case=True, +) + + +def main(): + parser = argparse.ArgumentParser( + description = """ + ExaDigiT Resource Allocator & Power Simulator (RAPS) + """, + allow_abbrev = False, + ) + subparsers = parser.add_subparsers(required = True) + + # Shortcut for common sim args + sim_shortcuts = { + "partitions": "x", + "cooling": "c", + "simulate-network": "net", + "fastforward": "ff", + "time": "t", + "debug": "d", + "numjobs": "n", + "verbose": "v", + "output": "o", + "uncertainties": "u", + "plot": "p", + "replay": "f", + "workload": "w", + } + + + ### raps run ### + cmd_run = subparsers.add_parser("run", description=""" + Run single-partition (homogeneous) systems. Supports synthetic workload generation or + telemetry replay, dynamic power modeling (including conversion losses), and optional + coupling to a thermo-fluids cooling model. Produces performance, utilization, and + energy metrics, with optional plots and output files for analysis and validation. + """) + cmd_run.add_argument("config_file", nargs="?", default=None, help=""" + YAML sim config file, can be used to configure an experiment instead of using CLI + flags. Pass "-" to read from stdin. + """) + cmd_run_validate = pydantic_add_args(cmd_run, SimConfig, model_config={ + **CLI_CONFIG, + "cli_shortcuts": sim_shortcuts, + }) + def cmd_run_func(args): + sim_config = cmd_run_validate(args, read_sim_yaml(args.config_file)) + run_sim(sim_config) + cmd_run.set_defaults(func = cmd_run_func) + + + ### raps run-multi-part ### + # It might make sense to combine these into a single entrypoint. Though the multi-part run + # #doesn't support UI or the same output options. + cmd_run_multi_part = subparsers.add_parser("run-multi-part", description=""" + Simulates multi-partition (heterogeneous) systems. Supports replaying telemetry or + generating synthetic workloads across CPU-only, GPU, and mixed partitions. Initializes + per-partition power, FLOPS, and scheduling models, then advances simulations in lockstep. + Outputs per-partition performance, utilization, and energy statistics for systems such as + MIT Supercloud, Setonix, Adastra, and LUMI. + """) + cmd_run_multi_part.add_argument("config_file", nargs="?", default=None, help=""" + YAML sim config file, can be used to configure an experiment instead of using CLI + flags. Pass "-" to read from stdin. + """) + cmd_run_multi_part_validate = pydantic_add_args(cmd_run_multi_part, SimConfig, model_config={ + **CLI_CONFIG, + "cli_shortcuts": sim_shortcuts, + }) + def cmd_run_multi_part_func(args): + sim_config = cmd_run_multi_part_validate(args, read_sim_yaml(args.config_file)) + run_multi_part_sim(sim_config) + cmd_run_multi_part.set_defaults(func = cmd_run_multi_part_func) + + + ### raps show ### + cmd_show = subparsers.add_parser("show", description=""" + Outputs the given CLI args as a YAML config file that can be used to re-run the same + simulation. + """) + cmd_show.add_argument("config_file", nargs="?", default=None, help=""" + Input YAML sim config file. 
Can be used to slightly modify an existing sim config.
+    """)
+    cmd_show.add_argument("--show-defaults", action="store_true", help="""
+        If true, include defaults in the output YAML
+    """)
+    cmd_show_validate = pydantic_add_args(cmd_show, SimConfig, model_config={
+        **CLI_CONFIG,
+        "cli_shortcuts": sim_shortcuts,
+    })
+    def cmd_show_func(args):
+        sim_config = cmd_show_validate(args, read_sim_yaml(args.config_file))
+        sim_config = sim_config.model_dump(mode = "json",
+            exclude_defaults = not args.show_defaults)
+        print(yaml_dump(sim_config), end="")
+    cmd_show.set_defaults(func = cmd_show_func)
+
+
+    ### raps workload ###
+    # TODO: Separate the arguments for this command
+    cmd_workload = subparsers.add_parser("workload", description="""
+        Generate the workload specified by the given CLI args and plot job statistics,
+        without running a full simulation.
+    """)
+    cmd_workload.add_argument(
+        "config_file", nargs="?", default=None,
+        help="Input YAML sim config file. Can be used to slightly modify an existing sim config",
+    )
+    cmd_workload_validate = pydantic_add_args(cmd_workload, SimConfig, model_config={
+        **CLI_CONFIG,
+        "cli_shortcuts": sim_shortcuts,
+    })
+    def cmd_workload_func(args):
+        sim_config = cmd_workload_validate(args, read_sim_yaml(args.config_file))
+        run_workload(sim_config)
+    cmd_workload.set_defaults(func = cmd_workload_func)
+
+    # TODO: move telemetry and other misc scripts into here
+
+    args = parser.parse_args()
+    args.func(args)
 
 
 if __name__ == "__main__":
diff --git a/multi-part-sim.py b/multi-part-sim.py
deleted file mode 100644
index 8943484..0000000
--- a/multi-part-sim.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-Main driver for simulating multi-partition (heterogeneous) systems in the RAPS
-module of ExaDigiT. Supports replaying telemetry or generating synthetic
-workloads across CPU-only, GPU, and mixed partitions. Initializes per-
-partition power, FLOPS, and scheduling models, then advances simulations in
-lockstep. Outputs per-partition performance, utilization, and energy
-statistics for systems such as MIT Supercloud, Setonix, Adastra, and LUMI.
-""" - -from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats -from raps.multi_part_engine import MultiPartEngine -from raps.sim_config import sim_config -from raps.helpers import check_python_version -check_python_version() - - -def print_report(name: str, report: dict): - print(f"--- {name} ---") - for key, value in report.items(): - print(f"{str(key).replace('_', ' ').title()}: {value}") - print("-------------------------\n") - - -def main(): - multi_engine, jobs, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config) - - timestep_end = timestep_end - timestep_start - timestep_start = 0 - - if sim_config.output: - for part, engine in multi_engine.engines.items(): - engine.telemetry.save_snapshot( - jobs=jobs[part], - timestep_start=timestep_start, timestep_end=timestep_end, - filename=part.split('/')[-1], - args=sim_config.get_legacy_args(), - ) - - ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq - gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) - - for tick_datas in gen: - sys_power = 0 - tick_datas = {k: v for k, v in tick_datas.items() if v} # Filter nones - timestep = list(tick_datas.values())[0].current_timestep if tick_datas else None - - if timestep and timestep % ui_update_freq == 0: - for part, tick_data in tick_datas.items(): - engine = multi_engine.engines[part] - - sys_util = engine.sys_util_history[-1] if engine.sys_util_history else (0, 0.0) - if hasattr(engine.resource_manager, 'allocated_cpu_cores'): - allocated_cores = engine.resource_manager.allocated_cpu_cores - print( - f"[DEBUG] {part} - Timestep {timestep} - Jobs running: {len(engine.running)} -", - f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ", - f"Power: {engine.sys_power:.1f}kW", - flush=True, - ) - sys_power += engine.sys_power - print(f"system power: {sys_power:.1f}kW", flush=True) - - print("Simulation complete.", flush=True) - - # Print statistics for each partition - for part, engine in multi_engine.engines.items(): - print(f"\n=== Partition: {part} ===") - - engine_stats = get_engine_stats(engine) - job_stats = get_job_stats(engine) - scheduler_stats = get_scheduler_stats(engine) - network_stats = get_network_stats(engine) if sim_config.simulate_network else {} - - # Print a formatted report - print_report("Simulation Report", engine_stats) - print_report("Job Stat Report", job_stats) - print_report("Scheduler Report", scheduler_stats) - if network_stats: - print("Network Report", network_stats) - - -if __name__ == "__main__": - main() diff --git a/raps/run_sim.py b/raps/run_sim.py new file mode 100644 index 0000000..f59ad5d --- /dev/null +++ b/raps/run_sim.py @@ -0,0 +1,239 @@ +""" +Module containing the primary commands for use in the CLI. The simulation logic itself is kept in +Engine and MultiPartEngine so that it can be used programmatically such as in the simulation server. +These functions just handle rendering the terminal UI and outputting results to files etc. 
+"""
+import json
+import pandas as pd
+import sys
+from raps.ui import LayoutManager
+from raps.plotting import Plotter
+from raps.engine import Engine
+from raps.multi_part_engine import MultiPartEngine
+from raps.utils import write_dict_to_file
+from raps.stats import (
+    get_engine_stats,
+    get_job_stats,
+    get_scheduler_stats,
+    get_network_stats,
+    print_formatted_report
+)
+
+from raps.sim_config import SimConfig
+
+
+def print_report(name: str, report: dict):
+    print(f"--- {name} ---")
+    for key, value in report.items():
+        print(f"{str(key).replace('_', ' ').title()}: {value}")
+    print("-------------------------\n")
+
+
+def run_sim(sim_config: SimConfig):
+    if sim_config.verbose or sim_config.debug:
+        print(f"SimConfig: {sim_config.model_dump_json(indent=4)}")
+    if len(sim_config.system_configs) > 1:
+        print("Use run-multi-part to run multi-partition simulations")
+        sys.exit(1)
+
+    engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config)
+
+    out = sim_config.output
+    if out:
+        out.mkdir(parents=True)
+        engine.telemetry.save_snapshot(
+            jobs=jobs,
+            timestep_start=timestep_start,
+            timestep_end=timestep_end,
+            args=sim_config.get_legacy_args(), filename=str(out),
+        )
+
+    total_timesteps = timestep_end - timestep_start
+
+    downscale = sim_config.downscale
+    downscale_str = "" if downscale == 1 else f"/{downscale}"
+    print(f"Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str}"
+          f" seconds from {timestep_start} to {timestep_end}.")
+    print(f"Simulation time delta: {time_delta}{downscale_str} s, "
+          f"Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.")
+    layout_manager = LayoutManager(
+        sim_config.layout, engine=engine,
+        debug=sim_config.debug, total_timesteps=total_timesteps,
+        args_dict=sim_config.get_legacy_args_dict(), **sim_config.system_configs[0].get_legacy(),
+    )
+    layout_manager.run(
+        jobs,
+        timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta,
+    )
+
+    engine_stats = get_engine_stats(engine)
+    job_stats = get_job_stats(engine)
+    scheduler_stats = get_scheduler_stats(engine)
+    if engine.simulate_network:
+        network_stats = get_network_stats(engine)
+    else:
+        network_stats = None
+
+    print_formatted_report(
+        engine_stats=engine_stats,
+        job_stats=job_stats,
+        scheduler_stats=scheduler_stats,
+        network_stats=network_stats,
+    )
+
+    if downscale_str:
+        downscale_str = "1" + downscale_str
+
+    if sim_config.plot:
+        assert out  # SimConfig validation should check this
+        if 'power' in sim_config.plot:
+            pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History',
+                         out / f'power.{sim_config.imtype}',
+                         uncertainties=sim_config.uncertainties)
+            x, y = zip(*engine.power_manager.history)
+            pl.plot_history(x, y)
+
+        if 'util' in sim_config.plot:
+            pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)',
+                         'System Utilization History', out / f'util.{sim_config.imtype}')
+            x, y = zip(*engine.sys_util_history)
+            pl.plot_history(x, y)
+
+        if 'loss' in sim_config.plot:
+            pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History',
+                         out / f'loss.{sim_config.imtype}',
+                         uncertainties=sim_config.uncertainties)
+            x, y = zip(*engine.power_manager.loss_history)
+            pl.plot_history(x, y)
+
+            pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History',
+                         out / f'loss_pct.{sim_config.imtype}',
+                         uncertainties=sim_config.uncertainties)
+            x, y = zip(*engine.power_manager.loss_history_percentage)
+            pl.plot_history(x, y)
+
+        if 'pue' in sim_config.plot:
+ if engine.cooling_model: + ylabel = 'pue' + title = 'FMU ' + ylabel + 'History' + pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, + out / f'pue.{sim_config.imtype}', + uncertainties=sim_config.uncertainties) + df = pd.DataFrame(engine.cooling_model.fmu_history) + df.to_parquet('cooling_model.parquet', engine='pyarrow') + pl.plot_history(df['time'], df[ylabel]) + else: + print('Cooling model not enabled... skipping output of plot') + + if 'temp' in sim_config.plot: + if engine.cooling_model: + ylabel = 'Tr_pri_Out[1]' + title = 'FMU ' + ylabel + 'History' + pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, out / 'temp.svg') + df = pd.DataFrame(engine.cooling_model.fmu_history) + df.to_parquet('cooling_model.parquet', engine='pyarrow') + pl.plot_compare(df['time'], df[ylabel]) + else: + print('Cooling model not enabled... skipping output of plot') + + if out: + if sim_config.uncertainties: + # Parquet cannot handle annotated ufloat format AFAIK + print('Data dump not implemented using uncertainties!') + else: + if engine.cooling_model: + df = pd.DataFrame(engine.cooling_model.fmu_history) + df.to_parquet(out / 'cooling_model.parquet', engine='pyarrow') + + df = pd.DataFrame(engine.power_manager.history) + df.to_parquet(out / 'power_history.parquet', engine='pyarrow') + + df = pd.DataFrame(engine.power_manager.loss_history) + df.to_parquet(out / 'loss_history.parquet', engine='pyarrow') + + df = pd.DataFrame(engine.sys_util_history) + df.to_parquet(out / 'util.parquet', engine='pyarrow') + + # Schedule history + job_history = pd.DataFrame(engine.get_job_history_dict()) + job_history.to_csv(out / "job_history.csv", index=False) + + scheduler_running_history = pd.DataFrame(engine.get_scheduler_running_history()) + scheduler_running_history.to_csv(out / "running_history.csv", index=False) + scheduler_queue_history = pd.DataFrame(engine.get_scheduler_running_history()) + scheduler_queue_history.to_csv(out / "queue_history.csv", index=False) + + try: + with open(out / 'stats.out', 'w') as f: + json.dump(engine_stats, f, indent=4) + json.dump(job_stats, f, indent=4) + except TypeError: # Is this the correct error code? 
+            write_dict_to_file(engine_stats, out / 'stats.out')
+            write_dict_to_file(job_stats, out / 'stats.out')
+
+        if sim_config.accounts:
+            try:
+                with open(out / 'accounts.json', 'w') as f:
+                    json_string = json.dumps(engine.accounts.to_dict())
+                    f.write(json_string)
+            except TypeError:
+                write_dict_to_file(engine.accounts.to_dict(), out / 'accounts.json')
+        print("Output directory is: ", out)  # If output is enabled, the user wants this information as last output
+
+
+def run_multi_part_sim(sim_config: SimConfig):
+    multi_engine, jobs, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config)
+
+    timestep_end = timestep_end - timestep_start
+    timestep_start = 0
+
+    if sim_config.output:
+        for part, engine in multi_engine.engines.items():
+            engine.telemetry.save_snapshot(
+                jobs=jobs[part],
+                timestep_start=timestep_start, timestep_end=timestep_end,
+                filename=part.split('/')[-1],
+                args=sim_config.get_legacy_args(),
+            )
+
+    ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq
+    gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)
+
+    for tick_datas in gen:
+        sys_power = 0
+        tick_datas = {k: v for k, v in tick_datas.items() if v}  # Filter nones
+        timestep = list(tick_datas.values())[0].current_timestep if tick_datas else None
+
+        if timestep and timestep % ui_update_freq == 0:
+            for part, tick_data in tick_datas.items():
+                engine = multi_engine.engines[part]
+
+                sys_util = engine.sys_util_history[-1] if engine.sys_util_history else (0, 0.0)
+                if hasattr(engine.resource_manager, 'allocated_cpu_cores'):
+                    allocated_cores = engine.resource_manager.allocated_cpu_cores
+                print(
+                    f"[DEBUG] {part} - Timestep {timestep} - Jobs running: {len(engine.running)} -",
+                    f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ",
+                    f"Power: {engine.sys_power:.1f}kW",
+                    flush=True,
+                )
+                sys_power += engine.sys_power
+            print(f"system power: {sys_power:.1f}kW", flush=True)
+
+    print("Simulation complete.", flush=True)
+
+    # Print statistics for each partition
+    for part, engine in multi_engine.engines.items():
+        print(f"\n=== Partition: {part} ===")
+
+        engine_stats = get_engine_stats(engine)
+        job_stats = get_job_stats(engine)
+        scheduler_stats = get_scheduler_stats(engine)
+        network_stats = get_network_stats(engine) if sim_config.simulate_network else {}
+
+        # Print a formatted report
+        print_report("Simulation Report", engine_stats)
+        print_report("Job Stat Report", job_stats)
+        print_report("Scheduler Report", scheduler_stats)
+        if network_stats:
+            print_report("Network Report", network_stats)
diff --git a/raps/sim_config.py b/raps/sim_config.py
index 9237b92..600833f 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -1,19 +1,13 @@
 import argparse
-import sys
-import yaml
 from functools import cached_property
 from datetime import timedelta
-from pathlib import Path
 from typing import Literal
 from raps.schedulers.default import PolicyType, BackfillType
-
 from raps.utils import (
-    parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath,
-    pydantic_add_args, yaml_dump, parse_td,
+    parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, parse_td,
 )
 from raps.system_config import SystemConfig, get_partition_configs
-from pydantic import BaseModel, model_validator, computed_field
-from pydantic_settings import SettingsConfigDict
+from pydantic import BaseModel, model_validator
 
 Distribution = Literal['uniform', 'weibull', 'normal']
 
@@ -45,12 +39,11 @@ class SimConfig(BaseModel):
     Step size (unit
specified by `time_unit`, default seconds). Can pass a string like
     15s, 1m, 1h, 1ms
     """
-    time_unit: timedelta
+    time_unit: timedelta = timedelta(seconds = 1)
     """
     Units all time delta ints are measured in (default seconds)
     """
 
-    @computed_field
     @cached_property
     def downscale(self) -> int:
         return int(timedelta(seconds=1) / self.time_unit)
@@ -299,6 +292,7 @@ class SimConfig(BaseModel):
         args_dict = self.model_dump(mode="json")
         # validate has been renamed to power_scope
         args_dict['validate'] = args_dict["power_scope"] == "node"
+        args_dict['downscale'] = self.downscale
 
         # Convert Path objects to str
         if args_dict['output']:
@@ -310,54 +304,3 @@ class SimConfig(BaseModel):
         args_dict['sim_config'] = self
 
         return args_dict
-
-
-def parse_args(cli_args=None) -> SimConfig:
-    parser = argparse.ArgumentParser(
-        description="Resource Allocator & Power Simulator (RAPS)",
-        allow_abbrev=False,
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "config_file", nargs="?", default=None,
-        help=(
-            'YAML sim config file, can be used to configure an experiment instead of using CLI ' +
-            'flags. Pass "-" to read from stdin.'
-        )
-    )
-
-    model_validate_args = pydantic_add_args(parser, SimConfig, model_config=SettingsConfigDict(
-        cli_implicit_flags=True,
-        cli_kebab_case=True,
-        cli_shortcuts={
-            "partitions": "x",
-            "cooling": "c",
-            "simulate-network": "net",
-            "fastforward": "ff",
-            "time": "t",
-            "debug": "d",
-            "numjobs": "n",
-            "verbose": "v",
-            "output": "o",
-            "uncertainties": "u",
-            "plot": "p",
-            "replay": "f",
-            "workload": "w",
-        },
-    ))
-
-    args = parser.parse_args(cli_args)
-    if args.config_file == "-":
-        config_file_data = yaml.safe_load(sys.stdin.read())
-    elif args.config_file:
-        config_file_data = yaml.safe_load(Path(args.config_file).read_text())
-    else:
-        config_file_data = {}
-
-    return model_validate_args(args, config_file_data)
-
-
-sim_config = parse_args()
-
-if __name__ == "__main__":
-    print(yaml_dump(sim_config.model_dump(mode="json")))
diff --git a/raps/workload.py b/raps/workload.py
index be0b639..11600ad 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -39,6 +39,7 @@ import matplotlib.pyplot as plt
 from raps.telemetry import Telemetry
 from raps.job import job_dict, Job
 from raps.utils import create_file_indexed
+from raps.sim_config import SimConfig
 
 JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",
@@ -799,8 +800,7 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False):
     plt.show()
 
 
-def run_workload():
-    from raps.sim_config import sim_config
+def run_workload(sim_config: SimConfig):
     args = sim_config.get_legacy_args()
     args_dict = sim_config.get_legacy_args_dict()
     config = sim_config.system_configs[0].get_legacy()
@@ -977,6 +977,3 @@ def continuous_job_generation(*, engine, timestep, jobs):
     if len(engine.queue) <= engine.continuous_workload.args.maxqueue:
         new_jobs = engine.continuous_workload.generate_jobs()
         jobs.extend(new_jobs)
-
-- GitLab
From 271984267a6bfbf2c107149675d32ff22e5831eb Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 27 Aug 2025 20:50:00 -0400
Subject: [PATCH 16/27] Update tests

---
 tests/smoke.py | 12 ++++++------
 tests/systems/test_main_basic_run.py | 2 +-
 tests/systems/test_main_cooling_run.py | 2 +-
 tests/systems/test_main_cooling_uncertainty_run.py | 2 +-
 tests/systems/test_main_fastforward_run.py | 2 +-
 tests/systems/test_main_help.py | 2 +-
 tests/systems/test_main_network_run.py | 2 +-
 tests/systems/test_main_network_withdata_run.py | 2 +-
 tests/systems/test_main_noui_run.py | 2 +-
tests/systems/test_main_time_delta_run.py | 2 +- tests/systems/test_main_time_delta_sub_second_run.py | 2 +- tests/systems/test_main_time_ff_delta_run.py | 2 +- tests/systems/test_main_time_run.py | 2 +- tests/systems/test_main_uncertainty_run.py | 2 +- tests/systems/test_main_withdata_run.py | 2 +- tests/systems/test_multi_part_sim_basic_run.py | 2 +- tests/systems/test_multi_part_sim_network_run.py | 2 +- tests/systems/test_multi_part_sim_withdata_run.py | 2 +- tests/systems/test_workload_synthetic.py | 2 +- tests/test_main.py | 6 +++--- 20 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/smoke.py b/tests/smoke.py index 0f9f4ca..7548de3 100644 --- a/tests/smoke.py +++ b/tests/smoke.py @@ -32,7 +32,7 @@ def run_command(command): def build_command(system, file_paths, additional_args=""): """Build the command string for the given system and file paths.""" full_paths = " ".join([os.path.join(DATAPATH, path) for path in file_paths.split()]) - return f"python main.py --system {system} -f {full_paths} -t {DEFAULT_TIME} {additional_args}".strip() + return f"python main.py run --system {system} -f {full_paths} -t {DEFAULT_TIME} {additional_args}".strip() def execute_system_tests(systems): @@ -45,16 +45,16 @@ def execute_system_tests(systems): def synthetic_workload_tests(): """Run synthetic workload tests.""" print("Starting synthetic workload tests...") - run_command(f"python main.py -t {DEFAULT_TIME}") - run_command(f"python main.py -w benchmark -t {BENCH_TIME}") - run_command(f"python main.py -w peak -t {DEFAULT_TIME}") - run_command(f"python main.py -w idle -t {DEFAULT_TIME}") + run_command(f"python main.py run -t {DEFAULT_TIME}") + run_command(f"python main.py run -w benchmark -t {BENCH_TIME}") + run_command(f"python main.py run -w peak -t {DEFAULT_TIME}") + run_command(f"python main.py run -w idle -t {DEFAULT_TIME}") def hetero_tests(): """Run heterogeneous workload tests.""" print("Starting heterogeneous workload tests...") - run_command(f"python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu -t {DEFAULT_TIME}") + run_command(f"python main.py run-multi-part -x setonix/part-cpu setonix/part-gpu -t {DEFAULT_TIME}") def main(): diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index 8e31952..604d3a4 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -17,7 +17,7 @@ def test_main_basic_run(system, system_config,random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "-o", random_id diff --git a/tests/systems/test_main_cooling_run.py b/tests/systems/test_main_cooling_run.py index 1411d8c..62d8621 100644 --- a/tests/systems/test_main_cooling_run.py +++ b/tests/systems/test_main_cooling_run.py @@ -18,7 +18,7 @@ def test_main_cooling_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1h", "--system", system, "-c", diff --git a/tests/systems/test_main_cooling_uncertainty_run.py b/tests/systems/test_main_cooling_uncertainty_run.py index 2491d7a..742fe87 100644 --- a/tests/systems/test_main_cooling_uncertainty_run.py +++ b/tests/systems/test_main_cooling_uncertainty_run.py @@ -19,7 +19,7 @@ def test_main_cooling_uncertainty_run(request, system, system_config, random_id) os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "3m", "--system", 
system, "-c", diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 4b0584b..1215195 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -24,7 +24,7 @@ def test_main_fastforward_run(system, system_config, ff_arg, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t 1", "--fastforward", ff_arg, "--system", system, diff --git a/tests/systems/test_main_help.py b/tests/systems/test_main_help.py index f84c63d..3d87144 100644 --- a/tests/systems/test_main_help.py +++ b/tests/systems/test_main_help.py @@ -17,7 +17,7 @@ def test_main_help(system, system_config,random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-h" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index 8b80d5d..8c7db1e 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -21,7 +21,7 @@ def test_main_network_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "--net", diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index 31db05e..c8d74bc 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -27,7 +27,7 @@ def test_main_network_withdata_run(system, system_config, system_file, random_id os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "-f", *file_list, diff --git a/tests/systems/test_main_noui_run.py b/tests/systems/test_main_noui_run.py index 5b12b55..af8bea8 100644 --- a/tests/systems/test_main_noui_run.py +++ b/tests/systems/test_main_noui_run.py @@ -17,7 +17,7 @@ def test_main_noui_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "--noui", diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py index 9cb87a2..4f5f2b0 100644 --- a/tests/systems/test_main_time_delta_run.py +++ b/tests/systems/test_main_time_delta_run.py @@ -28,7 +28,7 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t", time_arg, "--time-delta", tdelta_arg, "--system", system, diff --git a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index 9276011..4cc4426 100644 --- a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -29,7 +29,7 @@ def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_ os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t", time_arg, "--time-delta", tdelta_arg, "--system", system, diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py index a136615..f66d34d 100644 --- a/tests/systems/test_main_time_ff_delta_run.py +++ b/tests/systems/test_main_time_ff_delta_run.py @@ -28,7 
+28,7 @@ def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t", time_arg, "--ff", ff_arg, "--time-delta", tdelta_arg, diff --git a/tests/systems/test_main_time_run.py b/tests/systems/test_main_time_run.py index e87e331..3cc140e 100644 --- a/tests/systems/test_main_time_run.py +++ b/tests/systems/test_main_time_run.py @@ -27,7 +27,7 @@ def test_main_time_run(system, system_config, time_args, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", time_args, "--system", system, #--"-f", system_file, diff --git a/tests/systems/test_main_uncertainty_run.py b/tests/systems/test_main_uncertainty_run.py index 815a661..effdcc6 100644 --- a/tests/systems/test_main_uncertainty_run.py +++ b/tests/systems/test_main_uncertainty_run.py @@ -19,7 +19,7 @@ def test_main_uncertainty_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "3m", "--system", system, "-u", diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 299d34c..0fec850 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -25,7 +25,7 @@ def test_main_withdata_run(system, system_config, system_file, random_id): assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "-f", ','.join(str(p) for p in file_list), diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py index e8e64e9..0e274a0 100644 --- a/tests/systems/test_multi_part_sim_basic_run.py +++ b/tests/systems/test_multi_part_sim_basic_run.py @@ -18,7 +18,7 @@ def test_multi_part_sim_basic_run(system, system_config): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "multi-part-sim.py", + "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", #"--noui" diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py index 3f53e99..fda12c9 100644 --- a/tests/systems/test_multi_part_sim_network_run.py +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -21,7 +21,7 @@ def test_multi_part_sim_network_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "multi-part-sim.py", + "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", "--net", diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index f862aca..b811155 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -26,7 +26,7 @@ def test_multi_part_sim_withdata_run(system, system_config, system_file): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "multi-part-sim.py", + "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", "-f", *file_list, diff --git a/tests/systems/test_workload_synthetic.py b/tests/systems/test_workload_synthetic.py index dd5f8cf..959fc6d 100644 --- a/tests/systems/test_workload_synthetic.py +++ b/tests/systems/test_workload_synthetic.py @@ -75,7 +75,7 @@ def 
test_workload_synthetic_run( # Build the command line. Each distribution tuple expands into: # dist_name, , , ... cmd = [ - "python", "raps/workload.py", + "python", "main.py", "workload", "--system", system, "-w", "synthetic", "--jobsize-distribution", *flatten(jobdist), diff --git a/tests/test_main.py b/tests/test_main.py index 76f48a3..04bbc42 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -13,7 +13,7 @@ PROJECT_ROOT = Path(__file__).resolve().parent.parent # adjust if needed def test_main_withui(): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1h", ], capture_output=True, text=True @@ -25,7 +25,7 @@ def test_main_withui(): def test_main_noui(): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1h", "--noui" ], capture_output=True, @@ -39,7 +39,7 @@ def test_main_noui(): def test_main_long(): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", ], capture_output=True, text=True ) -- GitLab From b93ded758b3f157d15b9303dffb90a20c82ed1c8 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 27 Aug 2025 20:58:58 -0400 Subject: [PATCH 17/27] Formatting --- main.py | 34 +++++++++++++++++----------------- raps/sim_config.py | 2 +- raps/workload.py | 1 - 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/main.py b/main.py index 00a5cb5..c8d4c69 100644 --- a/main.py +++ b/main.py @@ -32,12 +32,12 @@ CLI_CONFIG = SettingsConfigDict( def main(): parser = argparse.ArgumentParser( - description = """ + description=""" ExaDigiT Resource Allocator & Power Simulator (RAPS) """, - allow_abbrev = False, + allow_abbrev=False, ) - subparsers = parser.add_subparsers(required = True) + subparsers = parser.add_subparsers(required=True) # Shortcut for common sim args sim_shortcuts = { @@ -56,8 +56,7 @@ def main(): "workload": "w", } - - ### raps run ### + # ==== raps run ==== cmd_run = subparsers.add_parser("run", description=""" Run single-partition (homogeneous) systems. Supports synthetic workload generation or telemetry replay, dynamic power modeling (including conversion losses), and optional @@ -72,13 +71,13 @@ def main(): **CLI_CONFIG, "cli_shortcuts": sim_shortcuts, }) + def cmd_run_func(args): sim_config = cmd_run_validate(args, read_sim_yaml(args.config_file)) run_sim(sim_config) - cmd_run.set_defaults(func = cmd_run_func) + cmd_run.set_defaults(func=cmd_run_func) - - ### raps run-multi-part ### + # ==== raps run-multi-part ==== # It might make sense to combine these into a single entrypoint. Though the multi-part run # #doesn't support UI or the same output options. cmd_run_multi_part = subparsers.add_parser("run-multi-part", description=""" @@ -96,13 +95,13 @@ def main(): **CLI_CONFIG, "cli_shortcuts": sim_shortcuts, }) + def cmd_run_multi_part_func(args): sim_config = cmd_run_multi_part_validate(args, read_sim_yaml(args.config_file)) run_multi_part_sim(sim_config) - cmd_run_multi_part.set_defaults(func = cmd_run_multi_part_func) + cmd_run_multi_part.set_defaults(func=cmd_run_multi_part_func) - - ### raps show ### + # ==== raps show ==== cmd_show = subparsers.add_parser("show", description=""" Outputs the given CLI args as a YAML config file that can be used to re-run the same simulation. 
@@ -117,15 +116,15 @@ def main():
         **CLI_CONFIG,
         "cli_shortcuts": sim_shortcuts,
     })
+
     def cmd_show_func(args):
         sim_config = cmd_show_validate(args, read_sim_yaml(args.config_file))
-        sim_config = sim_config.model_dump(mode = "json",
-            exclude_defaults = not args.show_defaults)
+        sim_config = sim_config.model_dump(mode="json",
+                                           exclude_defaults=not args.show_defaults)
         print(yaml_dump(sim_config), end="")
-    cmd_show.set_defaults(func = cmd_show_func)
+    cmd_show.set_defaults(func=cmd_show_func)
 
-
-    ### raps workload ###
+    # ==== raps workload ====
     # TODO: Separate the arguments for this command
     cmd_workload = subparsers.add_parser("workload", description="""
         Generate the workload specified by the given CLI args and plot job statistics,
         without running a full simulation.
     """)
     cmd_workload.add_argument(
         "config_file", nargs="?", default=None,
         help="Input YAML sim config file. Can be used to slightly modify an existing sim config",
     )
     cmd_workload_validate = pydantic_add_args(cmd_workload, SimConfig, model_config={
         **CLI_CONFIG,
         "cli_shortcuts": sim_shortcuts,
     })
+
     def cmd_workload_func(args):
         sim_config = cmd_workload_validate(args, read_sim_yaml(args.config_file))
         run_workload(sim_config)
-    cmd_workload.set_defaults(func = cmd_workload_func)
+    cmd_workload.set_defaults(func=cmd_workload_func)
 
     # TODO: move telemetry and other misc scripts into here
 
diff --git a/raps/sim_config.py b/raps/sim_config.py
index 600833f..036ae8b 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -39,7 +39,7 @@ class SimConfig(BaseModel):
     Step size (unit specified by `time_unit`, default seconds). Can pass a string like
     15s, 1m, 1h, 1ms
     """
-    time_unit: timedelta = timedelta(seconds = 1)
+    time_unit: timedelta = timedelta(seconds=1)
     """
     Units all time delta ints are measured in (default seconds)
     """
diff --git a/raps/workload.py b/raps/workload.py
index 11600ad..563071d 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -976,4 +976,3 @@ def continuous_job_generation(*, engine, timestep, jobs):
     if len(engine.queue) <= engine.continuous_workload.args.maxqueue:
         new_jobs = engine.continuous_workload.generate_jobs()
         jobs.extend(new_jobs)
-
-- GitLab
From 874d81b23099d6a833b5cef1f890db623d6e2eb3 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 27 Aug 2025 20:59:08 -0400
Subject: [PATCH 18/27] Formatting

---
 tests/conftest.py | 2 +-
 tests/systems/conftest.py | 28 +++++++++----------
 tests/systems/test_main_basic_run.py | 2 +-
 tests/systems/test_main_fastforward_run.py | 1 -
 tests/systems/test_main_help.py | 2 +-
 .../systems/test_main_network_withdata_run.py | 3 +-
 tests/systems/test_main_time_delta_run.py | 3 +-
 .../test_main_time_delta_sub_second_run.py | 3 +-
 tests/systems/test_main_time_ff_delta_run.py | 5 ++--
 tests/systems/test_main_time_run.py | 1 -
 tests/systems/test_main_withdata_run.py | 3 +-
 .../systems/test_multi_part_sim_basic_run.py | 1 -
 .../test_multi_part_sim_network_run.py | 5 ++--
 .../test_multi_part_sim_withdata_run.py | 4 +--
 tests/systems/test_telemetry_withdata_run.py | 3 +-
 tests/systems/test_workload_synthetic.py | 21 ++++++--------
 tests/test_main.py | 6 ++--
 tests/util.py | 9 ++++--
 18 files changed, 49 insertions(+), 53 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 8f05879..477588a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@ def pytest_addoption(parser):
 
 def pytest_runtest_setup(item):
     if "long" in item.keywords and not item.config.getoption("--runlong"):
-        #reason = f"Skipping {item.nodeid} because it requires --runlong"
+        # reason = f"Skipping {item.nodeid} because it requires --runlong"
         reason = "Skipping test because it requires --runlong"
         pytest.skip(reason)
 
diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py
index bcde029..8e361e9 100644
---
a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -99,7 +99,7 @@ def system_config(system): "time_delta": True, "net": False, }, - "lassen":{ + "lassen": { "main": True, "telemetry": False, # Takes very long! "multi-part-sim": False, @@ -111,7 +111,7 @@ def system_config(system): "time_delta": True, "net": True, }, - "marconi100":{ + "marconi100": { "main": True, "telemetry": True, "multi-part-sim": False, @@ -182,16 +182,16 @@ def system_config(system): @pytest.fixture def system_file(system): files = { - "40frontiers":[], - "adastraMI250":["AdastaJobsMI250_15days.parquet"], - "frontier":["slurm/joblive/date=2024-01-18/","jobprofile/date=2024-01-18/"], - "fugaku":["21_04.parquet"], - "gcloudv2":["/v2/google_cluster_data_2011_sample"], - "lassen":["Lassen-Supercomputer-Job-Dataset"], - "marconi100":["job_table.parquet"], - "mit_supercloud":["202201"], - "setonix":[""], - "summit":[], - "lumi":[] + "40frontiers": [], + "adastraMI250": ["AdastaJobsMI250_15days.parquet"], + "frontier": ["slurm/joblive/date=2024-01-18/", "jobprofile/date=2024-01-18/"], + "fugaku": ["21_04.parquet"], + "gcloudv2": ["/v2/google_cluster_data_2011_sample"], + "lassen": ["Lassen-Supercomputer-Job-Dataset"], + "marconi100": ["job_table.parquet"], + "mit_supercloud": ["202201"], + "setonix": [""], + "summit": [], + "lumi": [] } - return files.get(system,files) + return files.get(system, files) diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index 604d3a4..c420b59 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_basic_run(system, system_config,random_id): +def test_main_basic_run(system, system_config, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 1215195..3eb567c 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -28,7 +28,6 @@ def test_main_fastforward_run(system, system_config, ff_arg, random_id): "-t 1", "--fastforward", ff_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_help.py b/tests/systems/test_main_help.py index 3d87144..a651a38 100644 --- a/tests/systems/test_main_help.py +++ b/tests/systems/test_main_help.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_help(system, system_config,random_id): +def test_main_help(system, system_config, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index c8d74bc..1dcfee0 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -23,7 +23,8 @@ def test_main_network_withdata_run(system, system_config, system_file, random_id else: file_list = [DATA_PATH / system / system_file] for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), "File does not exist. does ./data exist or is RAPS_DATA_DIR set?" + assert os.path.isfile(file) or os.path.isdir(file), \ + "File does not exist. does ./data exist or is RAPS_DATA_DIR set?" 
os.chdir(PROJECT_ROOT) result = subprocess.run([ diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py index 4f5f2b0..8808052 100644 --- a/tests/systems/test_main_time_delta_run.py +++ b/tests/systems/test_main_time_delta_run.py @@ -21,7 +21,7 @@ pytestmark = [ ("10h", "1h"), ("10h", "3h"), ("3d", "1d") -], ids=["1","1s","10s","1m","1h","3h","1d"]) +], ids=["1", "1s", "10s", "1m", "1h", "3h", "1d"]) def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -32,7 +32,6 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random "-t", time_arg, "--time-delta", tdelta_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index 4cc4426..0bedee8 100644 --- a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -22,7 +22,7 @@ pytestmark = [ ("10cs", "1ms"), ("100ms", "1ms"), ("100ms", "1s"), -], ids=["1ds","3ds","1cs","1ms","1cs-for-10ds","1ms-for-10cs","1ms-for-100ms","1s-for-100ms"]) +], ids=["1ds", "3ds", "1cs", "1ms", "1cs-for-10ds", "1ms-for-10cs", "1ms-for-100ms", "1s-for-100ms"]) def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -33,7 +33,6 @@ def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_ "-t", time_arg, "--time-delta", tdelta_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py index f66d34d..a6c8763 100644 --- a/tests/systems/test_main_time_ff_delta_run.py +++ b/tests/systems/test_main_time_ff_delta_run.py @@ -20,9 +20,9 @@ pytestmark = [ ("10h", "1h", "2h"), ("10h", "3h", "1h"), pytest.param("3d", "1d", "1d", marks=pytest.mark.long, id="1d (long)"), -], ids=["1","1s","10s","1m","1h","3h","1d"]) +], ids=["1", "1s", "10s", "1m", "1h", "3h", "1d"]) def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, - ff_arg, random_id): + ff_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -33,7 +33,6 @@ def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, "--ff", ff_arg, "--time-delta", tdelta_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_time_run.py b/tests/systems/test_main_time_run.py index 3cc140e..c8e00b1 100644 --- a/tests/systems/test_main_time_run.py +++ b/tests/systems/test_main_time_run.py @@ -30,7 +30,6 @@ def test_main_time_run(system, system_config, time_args, random_id): "python", "main.py", "run", "--time", time_args, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 0fec850..a4cbd55 100644 --- 
a/tests/systems/test_main_withdata_run.py
+++ b/tests/systems/test_main_withdata_run.py
@@ -22,7 +22,8 @@ def test_main_withdata_run(system, system_config, system_file, random_id):
     else:
         file_list = [DATA_PATH / system / system_file]
     for file in file_list:
-        assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?"
+        assert os.path.isfile(file) or os.path.isdir(file), \
+            f"File `{file}' does not exist. Does ./data exist or is RAPS_DATA_DIR set?"
     os.chdir(PROJECT_ROOT)
     result = subprocess.run([
         "python", "main.py", "run",
diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py
index 0e274a0..3ea2a9c 100644
--- a/tests/systems/test_multi_part_sim_basic_run.py
+++ b/tests/systems/test_multi_part_sim_basic_run.py
@@ -21,7 +21,6 @@ def test_multi_part_sim_basic_run(system, system_config):
         "python", "main.py", "run-multi-part",
         "--time", "1h",
         "-x", f"{system}/*",
-        #"--noui"
     ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
     assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
     del result
diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py
index fda12c9..ccbadaa 100644
--- a/tests/systems/test_multi_part_sim_network_run.py
+++ b/tests/systems/test_multi_part_sim_network_run.py
@@ -25,12 +25,11 @@ def test_multi_part_sim_network_run(system, system_config, random_id):
         "--time", "1h",
         "-x", f"{system}/*",
         "--net",
-        #"--noui"
     ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
     assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
 
-    #TODO:
-    #Cleanup files after test!
+    # TODO:
+    # Cleanup files after test!
     del result
     gc.collect()
 
diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py
index b811155..2b18305 100644
--- a/tests/systems/test_multi_part_sim_withdata_run.py
+++ b/tests/systems/test_multi_part_sim_withdata_run.py
@@ -22,7 +22,8 @@ def test_multi_part_sim_withdata_run(system, system_config, system_file):
     else:
         file_list = [DATA_PATH / system / system_file]
     for file in file_list:
-        assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?"
+        assert os.path.isfile(file) or os.path.isdir(file), \
+            f"File `{file}' does not exist. Does ./data exist or is RAPS_DATA_DIR set?"
 
     os.chdir(PROJECT_ROOT)
     result = subprocess.run([
@@ -30,7 +31,6 @@ def test_multi_part_sim_withdata_run(system, system_config, system_file):
         "--time", "1h",
         "-x", f"{system}/*",
         "-f", *file_list,
-        #"--noui"
     ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
     assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
     del result
diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py
index 415fbfe..e6401c4 100644
--- a/tests/systems/test_telemetry_withdata_run.py
+++ b/tests/systems/test_telemetry_withdata_run.py
@@ -22,7 +22,8 @@ def test_telemetry_main_withdata_run(system, system_config, system_file, random_
     else:
         file_list = [DATA_PATH / system / system_file]
     for file in file_list:
-        assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?"
+        assert os.path.isfile(file) or os.path.isdir(file), \
+            f"File `{file}' does not exist. Does ./data exist or is RAPS_DATA_DIR set?"
os.chdir(PROJECT_ROOT) result = subprocess.run([ "python", "raps/telemetry.py", diff --git a/tests/systems/test_workload_synthetic.py b/tests/systems/test_workload_synthetic.py index 959fc6d..107b015 100644 --- a/tests/systems/test_workload_synthetic.py +++ b/tests/systems/test_workload_synthetic.py @@ -13,29 +13,26 @@ def flatten(dist): name, args = dist return [name, *args] -def _build_args(dist_name, params): - return [dist_name, *params] - jobdist_case = [ ("weibull", ["--jobsize-weibull-shape", "0.75", "--jobsize-weibull-scale", "16"]), ("normal", ["--jobsize-normal-stddev", "100", "--jobsize-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] cpudist_case = [ ("weibull", ["--cpuutil-weibull-shape", "0.75", "--cpuutil-weibull-scale", "16"]), ("normal", ["--cpuutil-normal-stddev", "100", "--cpuutil-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] gpudist_case = [ ("weibull", ["--gpuutil-weibull-shape", "0.75", "--gpuutil-weibull-scale", "16"]), ("normal", ["--gpuutil-normal-stddev", "100", "--gpuutil-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] wtimedist_case = [ ("weibull", ["--walltime-weibull-shape", "0.75", "--walltime-weibull-scale", "16"]), ("normal", ["--walltime-normal-stddev", "100", "--walltime-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] additional_params_cases = [ "", # nothing @@ -47,16 +44,16 @@ additional_params_cases = [ @pytest.mark.parametrize( - "jobdist", jobdist_case, ids=lambda d:d[0] + "jobdist", jobdist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( - "cpudist", cpudist_case, ids=lambda d:d[0] + "cpudist", cpudist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( - "gpudist", gpudist_case, ids=lambda d:d[0] + "gpudist", gpudist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( - "wtimedist", wtimedist_case, ids=lambda d:d[0] + "wtimedist", wtimedist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( "additional_params", additional_params_cases, ids=lambda p: (p or "none") @@ -90,7 +87,7 @@ def test_workload_synthetic_run( cmd.extend(additional_params) cmd1 = ["python", "-c \"exit()\""] - result = subprocess.run(cmd1,capture_output=True,text=True,stdin=subprocess.DEVNULL) + result = subprocess.run(cmd1, capture_output=True, text=True, stdin=subprocess.DEVNULL) try: result = subprocess.run( cmd, diff --git a/tests/test_main.py b/tests/test_main.py index 04bbc42..5c08182 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -16,7 +16,7 @@ def test_main_withui(): "python", "main.py", "run", "--time", "1h", ], capture_output=True, - text=True + text=True ) assert result.returncode == 0 @@ -29,7 +29,7 @@ def test_main_noui(): "--time", "1h", "--noui" ], capture_output=True, - text=True + text=True ) assert result.returncode == 0 @@ -41,6 +41,6 @@ def test_main_long(): result = subprocess.run([ "python", "main.py", "run", ], capture_output=True, - text=True + text=True ) assert result.returncode == 0 diff --git a/tests/util.py b/tests/util.py index 96609c7..6ee1df7 100644 --- a/tests/util.py +++ b/tests/util.py @@ -13,10 +13,13 @@ def find_project_root(): PROJECT_ROOT = find_project_root() CONFIG_PATH = PROJECT_ROOT / "config" -DATA_PATH = Path(os.getenv("RAPS_DATA_DIR",PROJECT_ROOT / "data")).resolve() +DATA_PATH = Path(os.getenv("RAPS_DATA_DIR", PROJECT_ROOT / "data")).resolve() -#Maybe usefull but now all systems are listed explicitly! 
-system_list = [entry for entry in os.listdir(CONFIG_PATH) if os.path.isfile(os.path.join(CONFIG_PATH,entry,'system.json'))]
+# Maybe useful, but now all systems are listed explicitly!
+system_list = [
+    entry for entry in os.listdir(CONFIG_PATH)
+    if os.path.isfile(os.path.join(CONFIG_PATH, entry, 'system.json'))
+]
 
 
 def requires_all_markers(request, required_markers):
-- 
GitLab


From fdfdce9506865299fe093a550d5cbf22a076e0a5 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 27 Aug 2025 21:01:57 -0400
Subject: [PATCH 19/27] Add raps entrypoint

Now you can call it like
    raps run ...
---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index b7fbb99..f396280 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,3 +32,6 @@ dependencies = [
     "pydantic-settings>=2.10.1",
     "pre-commit"
 ]
+
+[project.scripts]
+raps = "main:main"
-- 
GitLab


From 73b5442882230ebc124b4667a79d9467f6b071ae Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Thu, 28 Aug 2025 08:19:27 -0400
Subject: [PATCH 20/27] Add comment

---
 raps/run_sim.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/raps/run_sim.py b/raps/run_sim.py
index f59ad5d..7e1b24e 100644
--- a/raps/run_sim.py
+++ b/raps/run_sim.py
@@ -184,6 +184,9 @@ def run_sim(sim_config: SimConfig):
 
 def run_multi_part_sim(sim_config: SimConfig):
     multi_engine, jobs, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config)
 
+    # TODO: The mit_supercloud dataloader seems to be outputting the wrong timesteps? mit_supercloud
+    # is the only multi-partition system with replay, so just manually overriding the timesteps here
+    # to fix it for now. The original multi-part-sim.py always started from timestep 0 as well. 
     timestep_end = timestep_end - timestep_start
     timestep_start = 0
 
-- 
GitLab


From 04d1f93008e51b8db9ae2b44cfc4767e529e4176 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Thu, 28 Aug 2025 08:29:36 -0400
Subject: [PATCH 21/27] Use report util

---
 raps/run_sim.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/raps/run_sim.py b/raps/run_sim.py
index 7e1b24e..180beb1 100644
--- a/raps/run_sim.py
+++ b/raps/run_sim.py
@@ -22,13 +22,6 @@ from raps.stats import (
 from raps.sim_config import SimConfig
 
 
-def print_report(name: str, report: dict):
-    print(f"--- {name} ---")
-    for key, value in report.items():
-        print(f"{str(key).replace('_', ' ').title()}: {value}")
-    print("-------------------------\n")
-
-
 def run_sim(sim_config: SimConfig):
     if sim_config.verbose or sim_config.debug:
         print(f"SimConfig: {sim_config.model_dump_json(indent=4)}")
@@ -232,11 +225,12 @@ def run_multi_part_sim(sim_config: SimConfig):
     engine_stats = get_engine_stats(engine)
     job_stats = get_job_stats(engine)
     scheduler_stats = get_scheduler_stats(engine)
-    network_stats = get_network_stats(engine) if sim_config.simulate_network else {}
+    network_stats = get_network_stats(engine) if sim_config.simulate_network else None
 
     # Print a formatted report
-    print_report("Simulation Report", engine_stats)
-    print_report("Job Stat Report", job_stats)
-    print_report("Scheduler Report", scheduler_stats)
-    if network_stats:
-        print("Network Report", network_stats)
+    print_formatted_report(
+        engine_stats=engine_stats,
+        job_stats=job_stats,
+        scheduler_stats=scheduler_stats,
+        network_stats=network_stats,
+    )
-- 
GitLab


From 5deab5babcfe0d7af570f12d87a59fc8ad0bd01d Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Thu, 28 Aug 2025 08:31:53 -0400
Subject: [PATCH 22/27] Add engine test

---
tests/systems/test_engine.py | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/systems/test_engine.py diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py new file mode 100644 index 0000000..df70867 --- /dev/null +++ b/tests/systems/test_engine.py @@ -0,0 +1,42 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT +from raps.engine import Engine +from raps.sim_config import SimConfig +from raps.stats import ( + get_engine_stats, + get_job_stats, + get_scheduler_stats, + get_network_stats, +) + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_main_basic_run(system, system_config, random_id): + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main run.") + + sim_config = SimConfig.model_validate({ + "system": system, + "time": "2m", + }) + engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config) + ticks = list(engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)) + + assert len(ticks) == 120 + + engine_stats = get_engine_stats(engine) + job_stats = get_job_stats(engine) + scheduler_stats = get_scheduler_stats(engine) + network_stats = get_network_stats(engine) + + assert engine_stats['simulated'] == '0:02:00' + # TODO: More specific tests of values + + gc.collect() -- GitLab From 4508bc030697a0015acfc1751c7e35f58249eff4 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 28 Aug 2025 09:14:22 -0400 Subject: [PATCH 23/27] Formatting --- raps/run_sim.py | 2 +- tests/systems/test_engine.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/raps/run_sim.py b/raps/run_sim.py index 180beb1..68229ad 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -179,7 +179,7 @@ def run_multi_part_sim(sim_config: SimConfig): # TODO: The mit_supercloud dataloader seems to be outputting the wrong timesteps? mit_supercloud # is the only multi-partition system with replay, so just manually overriding the timesteps here - # to fix it for now. The original multi-part-sim.py always started from timestep 0 as well. + # to fix it for now. The original multi-part-sim.py always started from timestep 0 as well. 
timestep_end = timestep_end - timestep_start
     timestep_start = 0
 
diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py
index df70867..425ce7b 100644
--- a/tests/systems/test_engine.py
+++ b/tests/systems/test_engine.py
@@ -1,15 +1,12 @@
-import os
-import subprocess
 import gc
 import pytest
-from tests.util import PROJECT_ROOT
 from raps.engine import Engine
 from raps.sim_config import SimConfig
 from raps.stats import (
     get_engine_stats,
-    get_job_stats,
-    get_scheduler_stats,
-    get_network_stats,
+    # get_job_stats,
+    # get_scheduler_stats,
+    # get_network_stats,
 )
 
 pytestmark = [
@@ -18,7 +15,7 @@ pytestmark = [
 ]
 
 
-def test_main_basic_run(system, system_config, random_id):
+def test_engine(system, system_config):
     if not system_config.get("main", False):
         pytest.skip(f"{system} does not support basic main run.")
 
@@ -32,9 +29,9 @@ def test_main_basic_run(system, system_config, random_id):
     assert len(ticks) == 120
 
     engine_stats = get_engine_stats(engine)
-    job_stats = get_job_stats(engine)
-    scheduler_stats = get_scheduler_stats(engine)
-    network_stats = get_network_stats(engine)
+    # job_stats = get_job_stats(engine)
+    # scheduler_stats = get_scheduler_stats(engine)
+    # network_stats = get_network_stats(engine)
 
     assert engine_stats['simulated'] == '0:02:00'
     # TODO: More specific tests of values
-- 
GitLab


From e7cfa243262fc3ddc8ef3a652bfe792d20dd7ba2 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Thu, 28 Aug 2025 09:40:54 -0400
Subject: [PATCH 24/27] Fix description

---
 main.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/main.py b/main.py
index c8d4c69..e5b2c2d 100644
--- a/main.py
+++ b/main.py
@@ -127,13 +127,12 @@ def main():
     # ==== raps workload ====
     # TODO: Separate the arguments for this command
     cmd_workload = subparsers.add_parser("workload", description="""
-        Outputs the given CLI args as a YAML config file that can be used to re-run the same
-        simulation.
+        Saves the workload as a snapshot.
+    """)
+    cmd_workload.add_argument("config_file", nargs="?", default=None, help="""
+        YAML sim config file; can be used to configure an experiment instead of using CLI
+        flags. Pass "-" to read from stdin.
     """)
-    cmd_workload.add_argument(
-        "config_file", nargs="?", default=None,
-        help="Input YAML sim config file. 
Can be used to slightly modify an existing sim config", - ) cmd_workload_validate = pydantic_add_args(cmd_workload, SimConfig, model_config={ **CLI_CONFIG, "cli_shortcuts": sim_shortcuts, -- GitLab From a460323401545dca7cdb03ce9a7b6ce431486d26 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 28 Aug 2025 10:04:29 -0400 Subject: [PATCH 25/27] Fix typo in test_engine --- tests/systems/test_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py index 425ce7b..974ed2e 100644 --- a/tests/systems/test_engine.py +++ b/tests/systems/test_engine.py @@ -33,7 +33,7 @@ def test_engine(system, system_config): # scheduler_stats = get_scheduler_stats(engine) # network_stats = get_network_stats(engine) - assert engine_stats['simulated'] == '0:02:00' + assert engine_stats['time simulated'] == '0:02:00' # TODO: More specific tests of values gc.collect() -- GitLab From b4090766f6a2cd0bbebe70c79bc6b82b9001dfc1 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 28 Aug 2025 12:54:27 -0400 Subject: [PATCH 26/27] Add telemetry script --- main.py | 8 +++ raps/telemetry.py | 62 +++++++++----------- tests/systems/test_telemetry_withdata_run.py | 2 +- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/main.py b/main.py index e5b2c2d..cd7e162 100644 --- a/main.py +++ b/main.py @@ -9,6 +9,7 @@ from raps.helpers import check_python_version from raps.sim_config import SimConfig from raps.run_sim import run_sim, run_multi_part_sim from raps.workload import run_workload +from raps.telemetry import run_telemetry, run_telemetry_add_args from raps.utils import pydantic_add_args, yaml_dump from pydantic_settings import SettingsConfigDict @@ -143,6 +144,13 @@ def main(): run_workload(sim_config) cmd_show.set_defaults(func=cmd_workload_func) + # ==== raps telemetry ==== + cmd_telemetry = subparsers.add_parser("telemetry", description=""" + Telemetry data validator + """) + run_telemetry_add_args(cmd_telemetry) + cmd_telemetry.set_defaults(func=run_telemetry) + # TODO: move telemetry and other misc scripts into here args = parser.parse_args() diff --git a/raps/telemetry.py b/raps/telemetry.py index fd271a2..5a09eb9 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -14,32 +14,6 @@ from pathlib import Path from typing import Optional from types import ModuleType - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Telemetry data validator') - parser.add_argument('--jid', type=str, default='*', help='Replay job id') - parser.add_argument('-f', '--replay', nargs='+', type=str, - help='Either: path/to/joblive path/to/jobprofile' - ' -or- filename.npz (overrides --workload option)') - parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs', 'nodes'], help='Output plots') - parser.add_argument("--is-results-file", action='store_true', default=False, help='Output plots') - parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, - # duplicate in workload! 
- help="Print Gannt with nodes required as line thickness (default false)") - parser.add_argument('-t', '--time', type=str, default=None, - help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') - parser.add_argument('--system', type=str, default='frontier', help='System config to use') - choices = ['prescribed', 'poisson'] - parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, - help=f"Modify arrival distribution ({choices[1]}) " - f"or use the original submit times ({choices[0]})") - parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') - parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.') - parser.add_argument("--live", action="store_true", help="Grab data from live system.") - - args = parser.parse_args() - args_dict = vars(args) - import importlib import numpy as np import pandas as pd @@ -103,8 +77,7 @@ class Telemetry: timestep_end = int(data['timestep_end']) else: timestep_end = np.inf - print(timestep_end) - exit() + raise ValueError("Invalid timestep_end in snapshot") if 'args' in data: args_from_file = data['args'].tolist() else: @@ -293,7 +266,30 @@ class Telemetry: return jobs, timestep_start, timestep_end, args -def run_telemetry(): +def run_telemetry_add_args(parser: argparse.ArgumentParser): + parser.add_argument('--jid', type=str, default='*', help='Replay job id') + parser.add_argument('-f', '--replay', nargs='+', type=str, + help='Either: path/to/joblive path/to/jobprofile' + ' -or- filename.npz (overrides --workload option)') + parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs', 'nodes'], help='Output plots') + parser.add_argument("--is-results-file", action='store_true', default=False, help='Output plots') + parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, + # duplicate in workload! 
+                        help="Print Gantt with nodes required as line thickness (default false)")
+    parser.add_argument('-t', '--time', type=str, default=None,
+                        help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
+    parser.add_argument('--system', type=str, default='frontier', help='System config to use')
+    choices = ['prescribed', 'poisson']
+    parser.add_argument('--arrival', default=choices[0], type=str, choices=choices,
+                        help=f"Modify arrival distribution ({choices[1]}) "
+                             f"or use the original submit times ({choices[0]})")
+    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
+    parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.')
+    parser.add_argument("--live", action="store_true", help="Grab data from live system.")
+
+
+def run_telemetry(args):
+    args_dict = vars(args)
     config = get_system_config(args.system).get_legacy()
     args_dict['config'] = config
     td = Telemetry(**args_dict)
@@ -315,8 +311,8 @@ def run_telemetry():
                                               config=config)
 
     else:
-        parser.print_help()
-        exit()
+        print("Either --live or --replay is required")
+        sys.exit(1)
 
     timesteps = timestep_end - timestep_start
 
@@ -404,7 +400,3 @@ def run_telemetry():
         print(f"Saved to: {filename}")
     else:
         plt.show()
-
-
-if __name__ == "__main__":
-    run_telemetry()
diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py
index e6401c4..e9685f7 100644
--- a/tests/systems/test_telemetry_withdata_run.py
+++ b/tests/systems/test_telemetry_withdata_run.py
@@ -26,7 +26,7 @@ def test_telemetry_main_withdata_run(system, system_config, system_file, random_
             f"File `{file}' does not exist. Does ./data exist or is RAPS_DATA_DIR set?"
     os.chdir(PROJECT_ROOT)
     result = subprocess.run([
-        "python", "raps/telemetry.py",
+        "python", "main.py", "telemetry",
         "--system", system,
         "-f", *file_list,
         "-o", random_id
-- 
GitLab


From da664a242826964c50ff4a9fcf2d02778439a35e Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Thu, 28 Aug 2025 14:10:30 -0400
Subject: [PATCH 27/27] Update README

---
 README.md | 52 +++++++++++++++++++++++-----------------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 665a64a..a561278 100644
--- a/README.md
+++ b/README.md
@@ -19,37 +19,37 @@
 Note: Requires python3.12 or greater.
 
## Usage and help menu - python main.py -h + raps run -h ## Run simulator with default synthetic workload - python main.py + raps run ## Run simulator with telemetry replay # Frontier DATEDIR="date=2024-01-18" DPATH=~/data/frontier-sample-2024-01-18 - python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR + raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR ## Open Telemetry dataset For Marconi supercomputer, download `job_table.parquet` from https://zenodo.org/records/10127767 # Marconi100 - python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet + raps run --system marconi100 -f ~/data/marconi100/job_table.parquet For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from https://zenodo.org/records/14007065 # Adastra MI250 - python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet + raps run --system adastraMI250 -f AdastaJobsMI250_15days.parquet For Google cluster trace v2 - python main.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --ff 600 + raps run --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --ff 600 # analyze dataset - python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v + raps telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v For MIT Supercloud @@ -62,28 +62,28 @@ For MIT Supercloud python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:00 --end 2021-05-21T14:00 # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files - python multi-part-sim.py -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud \ + raps run-multi-part -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud \ --start 2021-05-21T13:00 --end 2021-05-21T14:00 # Note: if no start, end dates provided will default to run 24 hours between # 2021-05-21T00:00 to 2021-05-22T00:00 set by defaults in raps/dataloaders/mit_supercloud/utils.py # Re-run simulation using npz files (much faster load) - python multi-part-sim.py -x mit_supercloud/* -f part-*.npz --system mit_supercloud + raps run-multi-part -x mit_supercloud/* -f part-*.npz --system mit_supercloud # Synthetic tests for verification studies: - python multi-part-sim.py -x 'mit_supercloud/*' -w multitenant + raps run-multi-part -x 'mit_supercloud/*' -w multitenant For Lumi # Synthetic test for lumi multi-part-sim: - python multi-part-sim.py -x lumi/* + raps run-multi-part -x lumi/* ## Perform Network Simulation Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. To run a network simulation, use the following command: - python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --ff 365d -t 12h --arrival poisson --net + raps run -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --ff 365d -t 12h --arrival poisson --net ## Snapshot of extracted workload data @@ -91,7 +91,7 @@ To reduce the expense of extracting the needed data from the telemetry parquet f RAPS saves a snapshot of the extracted data in NPZ format. 
The NPZ file can be given instead of the parquet files for more quickly running subsequent simulations, e.g.:
 
-    python main.py -f jobs_2024-02-20_12-20-39.npz
+    raps run -f jobs_2024-02-20_12-20-39.npz
 
 ## Cooling models
 
@@ -103,37 +103,31 @@ We provide several cooling models in the repo https://code.ornl.gov/exadigit/POW
 Will install the POWER9CSM in the models folder. To activate cooling when running
 RAPS, use `--cooling` or `-c` argument. e.g.,
 
-    python main.py --system marconi100 -c
+    raps run --system marconi100 -c
 
-    python main.py --system lassen -c
+    raps run --system lassen -c
 
-    python main.py --system summit -c
+    raps run --system summit -c
 
 ## Support for multiple system partitions
 
 Multi-partition systems are supported by running the `multi-part-sim.py` script,
 where a list of configurations can be specified using the `-x` flag as follows:
 
-    python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu
+    raps run-multi-part -x setonix/part-cpu setonix/part-gpu
 
 or simply:
 
-    python multi-part-sim.py -x setonix/*    # bash
+    raps run-multi-part -x setonix/*    # bash
 
-    python multi-part-sim.py -x 'setonix/*'  # zsh
-
-To run this in parallel use:
-
-    mpiexec -n 2 python multi-part-sim-mpi.py -x setonix/part-cpu setonix/part-gpu
-
-*Note: first install `mpi4py` via pip or conda.*
+    raps run-multi-part -x 'setonix/*'  # zsh
 
 This will simulate synthetic workloads on two partitions as defined in `config/setonix-cpu`
 and `config/setonix-gpu`. To replay telemetry workloads from another system, e.g.,
 Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data, e.g.,
 
-    python main.py --system marconi100 -f /path/to/marconi100/job_table.parquet
+    raps run --system marconi100 -f /path/to/marconi100/job_table.parquet
 
 This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this
 file to pm100.npz for clarity. Note: can control-C when the simulation starts. Now,
 this pm100.npz file can be used with `multi-part-sim.py` as follows:
 
-    python multi-part-sim.py -x setonix/* -f pm100.npz --arrival poisson --scale 192
+    raps run-multi-part -x setonix/* -f pm100.npz --arrival poisson --scale 192
 
 ## Modifications to telemetry replay
 
@@ -151,11 +145,11 @@ python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --pol
 
 ## Job-level power output example for replay of single job
 
-    python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --jid 1234567 -o
+    raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --jid 1234567 -o
 
 ## Compute stats on telemetry data, e.g., average job arrival time
 
-    python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR
+    raps telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR
 
 ## Build and run Docker container
-- 
GitLab
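
A minimal sketch of the programmatic path this series converges on: the `raps` entry point from PATCH 19 driving the engine API exercised by PATCH 22's test, with the stats key fixed in PATCH 25. The `"frontier"` system name and the two-minute window are example inputs only; everything else mirrors `tests/systems/test_engine.py`:

    from raps.engine import Engine
    from raps.sim_config import SimConfig
    from raps.stats import get_engine_stats

    # Validate a config the same way `raps run --system frontier -t 2m` would.
    sim_config = SimConfig.model_validate({"system": "frontier", "time": "2m"})

    # from_sim_config returns the engine along with the workload and time bounds.
    engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config)

    # run_simulation is a generator yielding one tick per simulated timestep,
    # so a 2-minute window at the default 1-second time unit gives 120 ticks.
    ticks = list(engine.run_simulation(jobs, timestep_start, timestep_end, time_delta))
    assert len(ticks) == 120

    print(get_engine_stats(engine)["time simulated"])  # '0:02:00'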