From 4c539b51fa95ca5b32d676d0e321dea33b95c053 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth <maiterthm@ornl.gov>
Date: Tue, 28 Oct 2025 11:08:37 -0400
Subject: [PATCH 1/2] Replace leftover job.running_time uses with
 job.current_run_time

---
 raps/engine.py       | 12 ++++++------
 raps/job.py          |  6 +++---
 raps/network/base.py | 18 +++++++++++-------
 raps/power.py        |  3 +--
 raps/ui.py           |  6 +++---
 raps/utils.py        |  5 +++--
 6 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index ac7e8c7..d502e45 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -499,7 +499,7 @@ class Engine:
         # update Running time
         for job in self.running:
             if job.current_state == JobState.RUNNING:
-                job.running_time = self.current_timestep - job.start_time
+                job.current_run_time = self.current_timestep - job.start_time
 
         # Stop the simulation if no more jobs are running or in the queue or in the job list.
         if autoshutdown and \
@@ -552,7 +552,7 @@ class Engine:
 
         for job in self.running:
 
-            job.running_time = self.current_timestep - job.start_time
+            job.current_run_time = self.current_timestep - job.start_time
 
             if job.current_state != JobState.RUNNING:
                 raise ValueError(
@@ -561,15 +561,15 @@ class Engine:
                 )
             else:  # if job.state == JobState.RUNNING:
                 # Error checks
-                if not replay and job.running_time > job.time_limit and job.end_time is not None:
+                if not replay and job.current_run_time > job.time_limit and job.end_time is not None:
                     raise Exception(f"Job exceded time limit! "
-                                    f"{job.running_time} > {job.time_limit}"
+                                    f"{job.current_run_time} > {job.time_limit}"
                                     f"\n{job}"
                                     f"\nCurrent timestep:{self.current_timestep - self.timestep_start} (rel)"
                                     )
-                if replay and job.running_time > job.expected_run_time:
+                if replay and job.current_run_time > job.expected_run_time:
                     raise Exception(f"Job should have ended in replay! "
-                                    f" {job.running_time} > {job.expected_run_time}"
+                                    f" {job.current_run_time} > {job.expected_run_time}"
                                     f"\n{job}"
                                     f"\nCurrent timestep:{self.current_timestep - self.timestep_start} (rel)"
                                     )
diff --git a/raps/job.py b/raps/job.py
index 05c455e..4d1dda0 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -180,7 +180,7 @@ class Job:
         self.trace_start_time = None  # Relative start time of the trace (to running time)
         self.trace_end_time = None    # Relative end time of the trace
         self.trace_quanta = None  # Trace quanta associated with the job # None means single value!
-        self.running_time = 0     # Current running time updated when simulating
+        self.current_run_time = 0     # Current running time updated when simulating
 
         # If a job dict was given, override the values from the job_dict:
         for key, value in job_dict.items():
@@ -232,7 +232,7 @@ class Job:
                 f"trace_start_time={self.trace_start_time}, "
                 f"trace_end_time={self.trace_end_time}, "
                 f"trace_quanta={self.trace_quanta}, "
-                f"running_time={self.running_time}, "
+                f"current_run_time={self.current_run_time}, "
                 f"power={self.power}, "
                 f"power_history={self.power_history})")
 
@@ -296,7 +296,7 @@ class JobStatistics:
         self.account = job.account
         self.num_nodes = len(job.scheduled_nodes)
         self.scheduled_nodes = job.scheduled_nodes
-        self.run_time = job.running_time
+        self.run_time = job.current_run_time
         self.submit_time = job.submit_time
         self.start_time = job.start_time
         self.end_time = job.end_time
diff --git a/raps/network/base.py b/raps/network/base.py
index bab2ec8..3f3daeb 100644
--- a/raps/network/base.py
+++ b/raps/network/base.py
@@ -4,6 +4,7 @@ from raps.utils import get_current_utilization
 from raps.network.fat_tree import node_id_to_host_name
 from raps.network.torus3d import link_loads_for_job_torus, torus_host_from_real_index
 
+
 def debug_print_trace(job, label: str = ""):
     """Print either the length (if iterable) or the value of job.gpu_trace."""
     if hasattr(job.gpu_trace, "__len__"):
@@ -138,6 +139,7 @@ def worst_link_util(loads, throughput):
             max_util = util
     return max_util
 
+
 def get_link_util_stats(loads, throughput, top_n=10):
     """
     Calculates a distribution of link utilization stats.
@@ -148,9 +150,9 @@ def get_link_util_stats(loads, throughput, top_n=10):
 
     # Calculate utilization for every link
     utilizations = {(edge): (byte_load * 8) / throughput for edge, byte_load in loads.items()}
-    
+
     util_values = list(utilizations.values())
-    
+
     stats = {
         'max': np.max(util_values),
         'mean': np.mean(util_values),
@@ -161,14 +163,16 @@ def get_link_util_stats(loads, throughput, top_n=10):
     # Get top N congested links
     sorted_links = sorted(utilizations.items(), key=lambda item: item[1], reverse=True)
     stats['top_links'] = sorted_links[:top_n]
-    
+
     return stats
 
+
 def max_throughput_per_tick(legacy_cfg: dict, trace_quanta: int) -> float:
     """Return bytes-per-tick throughput of a single link."""
     bw = legacy_cfg.get("NETWORK_MAX_BW") or 12.5e9
     return float(bw) * trace_quanta
 
+
 def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False):
     """
     Simulates network congestion from a list of concurrently running jobs.
@@ -181,8 +185,8 @@ def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False):
     trace_quanta = jobs[0].trace_quanta if jobs else 0
 
     for job in jobs:
-        # Assuming job.running_time is 0 for this static simulation
-        job.running_time = 0
+        # Assuming job.current_run_time is 0 for this static simulation
+        job.current_run_time = 0
         job.trace_start_time = 0
         net_tx = get_current_utilization(job.ntx_trace, job)
 
@@ -193,7 +197,7 @@ def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False):
                 host_list = [node_id_to_host_name(n, k) for n in job.scheduled_nodes]
             else:  # dragonfly
                 host_list = [network_model.real_to_fat_idx[real_n] for real_n in job.scheduled_nodes]
-            
+
             job_loads = link_loads_for_job(network_model.net_graph, host_list, net_tx)
 
         elif network_model.topology == "torus3d":
@@ -214,5 +218,5 @@ def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False):
 
     max_throughput = max_throughput_per_tick(legacy_cfg, trace_quanta)
     net_stats = get_link_util_stats(total_loads, max_throughput)
-    
+
     return net_stats
diff --git a/raps/power.py b/raps/power.py
index dd0745b..b1e6c9d 100644
--- a/raps/power.py
+++ b/raps/power.py
@@ -55,7 +55,7 @@ def compute_node_power(cpu_util, gpu_util, net_util, config):
     power_gpu = gpu_util * config['POWER_GPU_MAX'] + \
         (config['GPUS_PER_NODE'] - gpu_util) * config['POWER_GPU_IDLE']
 
-    if config.get("POWER_NIC_IDLE") != None and config.get("POWER_NIC_MAX") != None:
+    if config.get("POWER_NIC_IDLE") is not None and config.get("POWER_NIC_MAX") is not None:
         power_nic = config['POWER_NIC_IDLE'] + \
             (config['POWER_NIC_MAX'] - config['POWER_NIC_IDLE']) * net_util
     else:
@@ -432,7 +432,6 @@ class PowerManager:
         jobs_power = self.update_power_state(scheduled_nodes, cpu_utils, gpu_utils, net_utils)
 
         for i, job in enumerate(running_jobs):
-            # if job.running_time % self.config['TRACE_QUANTA'] == 0:
             job.power_history.append(jobs_power[i] * len(job.scheduled_nodes))
 
         # Update the power array UI component
diff --git a/raps/ui.py b/raps/ui.py
index 6330bc9..03ca136 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -191,10 +191,10 @@ class LayoutManager:
                 nodes_display = col_nodelist
 
             if self.engine.downscale != 1:
-                running_time_str = convert_seconds_to_hhmmss(job.running_time // self.engine.downscale) + \
-                    f" +{job.running_time % self.engine.downscale}/{self.engine.downscale}s"
+                running_time_str = convert_seconds_to_hhmmss(job.current_run_time // self.engine.downscale) + \
+                    f" +{job.current_run_time % self.engine.downscale}/{self.engine.downscale}s"
             else:
-                running_time_str = convert_seconds_to_hhmm(job.running_time)
+                running_time_str = convert_seconds_to_hhmm(job.current_run_time)
 
             row = [
                 str(job.id).zfill(5),
diff --git a/raps/utils.py b/raps/utils.py
index e232bce..d98be2a 100644
--- a/raps/utils.py
+++ b/raps/utils.py
@@ -640,7 +640,7 @@ def get_current_utilization(trace, job: Job):
     if not job.trace_quanta:
         raise ValueError("job.trace_quanta is not set; cannot compute utilization.")
 
-    time_quanta_index = int((job.running_time - job.trace_start_time) // job.trace_quanta)
+    time_quanta_index = int((job.current_run_time - job.trace_start_time) // job.trace_quanta)
     if time_quanta_index < 0:
         time_quanta_index = 0
 
@@ -700,6 +700,7 @@ def validate_resolved_path(path: str | Path, info: ValidationInfo):
             raise ValueError(f"{path} is not under {base_path}")
     return path
 
+
 ResolvedPath = A[Path, AfterValidator(validate_resolved_path)]
 """
 Resolve a path, and expand ~ in the path string.
@@ -829,7 +830,7 @@ def read_yaml(config_file: str | None) -> dict:
     return result
 
 
-def read_yaml_parsed(cls: type[T], config_file = None) -> dict:
+def read_yaml_parsed(cls: type[T], config_file=None) -> dict:
     """
     Like read_yaml, but parses the input to resolve paths etc.
     Exits on error after printing message (for use in the CLI)
-- 
GitLab


From 81070941ea676b9d77cf61621e47ff2a7c678828 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth <maiterthm@ornl.gov>
Date: Tue, 28 Oct 2025 11:10:36 -0400
Subject: [PATCH 2/2] Scheduler stats displaying seconds again.

---
 raps/ui.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/raps/ui.py b/raps/ui.py
index 03ca136..cee033a 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -269,13 +269,13 @@ class LayoutManager:
         # Add data row with white values
         time_in_s = time // self.engine.downscale
         if (time_in_s < 946684800):  # Introducing Y2K into our codebase! Kek
-            time_str = convert_seconds_to_hhmm(time_in_s)
+            time_str = convert_seconds_to_hhmmss(time_in_s)
         else:
             # For the curious: If the simulation time in seconds is large than
             # unix timestamp for Jan 2000 this is a unix timestamp,
             time_str = f"{datetime.fromtimestamp(time_in_s).strftime('%Y-%m-%d %H:%M')}"
         if timestep_start != 0:  # append time simulated
-            time_str += f"\nSim: {convert_seconds_to_hhmm(time_in_s - timestep_start)}"
+            time_str += f"\nSim: {convert_seconds_to_hhmmss(time_in_s - timestep_start)}"
 
         row.append(time_str)
         row.append(str(nrun))
-- 
GitLab