raps/sim_config.py  +1 −1

@@ -136,7 +136,7 @@ class SimConfig(RAPSBaseModel, abc.ABC):
     # Workload arguments (TODO split into separate model)
     workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic',
                       'multitenant', 'replay', 'randomAI', 'network_test',
-                      'inter_job_congestion', 'calculon'] = "random"
+                      'inter_job_congestion', 'calculon', 'hpl'] = "random"
     """ Type of synthetic workload """

     multimodal: list[float] = [1.0]

raps/workloads/__init__.py  +3 −1

@@ -13,6 +13,7 @@
 from .basic import BasicWorkload
 from .calculon import Calculon
 from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY
 from .distribution import DistributionWorkload
+from .hpl import HPL
 from .live import continuous_job_generation
 from .multitenant import MultitenantWorkload
 from .network import NetworkTestWorkload

@@ -57,7 +58,8 @@ class Workload(
     MultitenantWorkload,
     NetworkTestWorkload,
     InterJobCongestionWorkload,
-    Calculon
+    Calculon,
+    HPL
 ):
     """Final workload class with all workload types."""
     pass

raps/workloads/hpl.py  0 → 100644  +140 −0 (new file)

"""
Test using:
    python main.py run -w hpl -d
    python raps/workloads/hpl.py
"""
import math

import numpy as np

from raps.job import Job, job_dict


class HPL:
    """Analytical HPL workload generator for ExaDigiT."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def hpl(self, **kwargs):
        jobs = []

        # Example: parameter sweep across node counts or block sizes
        hpl_tests = [
            # {"M": 131072, "b": 576, "P": 192, "Q": 384, "Rtype": "1-ring"},
            # {"M": 131072, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"},
            {"M": 741455, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"},
        ]
        # GCDS_PER_GPU = 2

        for test in hpl_tests:
            for partition in self.partitions:
                cfg = self.config_map[partition]
                trace_quanta = cfg["TRACE_QUANTA"]

                # --- Analytical model evaluation ---
                results = self._run_hpl_model(**test)
                total_time = results["T_total"]
                gpu_util = results["gpu_util"]
                cpu_util = results["cpu_util"]

                # Constant-utilization traces, one sample per trace quantum
                num_samples = math.ceil(total_time / trace_quanta) + 1
                gpu_trace = np.full(num_samples, gpu_util)
                cpu_trace = np.full(num_samples, cpu_util)

                job_info = job_dict(
                    # One process-grid rank per GPU; the GCD variant is kept
                    # commented out:
                    # nodes_required=test["P"] * test["Q"] // (cfg["GPUS_PER_NODE"] * GCDS_PER_GPU),
                    nodes_required=test["P"] * test["Q"] // cfg["GPUS_PER_NODE"],
                    scheduled_nodes=[],
                    name=f"HPL_{test['M']}x{test['M']}",
                    account="benchmark",
                    cpu_trace=cpu_trace,
                    gpu_trace=gpu_trace,
                    ntx_trace=[],
                    nrx_trace=[],
                    id=None,
                    end_state="COMPLETED",
                    priority=100,
                    partition=partition,
                    time_limit=total_time,
                    start_time=0,
                    end_time=total_time,
                    expected_run_time=total_time,
                    trace_quanta=trace_quanta,
                    trace_time=total_time,
                    trace_start_time=0,
                    trace_end_time=total_time,
                )
                jobs.append(Job(job_info))

        return jobs

    def _run_hpl_model(self, M, b, P, Q, Rtype="1-ring", f=0.6):
        # Model constants (Table II + Fig 2b): effective communication rates
        # (C*) and compute throughputs in flop/s (F*)
        CAllgather = 6.3e9
        C1ring = 7e9
        Creduce = 46e6
        Fcpublas = 240e9
        Fgemm = 24e12

        Ml = M / P       # local panel height per process column
        Nl = M / Q       # local width per process row
        nb = int(M / b)  # number of block columns (panels)
        total_T = 0.0

        for i in range(nb):
            Ml_i = Ml - (i * b / P)
            Nl1_i = max((1 - f) * Nl - i * b / Q, 0)
            Nl2_i = f * Nl if i * b < f * Nl else Nl - i * b / Q

            TPDFACT = b ** 2 / Creduce + (2 / 3) * b ** 2 * Ml_i / Fcpublas
            TLBCAST = 16 * b * Ml_i / C1ring
            TUPD1 = 2 * b * Ml_i * Nl1_i / Fgemm
            TUPD2 = 2 * b * Ml_i * Nl2_i / Fgemm
            TRS1 = 16 * b * Nl1_i / CAllgather
            TRS2 = 16 * b * Nl2_i / CAllgather

            total_T += max(TPDFACT + TLBCAST + TRS1, TUPD2) + max(TRS2, TUPD1)

        # Derive synthetic utilization (normalized ratios)
        gpu_util = min(1.0, Fgemm / 25e12)
        cpu_util = min(1.0, Fcpublas / 250e9)

        return {"T_total": total_T, "gpu_util": gpu_util, "cpu_util": cpu_util}


if __name__ == "__main__":
    # Mock minimal configuration values to mimic the ExaDigiT runtime
    class DummyHPL(HPL):
        def __init__(self):
            # Provide fake partitions and system config
            self.partitions = ["gpu"]
            self.config_map = {
                "gpu": {
                    "TRACE_QUANTA": 15.0,  # seconds per trace tick
                    "GPUS_PER_NODE": 4,
                    "CPUS_PER_NODE": 64,
                }
            }

    # Instantiate the dummy workload and run synthetic job generation
    workload = DummyHPL()
    jobs = workload.hpl()

    print(f"Generated {len(jobs)} HPL jobs:\n")
    for i, job in enumerate(jobs):
        print(f"--- Job {i} ---")
        print(f"Name: {job.name}")
        print(f"Nodes required: {job.nodes_required}")
        print(f"Wall time: {job.trace_time:.2f} s")
        print(f"CPU trace length: {len(job.cpu_trace)}")
        print(f"GPU trace length: {len(job.gpu_trace)}")
        print(f"Avg CPU util: {np.mean(job.cpu_trace):.3f}")
        print(f"Avg GPU util: {np.mean(job.gpu_trace):.3f}")
        print(f"Expected run time: {job.expected_run_time:.2f}")
        print()
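For reference, the per-panel time that the `_run_hpl_model` loop accumulates can be written out as follows (symbols mirror the code; the C*/F* constants are the rates the code attributes to "Table II + Fig 2b" of its source):

\begin{aligned}
T_{\text{PDFACT}}^{(i)} &= \frac{b^{2}}{C_{\text{reduce}}} + \frac{2}{3}\,\frac{b^{2} M_{l}^{(i)}}{F_{\text{cpublas}}}, &
T_{\text{LBCAST}}^{(i)} &= \frac{16\, b\, M_{l}^{(i)}}{C_{\text{1ring}}}, \\
T_{\text{UPD},k}^{(i)} &= \frac{2\, b\, M_{l}^{(i)} N_{l,k}^{(i)}}{F_{\text{gemm}}}, &
T_{\text{RS},k}^{(i)} &= \frac{16\, b\, N_{l,k}^{(i)}}{C_{\text{allgather}}}, \qquad k \in \{1, 2\}, \\
T_{\text{total}} &= \sum_{i=0}^{n_b - 1} \Bigl[ \max\!\bigl(T_{\text{PDFACT}}^{(i)} + T_{\text{LBCAST}}^{(i)} + T_{\text{RS},1}^{(i)},\; T_{\text{UPD},2}^{(i)}\bigr) + \max\!\bigl(T_{\text{RS},2}^{(i)},\; T_{\text{UPD},1}^{(i)}\bigr) \Bigr].
\end{aligned}

Here M_l^{(i)} and N_{l,k}^{(i)} are the shrinking local panel dimensions computed at the top of the loop, with the trailing matrix split by the look-ahead fraction f. The two max() terms express the overlap the code models: panel factorization, broadcast, and the first row swap run concurrently with one part of the trailing update, and the second row swap with the other.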
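As a quick sanity check of the sizing logic, a minimal sketch of the arithmetic the generator performs for the committed test case, using the dummy __main__ config (the 3600 s model time is hypothetical, purely for illustration):

import math

# Committed test case {"M": 741455, "b": 576, "P": 16, "Q": 32} with the
# dummy config's GPUS_PER_NODE = 4; illustrative numbers only.
P, Q = 16, 32
GPUS_PER_NODE = 4

ranks = P * Q                             # 512 process-grid ranks (one per GPU)
nodes_required = ranks // GPUS_PER_NODE   # 512 // 4 = 128 nodes

# Trace length for a hypothetical 3600 s model time at TRACE_QUANTA = 15 s:
num_samples = math.ceil(3600 / 15.0) + 1  # 241 constant-utilization samples

print(nodes_required, num_samples)        # -> 128 241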