Loading README.md +4 −0 Original line number Diff line number Diff line Loading @@ -89,6 +89,10 @@ get the datasets. To run a network simulation, use the following command: raps run -f /opt/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --start '2019-08-22T00:00:00+00:00' -t 12h --arrival poisson --net To simulate synthetic network tests: raps run --system lassen -w network_test --net -t 15m ## Snapshot of extracted workload data To reduce the expense of extracting the needed data from the telemetry parquet files, Loading raps/sim_config.py +2 −2 Original line number Diff line number Diff line Loading @@ -134,8 +134,8 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Grab data from live system. """ # Workload arguments (TODO split into separate model) workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay', 'randomAI'] = "random" workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay', 'randomAI', 'network_test'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] Loading raps/workloads/__init__.py +3 −1 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY from .distribution import DistributionWorkload from .live import continuous_job_generation from .multitenant import MultitenantWorkload from .network import NetworkTestWorkload from .utils import plot_job_hist Loading Loading @@ -51,7 +52,8 @@ class Workload( BaseWorkload, DistributionWorkload, BasicWorkload, MultitenantWorkload MultitenantWorkload, NetworkTestWorkload ): """Final workload class with all workload types.""" pass Loading raps/workloads/network.py 0 → 100644 +36 −0 Original line number Diff line number Diff line from raps.job import Job, job_dict class NetworkTestWorkload: def network_test(self, **kwargs): """ A synthetic workload to test network congestion. 
""" config = kwargs.get('config', {}) # High network traffic to trigger congestion # These values are per-node, and the network simulation sums them up # so we need to make them high enough to exceed the total network bandwidth net_tx = 1e12 # bytes net_rx = 1e12 # bytes job_info = job_dict( nodes_required=2, name="network-test-job", account="test", cpu_trace=[1], gpu_trace=[1], ntx_trace=[net_tx], nrx_trace=[net_rx], end_state='COMPLETED', id=1, priority=100, partition='partition', submit_time=0, time_limit=3600, start_time=0, end_time=3600, expected_run_time=3600, trace_quanta=20, ) job = Job(job_info) return [job] Loading
README.md +4 −0
@@ -89,6 +89,10 @@ get the datasets.

To run a network simulation, use the following command:

    raps run -f /opt/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --start '2019-08-22T00:00:00+00:00' -t 12h --arrival poisson --net

To simulate synthetic network tests:

    raps run --system lassen -w network_test --net -t 15m

## Snapshot of extracted workload data

To reduce the expense of extracting the needed data from the telemetry parquet files,
raps/sim_config.py +2 −2 Original line number Diff line number Diff line Loading @@ -134,8 +134,8 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Grab data from live system. """ # Workload arguments (TODO split into separate model) workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay', 'randomAI'] = "random" workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay', 'randomAI', 'network_test'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] Loading
raps/workloads/__init__.py +3 −1 Original line number Diff line number Diff line Loading @@ -14,6 +14,7 @@ from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY from .distribution import DistributionWorkload from .live import continuous_job_generation from .multitenant import MultitenantWorkload from .network import NetworkTestWorkload from .utils import plot_job_hist Loading Loading @@ -51,7 +52,8 @@ class Workload( BaseWorkload, DistributionWorkload, BasicWorkload, MultitenantWorkload MultitenantWorkload, NetworkTestWorkload ): """Final workload class with all workload types.""" pass Loading
# raps/workloads/network.py (new file, mode 100644, +36 −0)
from raps.job import Job, job_dict


class NetworkTestWorkload:
    """Provide the synthetic ``network_test`` workload generator."""

    def network_test(self, **kwargs):
        """Build a synthetic workload meant to provoke network congestion.

        The workload consists of a single two-node job that is submitted and
        started at time 0, ends at time 3600, and carries one-sample network
        send/receive traces of 1e12 bytes each.

        Args:
            **kwargs: Accepted but currently ignored; no entry is read here.

        Returns:
            A one-element list containing the generated ``Job``.
        """
        # High network traffic to trigger congestion.
        # These values are per-node, and the network simulation sums them up,
        # so they need to be high enough to exceed the total network bandwidth.
        net_tx = 1e12  # bytes
        net_rx = 1e12  # bytes

        job_info = job_dict(
            nodes_required=2,
            name="network-test-job",
            account="test",
            cpu_trace=[1],
            gpu_trace=[1],
            ntx_trace=[net_tx],
            nrx_trace=[net_rx],
            end_state='COMPLETED',
            id=1,
            priority=100,
            partition='partition',
            submit_time=0,
            time_limit=3600,
            start_time=0,
            end_time=3600,
            expected_run_time=3600,
            trace_quanta=20,
        )
        return [Job(job_info)]