Loading config/aurora.yaml +1 −1 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ power: rectifier_efficiency: 0.96 power_cost: 0.094 scheduler: job_arrival_time: 100 job_arrival_time: 60 mtbf: 11 trace_quanta: 15 min_wall_time: 60 Loading raps/dataloaders/aurora.py +18 −4 Original line number Diff line number Diff line Loading @@ -4,7 +4,19 @@ from raps.telemetry import Job, job_dict from raps.utils import WorkloadData from datetime import datetime, timezone """ Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html """ """ Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html Test case: raps run -f /opt/data/aurora/ANL-ALCF-DJC-AURORA_20250127_20251031.csv \ --system aurora --policy fcfs --arrival poisson Note, that currently only reading NJOBS from the csv due to time constraints. Increase NJOBS to 663507 to read all jobs. """ NJOBS = 1000 def load_data(local_dataset_path, **kwargs): """ Loading @@ -29,7 +41,7 @@ def load_data(local_dataset_path, **kwargs): "WALLTIME_SECONDS", "RUNTIME_SECONDS", "USERNAME_GENID", "LOCATION" ] for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=100): for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=NJOBS): # Drop rows where essential timestamp data is missing chunk.dropna(subset=['QUEUED_TIMESTAMP', 'START_TIMESTAMP', 'END_TIMESTAMP'], inplace=True) Loading @@ -37,10 +49,12 @@ def load_data(local_dataset_path, **kwargs): submit_time = int(pd.to_datetime(row["QUEUED_TIMESTAMP"]).timestamp()) start_time = int(pd.to_datetime(row["START_TIMESTAMP"]).timestamp()) end_time = int(pd.to_datetime(row["END_TIMESTAMP"]).timestamp()) job_name = row.get("JOB_NAME", "N/A") job_id = job_name.split('.')[0] job = job_dict( id=str(row.get("COBALT_JOBID", "N/A")), name=row.get("JOB_NAME", "N/A"), id=job_id, name=job_name, submit_time=submit_time, start_time=start_time, end_time=end_time, Loading raps/sim_config.py +0 −2 Original line number Diff line number Diff line Loading @@ -324,8 +324,6 @@ class SimConfig(RAPSBaseModel, abc.ABC): self.workload = "replay" # default to replay if --replay is set if not self.policy: self.policy = "replay" if self.workload != "replay" or self.policy != 'replay': raise ValueError('workload & policy must be either omitted or "replay" when --replay is set') if self.scheduler != 'default': raise ValueError('scheduler must be omitted or set to default when --replay is set') else: Loading Loading
config/aurora.yaml +1 −1 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ power: rectifier_efficiency: 0.96 power_cost: 0.094 scheduler: job_arrival_time: 100 job_arrival_time: 60 mtbf: 11 trace_quanta: 15 min_wall_time: 60 Loading
raps/dataloaders/aurora.py +18 −4 Original line number Diff line number Diff line Loading @@ -4,7 +4,19 @@ from raps.telemetry import Job, job_dict from raps.utils import WorkloadData from datetime import datetime, timezone """ Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html """ """ Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html Test case: raps run -f /opt/data/aurora/ANL-ALCF-DJC-AURORA_20250127_20251031.csv \ --system aurora --policy fcfs --arrival poisson Note, that currently only reading NJOBS from the csv due to time constraints. Increase NJOBS to 663507 to read all jobs. """ NJOBS = 1000 def load_data(local_dataset_path, **kwargs): """ Loading @@ -29,7 +41,7 @@ def load_data(local_dataset_path, **kwargs): "WALLTIME_SECONDS", "RUNTIME_SECONDS", "USERNAME_GENID", "LOCATION" ] for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=100): for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=NJOBS): # Drop rows where essential timestamp data is missing chunk.dropna(subset=['QUEUED_TIMESTAMP', 'START_TIMESTAMP', 'END_TIMESTAMP'], inplace=True) Loading @@ -37,10 +49,12 @@ def load_data(local_dataset_path, **kwargs): submit_time = int(pd.to_datetime(row["QUEUED_TIMESTAMP"]).timestamp()) start_time = int(pd.to_datetime(row["START_TIMESTAMP"]).timestamp()) end_time = int(pd.to_datetime(row["END_TIMESTAMP"]).timestamp()) job_name = row.get("JOB_NAME", "N/A") job_id = job_name.split('.')[0] job = job_dict( id=str(row.get("COBALT_JOBID", "N/A")), name=row.get("JOB_NAME", "N/A"), id=job_id, name=job_name, submit_time=submit_time, start_time=start_time, end_time=end_time, Loading
raps/sim_config.py +0 −2 Original line number Diff line number Diff line Loading @@ -324,8 +324,6 @@ class SimConfig(RAPSBaseModel, abc.ABC): self.workload = "replay" # default to replay if --replay is set if not self.policy: self.policy = "replay" if self.workload != "replay" or self.policy != 'replay': raise ValueError('workload & policy must be either omitted or "replay" when --replay is set') if self.scheduler != 'default': raise ValueError('scheduler must be omitted or set to default when --replay is set') else: Loading