Improve some settings to get aurora replaying with fcfs reschedule (209d3e00) · Commits · ExaDigiT / sim-raps

config/aurora.yaml

+1 −1

Original line number	Diff line number	Diff line
		@@ -34,7 +34,7 @@ power:
		rectifier_efficiency: 0.96
		power_cost: 0.094
		scheduler:
		job_arrival_time: 100
		job_arrival_time: 60
		mtbf: 11
		trace_quanta: 15
		min_wall_time: 60

raps/dataloaders/aurora.py

+18 −4

Original line number	Diff line number	Diff line
		@@ -4,7 +4,19 @@ from raps.telemetry import Job, job_dict
		from raps.utils import WorkloadData
		from datetime import datetime, timezone

		""" Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html """
		"""
		Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html

		Test case:

		raps run -f /opt/data/aurora/ANL-ALCF-DJC-AURORA_20250127_20251031.csv \
		--system aurora --policy fcfs --arrival poisson

		Note that only the first NJOBS rows of the CSV are currently read, due to time constraints.
		Increase NJOBS to 663507 to read all jobs.

		"""
		NJOBS = 1000

		def load_data(local_dataset_path, **kwargs):
		"""
		@@ -29,7 +41,7 @@ def load_data(local_dataset_path, **kwargs):
		"WALLTIME_SECONDS", "RUNTIME_SECONDS", "USERNAME_GENID", "LOCATION"
		]

		for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=100):
		for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=NJOBS):
		# Drop rows where essential timestamp data is missing
		chunk.dropna(subset=['QUEUED_TIMESTAMP', 'START_TIMESTAMP', 'END_TIMESTAMP'], inplace=True)

		@@ -37,10 +49,12 @@ def load_data(local_dataset_path, **kwargs):
		submit_time = int(pd.to_datetime(row["QUEUED_TIMESTAMP"]).timestamp())
		start_time = int(pd.to_datetime(row["START_TIMESTAMP"]).timestamp())
		end_time = int(pd.to_datetime(row["END_TIMESTAMP"]).timestamp())
		job_name = row.get("JOB_NAME", "N/A")
		job_id = job_name.split('.')[0]

		job = job_dict(
		id=str(row.get("COBALT_JOBID", "N/A")),
		name=row.get("JOB_NAME", "N/A"),
		id=job_id,
		name=job_name,
		submit_time=submit_time,
		start_time=start_time,
		end_time=end_time,

raps/sim_config.py

+0 −2

Original line number	Diff line number	Diff line
		@@ -324,8 +324,6 @@ class SimConfig(RAPSBaseModel, abc.ABC):
		self.workload = "replay" # default to replay if --replay is set
		if not self.policy:
		self.policy = "replay"
		if self.workload != "replay" or self.policy != 'replay':
		raise ValueError('workload & policy must be either omitted or "replay" when --replay is set')
		if self.scheduler != 'default':
		raise ValueError('scheduler must be omitted or set to default when --replay is set')
		else: