Commit 209d3e00 authored by Brewer, Wes
Browse files

Improve some settings to get aurora replaying with fcfs reschedule

parent 8379d84e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ power:
  rectifier_efficiency: 0.96
  power_cost: 0.094
scheduler:
-  job_arrival_time: 100
+  job_arrival_time: 60
  mtbf: 11
  trace_quanta: 15
  min_wall_time: 60
+18 −4
Original line number Diff line number Diff line
@@ -4,7 +4,19 @@ from raps.telemetry import Job, job_dict
from raps.utils import WorkloadData
from datetime import datetime, timezone

""" Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html """
""" 
Download DIM_JOB_COMPOSITE dataset from https://reports.alcf.anl.gov/data/aurora.html 

Test case:

    raps run -f /opt/data/aurora/ANL-ALCF-DJC-AURORA_20250127_20251031.csv \
                --system aurora --policy fcfs --arrival poisson

Note that, due to time constraints, only the first NJOBS rows are currently read from the CSV.
Increase NJOBS to 663507 to read all jobs.

"""
NJOBS = 1000

def load_data(local_dataset_path, **kwargs):
    """
@@ -29,7 +41,7 @@ def load_data(local_dataset_path, **kwargs):
        "WALLTIME_SECONDS", "RUNTIME_SECONDS", "USERNAME_GENID", "LOCATION"
    ]

-    for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=100):
+    for chunk in pd.read_csv(filepath, chunksize=chunksize, on_bad_lines='warn', nrows=NJOBS):
        # Drop rows where essential timestamp data is missing
        chunk.dropna(subset=['QUEUED_TIMESTAMP', 'START_TIMESTAMP', 'END_TIMESTAMP'], inplace=True)

@@ -37,10 +49,12 @@ def load_data(local_dataset_path, **kwargs):
            submit_time = int(pd.to_datetime(row["QUEUED_TIMESTAMP"]).timestamp())
            start_time = int(pd.to_datetime(row["START_TIMESTAMP"]).timestamp())
            end_time = int(pd.to_datetime(row["END_TIMESTAMP"]).timestamp())
            job_name = row.get("JOB_NAME", "N/A")
            job_id = job_name.split('.')[0]

            job = job_dict(
-                id=str(row.get("COBALT_JOBID", "N/A")),
-                name=row.get("JOB_NAME", "N/A"),
+                id=job_id,
+                name=job_name,
                submit_time=submit_time,
                start_time=start_time,
                end_time=end_time,
+0 −2
Original line number Diff line number Diff line
@@ -324,8 +324,6 @@ class SimConfig(RAPSBaseModel, abc.ABC):
                self.workload = "replay"  # default to replay if --replay is set
            if not self.policy:
                self.policy = "replay"
-            if self.workload != "replay" or self.policy != 'replay':
-                raise ValueError('workload & policy must be either omitted or "replay" when --replay is set')
            if self.scheduler != 'default':
                raise ValueError('scheduler must be omitted or set to default when --replay is set')
        else: