Verified commit 81ab189f, authored by Hines, Jesse
Browse files

Fix conflicts

parent 34b3e6f7
Loading
Loading
Loading
Loading
+11 −17
Original line number Diff line number Diff line
@@ -116,9 +116,10 @@ import re
from tqdm import tqdm
from typing import Dict, Union, Optional
from collections import Counter
from datetime import datetime, timezone

from raps.job import job_dict, Job
from raps.utils import summarize_ranges, next_arrival
from raps.utils import summarize_ranges, next_arrival, WorkloadData
from .utils import proc_cpu_series, proc_gpu_series, to_epoch
from .utils import DEFAULT_START, DEFAULT_END

@@ -585,21 +586,10 @@ def load_data(local_dataset_path, **kwargs):
        cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node  # Is this per CPU?
        cpu_tr = [min(x/cores_per_cpu/cpus_per_node, cpu_peak) for x in cpu_tr]

        if arrival == "poisson":
            job_arrival_time = config.get("JOB_ARRIVAL_TIME")
            submit_time = next_arrival(1 / job_arrival_time)
            start_time = submit_time
            end_time = None
            scheduled_nodes = None
            telemetry_start = 0
            telemetry_end = 86640
        else:  # replay
        start_time = t0 - start_ts
        end_time = t1 - start_ts
        submit_time = rec.get("time_submit") - start_ts
        scheduled_nodes = rec.get("scheduled_nodes")
            telemetry_start = int(sl.time_start.min())
            telemetry_end = int(sl.time_end.max())

        current_job_dict = job_dict(
            nodes_required=nr,
@@ -642,4 +632,8 @@ def load_data(local_dataset_path, **kwargs):
    for reason, count in skip_counts.items():
        print(f"- {reason}: {count}")

    return jobs_list, telemetry_start, telemetry_end  # min_overall_utime, max_overall_utime, args_namespace
    return WorkloadData(
        jobs=jobs_list,
        telemetry_start=0, telemetry_end=int(end_ts - start_ts),
        start_date=datetime.fromtimestamp(start_ts, timezone.utc),
    )
+4 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ class MultiPartEngine:
        workloads_by_partition: dict[str, WorkloadData] = {}
        engines: dict[str, Engine] = {}

        timestep_start, timestep_end, time_delta = 0, 0, 0
        time_delta = 0
        for partition in sim_config.system_configs:
            name = partition.system_name
            engine, workload_data, time_delta = Engine.from_sim_config(
@@ -31,6 +31,9 @@ class MultiPartEngine:
                job.partition = name
            workloads_by_partition[name] = workload_data
            engines[name] = engine
        timestep_start = min(w.telemetry_start for w in workloads_by_partition.values())
        timestep_end = min(w.telemetry_end for w in workloads_by_partition.values())

        total_initial_jobs = sum(len(j.jobs) for j in workloads_by_partition.values())
        for engine in engines.values():
            engine.total_initial_jobs = total_initial_jobs
+0 −6
Original line number Diff line number Diff line
@@ -245,12 +245,6 @@ def run_multi_part_sim(sim_config: SimConfig):
    multi_engine, workload_results, timestep_start, timestep_end, time_delta = \
        MultiPartEngine.from_sim_config(sim_config)

    # TODO: The mit_supercloud dataloader seems to be outputting the wrong timesteps? mit_supercloud
    # is the only multi-partition system with replay, so just manually overriding the timesteps here
    # to fix it for now. The original multi-part-sim.py always started from timestep 0 as well.
    timestep_end = timestep_end - timestep_start
    timestep_start = 0

    if sim_config.output:
        for part, engine in multi_engine.engines.items():
            engine.telemetry.save_snapshot(