Commit 15aacee4 authored by Maiterth, Matthias
Browse files

Merge branch 'random-output' into 'develop'

Output and other changes

See merge request !115
parents 0db3be6f 45c37286
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -245,7 +245,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
    return WorkloadData(
        jobs=job_list,
        telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time,
        start_date=telemetry_start_timestamp,
        # TODO: Confirm whether lassen timestamps are UTC or PDT
        start_date=telemetry_start_timestamp.tz_localize("UTC"),
    )


+1 −2
Original line number Diff line number Diff line
@@ -119,7 +119,7 @@ from collections import Counter
from datetime import datetime, timezone

from raps.job import job_dict, Job
from raps.utils import summarize_ranges, next_arrival, WorkloadData
from raps.utils import summarize_ranges, WorkloadData
from .utils import proc_cpu_series, proc_gpu_series, to_epoch
from .utils import DEFAULT_START, DEFAULT_END

@@ -211,7 +211,6 @@ def load_data(local_dataset_path, **kwargs):
    """
    debug = kwargs.get("debug")
    config = kwargs.get("config")
    arrival = kwargs.get("arrival")
    NL_PATH = os.path.dirname(__file__)

    skip_counts = Counter()
+1 −7
Original line number Diff line number Diff line
@@ -161,7 +161,6 @@ class Engine:
        self.flops_manager = flops_manager
        self.debug = sim_config.debug
        self.continuous_workload = continuous_workload
        self.output = sim_config.output
        self.replay = sim_config.replay
        self.downscale = sim_config.downscale  # Factor to downscale the 1s timesteps (power of 10)
        self.simulate_network = sim_config.simulate_network
@@ -215,10 +214,7 @@ class Engine:
    @staticmethod
    def from_sim_config(sim_config: SimConfig, partition: str | None = None):
        if partition:
            system_config_by_name = {s.system_name: s for s in sim_config.system_configs}
            system_config = system_config_by_name.get(partition)
            if not system_config:
                raise ValueError(f"Partition {partition} isn't in SimConfig")
            system_config = sim_config.get_system_config_by_name(partition)
        elif len(sim_config.system_configs) > 1:
            raise ValueError(
                "Engine can only run single-partition simulations. Use MultiPartEngine for " +
@@ -232,8 +228,6 @@ class Engine:
        sim_config_args = sim_config.get_legacy_args()
        sim_config_dict = sim_config.get_legacy_args_dict()
        sim_config_dict['config'] = system_config_dict
        if partition:
            sim_config_dict["system"] = sim_config.system_name

        if sim_config.seed:
            random.seed(sim_config.seed)
+2 −2
Original line number Diff line number Diff line
from collections.abc import Iterable
from raps.engine import Engine, TickData
from raps.sim_config import SimConfig
from raps.sim_config import MultiPartSimConfig
from raps.utils import WorkloadData


@@ -11,7 +11,7 @@ class MultiPartEngine:
        self.jobs = jobs

    @staticmethod
    def from_sim_config(sim_config: SimConfig):
    def from_sim_config(sim_config: MultiPartSimConfig):
        if sim_config.replay:
            root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs)
            # TODO should consider how to pass separate replay values for separate systems
+20 −13
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ from raps.stats import (
    print_formatted_report
)

from raps.sim_config import SimConfig
from raps.sim_config import SingleSimConfig, MultiPartSimConfig


def read_yaml(config_file: str):
@@ -62,7 +62,7 @@ def run_sim_add_parser(subparsers: SubParsers):
        YAML sim config file, can be used to configure an experiment instead of using CLI
        flags. Pass "-" to read from stdin.
    """)
    model_validate = pydantic_add_args(parser, SimConfig, model_config={
    model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={
        "cli_shortcuts": shortcuts,
    })
    parser.set_defaults(
@@ -70,23 +70,26 @@ def run_sim_add_parser(subparsers: SubParsers):
    )


def run_sim(sim_config: SimConfig):
def run_sim(sim_config: SingleSimConfig):
    if sim_config.verbose or sim_config.debug:
        print(f"SimConfig: {sim_config.model_dump_json(indent=4)}")
        print(f"SingleSimConfig: {sim_config.model_dump_json(indent=4)}")
    if len(sim_config.system_configs) > 1:
        print("Use run-parts to run multi-partition simulations")
        sys.exit(1)

    engine, workload_data, time_delta = Engine.from_sim_config(sim_config)

    out = sim_config.output
    out = sim_config.get_output()
    if out:
        out.mkdir(parents=True)
        engine.telemetry.save_snapshot(
            dest=str(out),
            dest=str(out / 'snapshot.npz'),
            result=workload_data,
            args=sim_config,
        )
        config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True))
        (out / 'sim_config.yaml').write_text(config_yaml)

    jobs = workload_data.jobs
    timestep_start, timestep_end = workload_data.telemetry_start, workload_data.telemetry_end
    total_timesteps = timestep_end - timestep_start
@@ -234,7 +237,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers):
        YAML sim config file, can be used to configure an experiment instead of using CLI
        flags. Pass "-" to read from stdin.
    """)
    model_validate = pydantic_add_args(parser, SimConfig, model_config={
    model_validate = pydantic_add_args(parser, MultiPartSimConfig, model_config={
        "cli_shortcuts": shortcuts,
    })
    parser.set_defaults(
@@ -242,8 +245,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers):
    )


def run_parts_sim(sim_config: SimConfig):

def run_parts_sim(sim_config: MultiPartSimConfig):
    if len(sim_config.system_configs) == 1:
        warnings.warn(
            "run_parts_sim is usually for multiple partitions. Did you mean to run with one?",
@@ -253,13 +255,18 @@ def run_parts_sim(sim_config: SimConfig):
    multi_engine, workload_results, timestep_start, timestep_end, time_delta = \
        MultiPartEngine.from_sim_config(sim_config)

    if sim_config.output:
    out = sim_config.get_output()
    if out:
        out.mkdir(parents=True)
        for part, engine in multi_engine.engines.items():
            engine.telemetry.save_snapshot(
                dest=str(sim_config.output / part.split('/')[-1]),
                dest=str(out / part.split('/')[-1]),
                result=workload_results[part],
                args=sim_config,
            )
        config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True))
        (out / 'sim_config.yaml').write_text(config_yaml)

    jobs = {p: w.jobs for p, w in workload_results.items()}

    ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq
@@ -317,7 +324,7 @@ def show_add_parser(subparsers: SubParsers):
    parser.add_argument("--show-defaults", default=False, help="""
        If true, include defaults in the output YAML
    """)
    model_validate = pydantic_add_args(parser, SimConfig, model_config={
    model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={
        "cli_shortcuts": shortcuts,
    })

@@ -328,6 +335,6 @@ def show_add_parser(subparsers: SubParsers):
    parser.set_defaults(impl=impl)


def show(sim_config: SimConfig, show_defaults=False):
def show(sim_config: SingleSimConfig, show_defaults=False):
    data = sim_config.model_dump(mode="json", exclude_defaults=not show_defaults)
    print(yaml_dump(data), end="")
Loading