Commit 85b5fe53 authored by Maiterth, Matthias's avatar Maiterth, Matthias
Browse files

Merge branch 'breakup-workloads' into 'develop'

Break up workload.py into workloads/*.py

See merge request !118
parents a7515fb3 421b1c7a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ ExaDigiT Resource Allocator & Power Simulator (RAPS)
import argparse
from raps.helpers import check_python_version
from raps.run_sim import run_sim_add_parser, run_parts_sim_add_parser, show_add_parser
from raps.workload import run_workload_add_parser
from raps.workloads import run_workload_add_parser
from raps.telemetry import run_telemetry_add_parser
from raps.train_rl import train_rl_add_parser

+1 −1
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ from raps.network import (
from raps.telemetry import Telemetry
from raps.cooling import ThermoFluidsModel
from raps.flops import FLOPSManager
from raps.workload import Workload, continuous_job_generation
from raps.workloads import Workload, continuous_job_generation
from raps.account import Accounts
from raps.downtime import Downtime
from raps.weather import Weather
+75 −0
Original line number Diff line number Diff line
"""Workloads package init."""

import math
import numpy as np

from raps.utils import WorkloadData, SubParsers
from raps.utils import pydantic_add_args
from raps.sim_config import SingleSimConfig

from .basic import BasicWorkload
from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY
from .distribution import DistributionWorkload
from .live import continuous_job_generation, run_workload
from .multitenant import MultitenantWorkload
from .utils import plot_job_hist


class BaseWorkload:
    """Base class with common workload logic.

    Holds the CLI args and a per-partition config map; the workload mixins
    combined with this class provide the generator methods that
    ``generate_jobs`` dispatches to by name.
    """

    def __init__(self, args, *configs):
        # Partition names in the order the configs were given.
        self.partitions = [c['system_name'] for c in configs]
        self.config_map = {c['system_name']: c for c in configs}
        self.args = args

    def generate_jobs(self):
        """Run the workload generator named by ``self.args.workload``.

        Returns a WorkloadData spanning time 0 through the latest job end
        time (rounded up). Raises ValueError if no jobs were generated
        (``max()`` on an empty sequence).
        """
        jobs = getattr(self, self.args.workload)(args=self.args)
        # Generator expression: no need to materialize a list just for max().
        timestep_end = int(math.ceil(max(job.end_time for job in jobs)))
        return WorkloadData(
            jobs=jobs,
            telemetry_start=0,
            telemetry_end=timestep_end,
            start_date=self.args.start,
        )

    def compute_traces(self,
                       cpu_util: float,
                       gpu_util: float,
                       expected_run_time: int,
                       trace_quanta: int
                       ) -> tuple[np.ndarray, np.ndarray]:
        """Compute CPU and GPU traces based on mean CPU & GPU utilizations and wall time.

        Each trace holds one constant utilization sample per trace quantum;
        any remainder of ``expected_run_time`` shorter than a full quantum
        is truncated by the integer division.
        """
        # Hoist the shared quanta count instead of computing it twice.
        n_quanta = int(expected_run_time) // trace_quanta
        cpu_trace = cpu_util * np.ones(n_quanta)
        gpu_trace = gpu_util * np.ones(n_quanta)
        return (cpu_trace, gpu_trace)
        
class Workload(
    BaseWorkload,
    DistributionWorkload,
    BasicWorkload,
    MultitenantWorkload
):
    """Final workload class with all workload types.

    Combines the common logic in ``BaseWorkload`` with the individual
    workload mixins. Method resolution order is left-to-right, so
    ``BaseWorkload`` attributes take precedence over any same-named
    attributes on the mixins.
    """
    pass

__all__ = [
    "Workload",
    "JOB_NAMES", "ACCT_NAMES", "MAX_PRIORITY",
]


def run_workload_add_parser(subparsers: SubParsers):
    """Register the ``workload`` subcommand on *subparsers*.

    The subcommand validates its CLI arguments into a ``SingleSimConfig``
    and hands the result to ``run_workload``, which saves the workload
    as a snapshot.
    """
    from raps.sim_config import SIM_SHORTCUTS
    # TODO: Separate the arguments for this command
    workload_parser = subparsers.add_parser("workload", description="""
        Saves workload as a snapshot.
    """)
    workload_parser.add_argument("config_file", nargs="?", default=None, help="""
        YAML sim config file, can be used to configure an experiment instead of using CLI
        flags. Pass "-" to read from stdin.
    """)
    model_validate = pydantic_add_args(workload_parser, SingleSimConfig, model_config={
        "cli_shortcuts": SIM_SHORTCUTS,
    })

    def _impl(args):
        # Turn parsed args into a validated config, then run the workload.
        return run_workload(model_validate(args, {}))

    workload_parser.set_defaults(impl=_impl)
+419 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

+13 −0
Original line number Diff line number Diff line
"""Shared constants for workloads."""

JOB_NAMES = [
    "LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",
    "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",
    "ABINIT", "Cactus", "Charm++", "NWChem", "STAR-CCM+",
    "Gaussian", "ANSYS", "COMSOL", "PLUMED", "nekrs",
    "TensorFlow", "PyTorch", "BLAST", "Spark", "GAMESS",
    "ORCA", "Simulink", "MOOSE", "ELK"
]

ACCT_NAMES = [f"ACT{i:02d}" for i in range(1, 15)]
MAX_PRIORITY = 500000
Loading