Commit 5428038c authored by Brewer, Wes's avatar Brewer, Wes
Browse files

Add support to specify multiple partitions as 'setonix/*'

parent bc94f75e
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -58,13 +58,17 @@ Multi-partition systems are supported by running the `multi-part-sim.py` script,

    python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu

or simply:

    python multi-part-sim.py -x setonix/*

This will simulate synthetic workloads on the two partitions defined in `config/setonix/part-cpu` and `config/setonix/part-gpu`. To replay telemetry workloads from another system, such as Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data, e.g.,

    python main.py --system marconi100 -f /path/to/marconi100/job_table.parquet

This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this file to pm100.npz for clarity. Note: the .npz file is written before the simulation begins, so you can press Ctrl-C to stop the run once the simulation starts. Now, this pm100.npz file can be used with `multi-part-sim.py` as follows:

    python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu -f pm100.npz --reschedule --scale 192
    python multi-part-sim.py -x setonix/* -f pm100.npz --reschedule --scale 192

The `--reschedule` flag will use the internal scheduler to determine what nodes to schedule for each job, and the `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition). 

+9 −1
Original line number Diff line number Diff line
from raps.helpers import check_python_version
check_python_version()

import glob
import os
import random
import sys

from args import args
from raps.config import ConfigManager
from raps.config import ConfigManager, CONFIG_PATH
from raps.policy import PolicyType
from raps.ui import LayoutManager
from raps.scheduler import Scheduler
@@ -18,6 +20,12 @@ from tqdm import tqdm

# Load configurations for each partition.
#
# Each entry of args.partitions is either a literal partition name such as
# 'setonix/part-cpu' or a wildcard pattern such as 'setonix/*'. Wildcard
# patterns are resolved against the entries found under CONFIG_PATH; literal
# names are passed through unchanged. Every entry is examined, so literal
# names and patterns can be mixed in one invocation.
print(args.partitions)

partition_names = []
for partition_spec in args.partitions:
    if '*' not in partition_spec:
        partition_names.append(partition_spec)
        continue

    # glob.glob returns matches in arbitrary (filesystem-dependent) order;
    # sort them so the partition order is reproducible between runs.
    paths = sorted(glob.glob(os.path.join(CONFIG_PATH, partition_spec)))
    if not paths:
        # Fail here with a clear message instead of continuing with an
        # empty partition list.
        raise FileNotFoundError(
            f"No partition configs match '{partition_spec}' under {CONFIG_PATH}"
        )

    # Keep only the last two path components ('<system>/<partition>'),
    # which is the form in which partition names are given on the command line.
    partition_names.extend(os.path.join(*p.split(os.sep)[-2:]) for p in paths)

configs = [ConfigManager(system_name=partition).get_config() for partition in partition_names]
# One argument dictionary per partition: the parsed CLI arguments plus that
# partition's configuration under the 'config' key.
args_dicts = [{**vars(args), 'config': config} for config in configs]