Commit 7e4a960f authored by Brewer, Wes's avatar Brewer, Wes
Browse files

Merge branch 'multi-partition-hpc-3' into 'main'

Add support for multi-partition systems

See merge request !68
parents c7ffb27a ec844be6
Loading
Loading
Loading
Loading
+21 −1
Original line number Diff line number Diff line
@@ -40,10 +40,10 @@ For Marconi supercomputer, download `job_table.parquet` from https://zenodo.org/
    python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet 

For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from https://zenodo.org/records/14007065

    # Adastra MI250
    python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet 


## Snapshot of extracted workload data

To reduce the expense of extracting the needed data from the telemetry parquet files,
@@ -52,6 +52,26 @@ given instead of the parquet files for more quickly running subsequent simulatio

    python main.py -f jobs_2024-02-20_12-20-39.npz

## Support for multiple system partitions

Multi-partition systems are supported by running the `multi-part-sim.py` script, where a list of configurations can be specified using the `-x` flag as follows:

    python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu

or simply:

    python multi-part-sim.py -x setonix/*

This will simulate synthetic workloads on two partitions as defined in `config/setonix/part-cpu` and `config/setonix/part-gpu`. To replay telemetry workloads from another system, e.g., Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data:

    python main.py --system marconi100 -f /path/to/marconi100/job_table.parquet

This will dump a .npz file with a randomized name, e.g., `ac23db.npz`. Let's rename this file to `pm100.npz` for clarity. Note: you can press Ctrl-C once the simulation starts. Now, this `pm100.npz` file can be used with `multi-part-sim.py` as follows:

    python multi-part-sim.py -x setonix/* -f pm100.npz --reschedule --scale 192

The `--reschedule` flag will use the internal scheduler to determine what nodes to schedule for each job, and the `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition). 

## Job-level power output example for replay of single job

    python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --jid 1234567 -o

args.py

0 → 100644
+39 −0
Original line number Diff line number Diff line
import argparse
from raps.policy import PolicyType

# Command-line interface for the Resource Allocator & Power Simulator (RAPS).
#
# The parser is built and evaluated at module level, so executing or importing
# this module parses sys.argv immediately and exposes the result as:
#   args      -- the argparse.Namespace returned by parse_args()
#   args_dict -- the same options as a plain dict (vars(args))
parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)')

# Simulation control.
parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model')
parser.add_argument('--start', type=str, help='ISO 8601 string for start of simulation')
parser.add_argument('--end', type=str, help='ISO 8601 string for end of simulation')
parser.add_argument('-d', '--debug', action='store_true',
                    help='Enable debug mode and disable rich layout')
parser.add_argument('-e', '--encrypt', action='store_true',
                    help='Encrypt any sensitive data in telemetry')
parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule')
parser.add_argument('-t', '--time', type=str, default=None,
                    help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
parser.add_argument('-ff', '--fastforward', type=str, default=None,
                    help='Fast-forward by time amount (uses same units as -t)')
parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
# NOTE: --seed is a boolean switch; it does not take the seed value itself.
parser.add_argument('--seed', action='store_true',
                    help='Set random number seed for deterministic simulation')

# Telemetry replay.
# Adjacent string literals are concatenated by the parser, so no "+ \" is needed.
parser.add_argument('-f', '--replay', nargs='+', type=str,
                    help='Either: path/to/joblive path/to/jobprofile'
                         ' -or- filename.npz (overrides --workload option)')
parser.add_argument('--reschedule', action='store_true', help='Reschedule the telemetry workload')
parser.add_argument('-u', '--uncertainties', action='store_true',
                    help='Change from floating point units to floating point units with uncertainties.'
                         ' Very expensive w.r.t simulation time!')
parser.add_argument('--jid', type=str, default='*', help='Replay job id')
parser.add_argument('--validate', action='store_true',
                    help='Use node power instead of CPU/GPU utilizations')

# Output and plotting.
parser.add_argument('-o', '--output', action='store_true',
                    help='Output power, cooling, and loss models for later analysis')
parser.add_argument('-p', '--plot', nargs='+', choices=['power', 'loss', 'pue', 'temp', 'util'],
                    help='Specify one or more types of plots to generate: power, loss, pue, util, temp')
# For each option below the first entry of `choices` doubles as the default.
choices = ['png', 'svg', 'jpg', 'pdf', 'eps']
parser.add_argument('--imtype', type=str, choices=choices, default=choices[0], help='Plot image type')

# Target system, scheduling policy and synthetic workload.
parser.add_argument('--scale', type=int, default=0,
                    help='Scale telemetry to max nodes specified in order to run telemetry'
                         ' on a smaller target system/partition, e.g., --scale 192')
parser.add_argument('--system', type=str, default='frontier', help='System config to use')
choices = [policy.value for policy in PolicyType]
parser.add_argument('-s', '--schedule', type=str, choices=choices, default=choices[0],
                    help='Schedule policy to use')
choices = ['random', 'benchmark', 'peak', 'idle']
parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0],
                    help='Type of synthetic workload')
parser.add_argument('-x', '--partitions', nargs='+', default=None,
                    help='List of machine configurations to use,'
                         ' e.g., -x setonix/part-cpu setonix/part-gpu')

# UI layout; the choices list is kept directly above the option that uses it.
choices = ['layout1', 'layout2']
parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI')

# Parse the command line once and publish the result for the rest of the program.
args = parser.parse_args()
args_dict = vars(args)
# Echo the parsed options to stdout.
print(args_dict)
+18 −0
Original line number Diff line number Diff line
{
    "POWER_GPU_IDLE": 88,
    "POWER_GPU_MAX": 560,
    "POWER_CPU_IDLE": 90,
    "POWER_CPU_MAX": 280,
    "POWER_MEM": 74.26,
    "POWER_NVME": 30,
    "POWER_NIC": 20,
    "POWER_CDU": 8473.47,
    "POWER_SWITCH": 250,
    "POWER_UPDATE_FREQ": 15,
    "RECTIFIER_PEAK_THRESHOLD": 13670,
    "SIVOC_LOSS_CONSTANT": 13,
    "SIVOC_EFFICIENCY": 0.98,
    "RECTIFIER_LOSS_CONSTANT": 17,
    "RECTIFIER_EFFICIENCY": 0.96,
    "POWER_COST": 0.094
}
+18 −0
Original line number Diff line number Diff line
{
    "SEED": 42,
    "JOB_ARRIVAL_TIME": 900,
    "MTBF": 11,
    "MAX_TIME": 88200,
    "TRACE_QUANTA": 20,
    "MIN_WALL_TIME": 3600,
    "MAX_WALL_TIME": 43200,
    "UI_UPDATE_FREQ": 900,
    "MAX_NODES_PER_JOB": 3000,
    "JOB_END_PROBS": {
        "COMPLETED": 0.63,
        "FAILED": 0.13,
        "CANCELLED": 0.12,
        "TIMEOUT": 0.11,
        "NODE_FAIL": 0.01
    }
}
+20 −0
Original line number Diff line number Diff line
{
    "NUM_CDUS": 1,
    "RACKS_PER_CDU": 7,
    "NODES_PER_RACK": 256,
    "RECTIFIERS_PER_RACK": 32,
    "CHASSIS_PER_RACK": 8,
    "NODES_PER_BLADE": 4,
    "SWITCHES_PER_CHASSIS": 4,
    "NICS_PER_NODE": 4,
    "RECTIFIERS_PER_CHASSIS": 4,
    "NODES_PER_RECTIFIER": 4,
    "MISSING_RACKS": [],
    "DOWN_NODES": [1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791],
    "CPUS_PER_NODE": 2,
    "GPUS_PER_NODE": 0,
    "CPU_PEAK_FLOPS": 2.50944E12,
    "GPU_PEAK_FLOPS": 0,
    "CPU_FP_RATIO": 0.667,
    "GPU_FP_RATIO": 0.667
}
Loading