Merge branch 'multi-partition-hpc-3' into 'main' (7e4a960f) · Commits · ExaDigiT / sim-raps

README.md

+21 −1

Original line number	Diff line number	Diff line
		@@ -40,10 +40,10 @@ For Marconi supercomputer, download `job_table.parquet` from https://zenodo.org/
		python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet

		For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from https://zenodo.org/records/14007065

		# Adastra MI250
		python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet


		## Snapshot of extracted workload data

		To reduce the expense of extracting the needed data from the telemetry parquet files,
		@@ -52,6 +52,26 @@ given instead of the parquet files for more quickly running subsequent simulatio

		python main.py -f jobs_2024-02-20_12-20-39.npz

		## Support for multiple system partitions

		Multi-partition systems are supported by running the `multi-part-sim.py` script, where a list of configurations can be specified using the `-x` flag as follows:

		python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu

		or simply:

		python multi-part-sim.py -x setonix/*

		This will simulate synthetic workloads on two partitions as defined in `config/setonix/part-cpu` and `config/setonix/part-gpu`. To replay telemetry workloads from another system, e.g., Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data, e.g.,

		python main.py --system marconi100 -f /path/to/marconi100/job_table.parquet

		This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this file to pm100.npz for clarity. Note: you can press Ctrl-C once the simulation starts. Now, this pm100.npz file can be used with `multi-part-sim.py` as follows:

		python multi-part-sim.py -x setonix/* -f pm100.npz --reschedule --scale 192

		The `--reschedule` flag will use the internal scheduler to determine what nodes to schedule for each job, and the `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition).

		## Job-level power output example for replay of single job

		python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --jid 1234567 -o

args.py

0 → 100644

+39 −0

Original line number	Diff line number	Diff line
		import argparse
		from raps.policy import PolicyType

		# Command-line interface shared by the RAPS entry points.
		# NOTE: the arguments are parsed at import time, so importing this module
		# reads sys.argv and exposes the result as `args` (Namespace) and
		# `args_dict` (plain dict).
		parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)')

		# --- Simulation control -------------------------------------------------
		parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model')
		parser.add_argument('--start', type=str, help='ISO 8601 string for start of simulation')
		parser.add_argument('--end', type=str, help='ISO 8601 string for end of simulation')
		parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout')
		parser.add_argument('-e', '--encrypt', action='store_true', help='Encrypt any sensitive data in telemetry')
		parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule')
		parser.add_argument('-t', '--time', type=str, default=None,
		    help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
		parser.add_argument('-ff', '--fastforward', type=str, default=None,
		    help='Fast-forward by time amount (uses same units as -t)')
		parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
		parser.add_argument('--seed', action='store_true', help='Set random number seed for deterministic simulation')

		# --- Telemetry replay ---------------------------------------------------
		# Adjacent string literals are concatenated by the parser, so no explicit
		# `+` or line-continuation backslash is needed inside the parentheses.
		parser.add_argument('-f', '--replay', nargs='+', type=str,
		    help='Either: path/to/joblive path/to/jobprofile'
		         ' -or- filename.npz (overrides --workload option)')
		parser.add_argument('--reschedule', action='store_true', help='Reschedule the telemetry workload')
		parser.add_argument('-u', '--uncertainties', action='store_true',
		    help='Change from floating point units to floating point units with uncertainties.'
		         ' Very expensive w.r.t simulation time!')
		parser.add_argument('--jid', type=str, default='*', help='Replay job id')
		parser.add_argument('--validate', action='store_true', help='Use node power instead of CPU/GPU utilizations')

		# --- Output and plotting ------------------------------------------------
		parser.add_argument('-o', '--output', action='store_true',
		    help='Output power, cooling, and loss models for later analysis')
		parser.add_argument('-p', '--plot', nargs='+', choices=['power', 'loss', 'pue', 'temp', 'util'],
		    help='Specify one or more types of plots to generate: power, loss, pue, util, temp')
		# `choices` is rebound before each option that uses it; the first entry of
		# each list doubles as that option's default.
		choices = ['png', 'svg', 'jpg', 'pdf', 'eps']
		parser.add_argument('--imtype', type=str, choices=choices, default=choices[0], help='Plot image type')

		# --- System, scheduling and workload ------------------------------------
		parser.add_argument('--scale', type=int, default=0,
		    help='Scale telemetry to max nodes specified in order to run telemetry on a '
		         'smaller target system/partition, e.g., --scale 192')
		parser.add_argument('--system', type=str, default='frontier', help='System config to use')
		choices = [policy.value for policy in PolicyType]
		parser.add_argument('-s', '--schedule', type=str, choices=choices, default=choices[0], help='Schedule policy to use')
		choices = ['random', 'benchmark', 'peak', 'idle']
		parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')
		parser.add_argument('-x', '--partitions', nargs='+', default=None,
		    help='List of machine configurations to use, e.g., -x setonix/part-cpu setonix/part-gpu')
		choices = ['layout1', 'layout2']
		parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI')

		# Parse immediately and expose both the namespace and a dict view of it.
		args = parser.parse_args()
		args_dict = vars(args)
		# Echo the effective configuration at start-up.
		print(args_dict)

config/setonix/part-cpu/power.json

0 → 100644

+18 −0

Original line number	Diff line number	Diff line
		{
		"POWER_GPU_IDLE": 88,
		"POWER_GPU_MAX": 560,
		"POWER_CPU_IDLE": 90,
		"POWER_CPU_MAX": 280,
		"POWER_MEM": 74.26,
		"POWER_NVME": 30,
		"POWER_NIC": 20,
		"POWER_CDU": 8473.47,
		"POWER_SWITCH": 250,
		"POWER_UPDATE_FREQ": 15,
		"RECTIFIER_PEAK_THRESHOLD": 13670,
		"SIVOC_LOSS_CONSTANT": 13,
		"SIVOC_EFFICIENCY": 0.98,
		"RECTIFIER_LOSS_CONSTANT": 17,
		"RECTIFIER_EFFICIENCY": 0.96,
		"POWER_COST": 0.094
		}

config/setonix/part-cpu/scheduler.json

0 → 100644

+18 −0

Original line number	Diff line number	Diff line
		{
		"SEED": 42,
		"JOB_ARRIVAL_TIME": 900,
		"MTBF": 11,
		"MAX_TIME": 88200,
		"TRACE_QUANTA": 20,
		"MIN_WALL_TIME": 3600,
		"MAX_WALL_TIME": 43200,
		"UI_UPDATE_FREQ": 900,
		"MAX_NODES_PER_JOB": 3000,
		"JOB_END_PROBS": {
		"COMPLETED": 0.63,
		"FAILED": 0.13,
		"CANCELLED": 0.12,
		"TIMEOUT": 0.11,
		"NODE_FAIL": 0.01
		}
		}

config/setonix/part-cpu/system.json

0 → 100644

+20 −0

Original line number	Diff line number	Diff line
		{
		"NUM_CDUS": 1,
		"RACKS_PER_CDU": 7,
		"NODES_PER_RACK": 256,
		"RECTIFIERS_PER_RACK": 32,
		"CHASSIS_PER_RACK": 8,
		"NODES_PER_BLADE": 4,
		"SWITCHES_PER_CHASSIS": 4,
		"NICS_PER_NODE": 4,
		"RECTIFIERS_PER_CHASSIS": 4,
		"NODES_PER_RECTIFIER": 4,
		"MISSING_RACKS": [],
		"DOWN_NODES": [1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791],
		"CPUS_PER_NODE": 2,
		"GPUS_PER_NODE": 0,
		"CPU_PEAK_FLOPS": 2.50944E12,
		"GPU_PEAK_FLOPS": 0,
		"CPU_FP_RATIO": 0.667,
		"GPU_FP_RATIO": 0.667
		}