Commit 82f348a2 authored by Maiterth, Matthias's avatar Maiterth, Matthias
Browse files

OCIZettascale10: rough sketch of what ExaDigit can model.

This is a rough estimate according to
https://www.hpcwire.com/off-the-wire/oracle-unveils-next-gen-oracle-cloud-infrastructure-zettascale10-cluster-for-ai/
Getting the overall numbers in the ballpark, while
the Kyber + Kyber Side Car is not currently directly modelled.
Grains of salt included. The ballpark is ok.

Run with:
python main.py run --system OCIZettascale10 --workload randomAI --continuous-job-generation
parent 0e40c7ea
Loading
Loading
Loading
Loading
+59 −0
Original line number Diff line number Diff line
# OCIZettascale10: rough ExaDigit sketch of Oracle's announced OCI
# Zettascale10 AI cluster. All figures are ballpark estimates from press
# coverage; the Kyber + Kyber side-car design is not directly modelled.
system:
  num_cdus: 2778  # sized toward the announced "800,000 Vera Rubin total"; 2778 CDUs * 3 racks * 72 nodes = 600,048 nodes -- TODO confirm the intended total
  racks_per_cdu: 3
  nodes_per_rack: 72   # ~600kW rack, NV72-like -- assumption, verify
  chassis_per_rack: 1
  nodes_per_blade: 1
  switches_per_chassis: 72  # chassis concept is Cray-style; mapped here to NV72? -- assumption, verify
  nics_per_node: 1  # most likely 4 in the real system; modelled as 1 here
  rectifiers_per_chassis: 1  # placeholder: rectifier power/losses are zeroed below because real values are unknown
  nodes_per_rectifier: 1  # placeholder: rectifier power/losses are zeroed below because real values are unknown
  #missing_racks:  # intentionally unset: no racks excluded from the model
  down_nodes: []  # no nodes marked down at simulation start
  cpus_per_node: 1
  gpus_per_node: 4  # assumes 4 GPU chiplets per node -- TODO confirm
  cpu_peak_flops: 2048000000000.0  # ~2 TFLOP/s; insignificant next to GPU peak
  gpu_peak_flops: 15000000000000000000.0  # 15 EFLOP/s FP4 -- per-GPU vs per-node scope unclear, verify against total system target
  cpu_fp_ratio: 0.667
  gpu_fp_ratio: 0.667
power:
  # Per-component power figures (watts assumed unless noted). Rectifier and
  # SIVOC losses are deliberately zeroed out: real values are unknown.
  power_gpu_idle:  200 # per node: 4 GPUs * 50 W idle
  power_gpu_max:  2200  # ~2.2 kW per node; NOTE(review): the stated math 4*525 gives 2100, not 2200 -- confirm intended value
  power_cpu_idle: 90
  power_cpu_max: 280
  power_mem: 74.26
  power_nic: 20
  power_nvme: 30
  power_switch: 250
  power_cdu: 8473.47
  power_update_freq: 15  # presumably seconds between power samples -- verify units
  rectifier_peak_threshold: 13670
  sivoc_loss_constant: 0  # zero: SIVOC losses unknown, modelled as lossless
  sivoc_efficiency: 1.00  # lossless (see note above)
  rectifier_loss_constant: 0  # zero: rectifier losses unknown, modelled as lossless
  rectifier_efficiency: 1.00  # lossless (see note above)
  power_cost: 0.094  # presumably $/kWh -- verify units
scheduler:
  job_arrival_time: 1
  mtbf: 11  # mean time between failures -- units not stated here, verify
  trace_quanta: 15
  min_wall_time: 3600  # 1 hour (seconds)
  max_wall_time: 43200  # 12 hours (seconds)
  ui_update_freq: 900
  max_nodes_per_job: 9000
  job_end_probs:  # probabilities of job end states (values below sum to 1.0)
    COMPLETED: 0.63
    FAILED: 0.13
    CANCELLED: 0.12
    TIMEOUT: 0.11
    NODE_FAIL: 0.01
uq:
  # Relative uncertainties applied to the corresponding power readings for
  # uncertainty quantification.
  power_gpu_uncertainty: 0.05
  power_cpu_uncertainty: 0.05
  power_mem_uncertainty: 0.05
  power_nic_uncertainty: 0.05
  power_nvme_uncertainty: 0.05
  power_cdus_uncertainty: 0.05
  power_node_uncertainty: 0.002
  power_switch_uncertainty: 0.05
  rectifier_power_uncertainty: 0.05
+2 −1
Original line number Diff line number Diff line
@@ -134,7 +134,8 @@ class SimConfig(RAPSBaseModel, abc.ABC):
    """ Grab data from live system. """

    # Workload arguments (TODO split into separate model)
    workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay'] = "random"
    workload: Literal['random', 'benchmark', 'peak', 'idle',
                      'synthetic', 'multitenant', 'replay', 'randomAI'] = "random"

    """ Type of synthetic workload """
    multimodal: list[float] = [1.0]
+10 −7
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ from raps.utils import (

from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY


class BasicWorkload:

    # Test for random 'reasonable' AI jobs
@@ -19,17 +20,17 @@ class BasicWorkload:
        jobs = []
        for i in range(args.numjobs):
            draw = random.randint(0, 10)
            if draw == 0:
            if draw != 0:
                et = random.randint(7200, 28800)
                nr = random.choice([128, 256, 512, 1024, 1280, 1792, 2048])
                new_job = Job(job_dict(nodes_required=nr,
                                       name="LLM",
                                       name="LLM Production",
                                       account="llmUser",
                                       end_state="Success",
                                       id=random.randint(1, 99999),
                                       cpu_trace=0.1,
                                       gpu_trace=(random.uniform(0.55, 0.8) *
                                                  self.config_map[self.args.system]['GPUS_PER_NODE']),
                                       gpu_trace=(random.uniform(0.55, 0.8)
                                                  * self.config_map[self.args.system]['GPUS_PER_NODE']),
                                       ntx_trace=None,
                                       nrx_trace=None,
                                       submit_time=0,
@@ -38,8 +39,10 @@ class BasicWorkload:
                                       end_time=et,
                                       expected_run_time=et))
            else:
                new_job = Job(job_dict(nodes_required=1,
                                       name="LLM",
                et = random.randint(300, 7200)
                nr = random.choice([1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 128])
                new_job = Job(job_dict(nodes_required=nr,
                                       name="User-Test LLM",
                                       account="llmUser",
                                       end_state="Success",
                                       id=random.randint(1, 99999),
@@ -50,7 +53,7 @@ class BasicWorkload:
                                       submit_time=0,
                                       time_limit=43200,
                                       start_time=0,
                                       end_time=7200,
                                       end_time=et,
                                       expected_run_time=random.randint(60, 7200)))
            jobs.append(new_job)
        return jobs
+1 −1
Original line number Diff line number Diff line
def continuous_job_generation(self, *, engine, timestep, jobs):
def continuous_job_generation(*, engine, timestep, jobs):
    # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:")
    # print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:")
    if len(engine.queue) <= engine.continuous_workload.args.maxqueue: