Loading raps/dataloaders/frontier.py +1 −1 Original line number Diff line number Diff line Loading @@ -153,7 +153,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar scheduled_nodes.append(indices) if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_offset > 0: job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, wall_time, job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, [], [], wall_time, end_state, scheduled_nodes, time_offset, job_id) jobs.append(job_info) Loading raps/dataloaders/fugaku.py +2 −0 Original line number Diff line number Diff line Loading @@ -94,6 +94,8 @@ def load_data_from_df(df, **kwargs): name=name, cpu_trace=cpu_trace, gpu_trace=gpu_trace, ntx_trace=[], nrx_trace=[], wall_time=wall_time, end_state=end_state, scheduled_nodes=scheduled_nodes, Loading raps/dataloaders/marconi100.py +1 −1 Original line number Diff line number Diff line Loading @@ -163,7 +163,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() if gpu_trace.size > 0 and time_offset >= 0: job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, wall_time, job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, [], [], wall_time, end_state, scheduled_nodes, time_offset, job_id, priority) jobs.append(job_info) Loading raps/power.py +6 −1 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ load_config_variables([ 'POWER_CPU_UNCERTAINTY', 'POWER_MEM', 'POWER_MEM_UNCERTAINTY', 'POWER_NIC', 'POWER_NIC_IDLE', 'POWER_NIC_MAX', 'POWER_NIC_UNCERTAINTY', Loading Loading @@ -102,7 +103,11 @@ def compute_node_power(cpu_util, gpu_util, net_util, verbose=False): """ power_cpu = cpu_util * POWER_CPU_MAX + (CPUS_PER_NODE - cpu_util) * POWER_CPU_IDLE power_gpu = gpu_util * POWER_GPU_MAX + (GPUS_PER_NODE - gpu_util) * POWER_GPU_IDLE try: power_nic = POWER_NIC_IDLE + (POWER_NIC_MAX - POWER_NIC_IDLE) * net_util except: power_nic = POWER_NIC power_total = power_cpu + power_gpu + POWER_MEM + NICS_PER_NODE * power_nic + POWER_NVME Loading raps/workload.py +11 −7 Original line number Diff line number Diff line Loading @@ -73,11 +73,12 @@ class Workload(object): end_state = determine_state(JOB_END_PROBS) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time) priority = random.randint(0, MAX_PRIORITY) net_tx, net_rx = [], [] # Jobs arrive according to Poisson process time_to_next_job = next_arrival() jobs.append(job_dict(nodes_required, name, cpu_trace, gpu_trace, \ jobs.append(job_dict(nodes_required, name, cpu_trace, gpu_trace, net_tx, net_rx, \ wall_time, end_state, None, time_to_next_job, None, priority)) return jobs Loading @@ -94,7 +95,8 @@ class Workload(object): jobs = self.generate_random_jobs(num_jobs=0) cpu_util, gpu_util = CPUS_PER_NODE, GPUS_PER_NODE cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800) job_info = job_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, \ net_tx, net_rx = [], [] job_info = job_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 100, None) jobs.insert(0, job_info) return jobs Loading @@ -105,7 +107,8 @@ class Workload(object): jobs = self.generate_random_jobs(num_jobs=0) cpu_util, gpu_util = 0, 0 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 43200) job_info = job_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, \ net_tx, net_rx = [], [] job_info = job_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 0, None) jobs.insert(0, job_info) return jobs Loading @@ -115,29 +118,30 @@ class Workload(object): """Benchmark tests""" jobs = self.generate_random_jobs(num_jobs=0) net_tx, net_rx = [], [] # Max test cpu_util, gpu_util = 1, 4 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800) job_info = jobs_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 100, None) jobs.insert(0, job_info) # OpenMxP run cpu_util, gpu_util = 0, 4 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600) job_info = jobs_dict(AVAILABLE_NODES, "OpenMxP", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "OpenMxP", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 300, None) jobs.insert(0, job_info) # HPL run cpu_util, gpu_util = 0.33, 0.79 * 4 # based on 24-01-18 run cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600) job_info = jobs_dict(AVAILABLE_NODES, "HPL", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "HPL", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 200, None) jobs.insert(0, job_info) # Idle test cpu_util, gpu_util = 0, 0 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600) job_info = jobs_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 0, None) jobs.insert(0, job_info) Loading Loading
raps/dataloaders/frontier.py +1 −1 Original line number Diff line number Diff line Loading @@ -153,7 +153,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar scheduled_nodes.append(indices) if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_offset > 0: job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, wall_time, job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, [], [], wall_time, end_state, scheduled_nodes, time_offset, job_id) jobs.append(job_info) Loading
raps/dataloaders/fugaku.py +2 −0 Original line number Diff line number Diff line Loading @@ -94,6 +94,8 @@ def load_data_from_df(df, **kwargs): name=name, cpu_trace=cpu_trace, gpu_trace=gpu_trace, ntx_trace=[], nrx_trace=[], wall_time=wall_time, end_state=end_state, scheduled_nodes=scheduled_nodes, Loading
raps/dataloaders/marconi100.py +1 −1 Original line number Diff line number Diff line Loading @@ -163,7 +163,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() if gpu_trace.size > 0 and time_offset >= 0: job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, wall_time, job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, [], [], wall_time, end_state, scheduled_nodes, time_offset, job_id, priority) jobs.append(job_info) Loading
raps/power.py +6 −1 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ load_config_variables([ 'POWER_CPU_UNCERTAINTY', 'POWER_MEM', 'POWER_MEM_UNCERTAINTY', 'POWER_NIC', 'POWER_NIC_IDLE', 'POWER_NIC_MAX', 'POWER_NIC_UNCERTAINTY', Loading Loading @@ -102,7 +103,11 @@ def compute_node_power(cpu_util, gpu_util, net_util, verbose=False): """ power_cpu = cpu_util * POWER_CPU_MAX + (CPUS_PER_NODE - cpu_util) * POWER_CPU_IDLE power_gpu = gpu_util * POWER_GPU_MAX + (GPUS_PER_NODE - gpu_util) * POWER_GPU_IDLE try: power_nic = POWER_NIC_IDLE + (POWER_NIC_MAX - POWER_NIC_IDLE) * net_util except: power_nic = POWER_NIC power_total = power_cpu + power_gpu + POWER_MEM + NICS_PER_NODE * power_nic + POWER_NVME Loading
raps/workload.py +11 −7 Original line number Diff line number Diff line Loading @@ -73,11 +73,12 @@ class Workload(object): end_state = determine_state(JOB_END_PROBS) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time) priority = random.randint(0, MAX_PRIORITY) net_tx, net_rx = [], [] # Jobs arrive according to Poisson process time_to_next_job = next_arrival() jobs.append(job_dict(nodes_required, name, cpu_trace, gpu_trace, \ jobs.append(job_dict(nodes_required, name, cpu_trace, gpu_trace, net_tx, net_rx, \ wall_time, end_state, None, time_to_next_job, None, priority)) return jobs Loading @@ -94,7 +95,8 @@ class Workload(object): jobs = self.generate_random_jobs(num_jobs=0) cpu_util, gpu_util = CPUS_PER_NODE, GPUS_PER_NODE cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800) job_info = job_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, \ net_tx, net_rx = [], [] job_info = job_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 100, None) jobs.insert(0, job_info) return jobs Loading @@ -105,7 +107,8 @@ class Workload(object): jobs = self.generate_random_jobs(num_jobs=0) cpu_util, gpu_util = 0, 0 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 43200) job_info = job_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, \ net_tx, net_rx = [], [] job_info = job_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 0, None) jobs.insert(0, job_info) return jobs Loading @@ -115,29 +118,30 @@ class Workload(object): """Benchmark tests""" jobs = self.generate_random_jobs(num_jobs=0) net_tx, net_rx = [], [] # Max test cpu_util, gpu_util = 1, 4 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800) job_info = jobs_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "Max Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 100, None) jobs.insert(0, job_info) # OpenMxP run cpu_util, gpu_util = 0, 4 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600) job_info = jobs_dict(AVAILABLE_NODES, "OpenMxP", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "OpenMxP", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 300, None) jobs.insert(0, job_info) # HPL run cpu_util, gpu_util = 0.33, 0.79 * 4 # based on 24-01-18 run cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600) job_info = jobs_dict(AVAILABLE_NODES, "HPL", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "HPL", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 200, None) jobs.insert(0, job_info) # Idle test cpu_util, gpu_util = 0, 0 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600) job_info = jobs_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, job_info = jobs_dict(AVAILABLE_NODES, "Idle Test", cpu_trace, gpu_trace, net_tx, net_rx, \ len(gpu_trace)*TRACE_QUANTA, 'COMPLETED', None, 0, None) jobs.insert(0, job_info) Loading