From 9a8050311e2b1303d69b7cc256dc18aff46e2c37 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 11 Feb 2025 20:36:12 -0500 Subject: [PATCH 001/388] Add support for both submit_time and start_time on telemetry replay - currently just supported for Frontier --- raps/dataloaders/frontier.py | 13 +++++++------ raps/job.py | 5 +++-- raps/schedulers/default.py | 3 +++ raps/telemetry.py | 1 + 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 6edd052..172a5f1 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -136,9 +136,13 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar wall_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds + time_submit = jobs_df.loc[jidx, 'time_submission'] + diff = time_submit - time_zero + time_submit = max(diff.total_seconds(), 0) + time_start = jobs_df.loc[jidx+1, 'time_start'] diff = time_start - time_zero - time_offset = max(diff.total_seconds(), 0) + time_start = max(diff.total_seconds(), 0) if fastforward: time_offset -= fastforward @@ -155,9 +159,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar elif reschedule == 'submit-time': scheduled_nodes = None - time_submit = jobs_df.loc[jidx, 'time_submission'] - diff = time_submit - time_zero - time_offset = max(diff.total_seconds(), 0) priority = aging_boost(nodes_required) #raise NotImplementedError @@ -168,9 +169,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar indices = xname_to_index(xname, config) scheduled_nodes.append(indices) - if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_offset > 0: + if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_start > 0: job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], wall_time, - end_state, scheduled_nodes, time_offset, job_id, priority) + end_state, scheduled_nodes, time_submit, job_id, priority, time_start) jobs.append(job_info) return jobs diff --git a/raps/job.py b/raps/job.py index c0b0e9b..a896d77 100644 --- a/raps/job.py +++ b/raps/job.py @@ -1,7 +1,7 @@ from enum import Enum def job_dict(nodes_required, name, account, cpu_trace, gpu_trace, ntx_trace, nrx_trace, \ - wall_time, end_state, scheduled_nodes, time_offset, job_id, priority=0, partition=0): + wall_time, end_state, scheduled_nodes, time_offset, job_id, priority=0, partition=0, start_time=0): """ Return job info dictionary """ return { 'nodes_required': nodes_required, @@ -17,7 +17,8 @@ def job_dict(nodes_required, name, account, cpu_trace, gpu_trace, ntx_trace, nrx 'submit_time': time_offset, 'id': job_id, 'priority': priority, - 'partition': partition + 'partition': partition, + 'start_time': start_time } diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index c3291ac..50e1a8d 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -11,6 +11,7 @@ class PolicyType(Enum): PRIORITY = 'priority' FUGAKU_PTS = 'fugaku_pts' SJF = 'sjf' + REPLAY = 'replay' class Scheduler: @@ -34,6 +35,8 @@ class Scheduler: return sorted(queue, key=lambda job: job.priority, reverse=True) elif self.policy == PolicyType.FUGAKU_PTS: return self.sort_fugaku_redeeming(queue, accounts) + elif self.policy == PolicyType.REPLAY: + return sorted(queue, key=lambda job: job.start_time) else: raise ValueError(f"Unknown policy type: {self.policy}") diff --git a/raps/telemetry.py b/raps/telemetry.py index 2616bc7..0355562 100644 --- 
a/raps/telemetry.py +++ b/raps/telemetry.py @@ -51,6 +51,7 @@ class Telemetry: def load_snapshot(self, snapshot: str) -> (list, dict): """Reads a snapshot from a compressed file and returns the jobs.""" jobs, accounts_dict = np.load(snapshot, allow_pickle=True, mmap_mode='r') + print(jobs) return jobs['jobs'].tolist(), Accounts.initialize_accounts_from_dict(accounts_dict) def load_data(self, files): -- GitLab From d85910c5e91ed4b8e729b2c4301842ecff76e901 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 11 Feb 2025 20:54:40 -0500 Subject: [PATCH 002/388] Add support for 'time_submit' in Marconi100 & fix some things in Frontier dataloader --- raps/dataloaders/frontier.py | 6 ++++-- raps/dataloaders/marconi100.py | 22 ++++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 172a5f1..c5d457f 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -145,7 +145,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar time_start = max(diff.total_seconds(), 0) if fastforward: - time_offset -= fastforward + time_start -= fastforward + time_submit -= fastforward xnames = jobs_df.loc[jidx, 'xnames'] # Don't replay any job with an empty set of xnames @@ -155,6 +156,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar if reschedule == 'poisson': # Let the scheduler reschedule the jobs scheduled_nodes = None time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) + time_start = None priority = aging_boost(nodes_required) elif reschedule == 'submit-time': @@ -169,7 +171,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar indices = xname_to_index(xname, config) scheduled_nodes.append(indices) - if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_start > 0: + if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_submit > 0: job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], wall_time, end_state, scheduled_nodes, time_submit, job_id, priority, time_start) jobs.append(job_info) diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index c6a97f8..ea8696b 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -130,31 +130,37 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): priority = int(jobs_df.loc[jidx, 'priority']) - # wall_time = jobs_df.loc[i, 'run_time'] wall_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds end_state = jobs_df.loc[jidx, 'job_state'] + time_start = jobs_df.loc[jidx+1, 'start_time'] - diff = time_start - time_zero + time_start = time_start - time_zero + + time_submit = jobs_df.loc[jidx, 'submit_time'] + time_submit = time_submit - time_zero if jid == '*': - time_offset = max(diff.total_seconds(), 0) + time_submit = max(time_submit.total_seconds(), 0) else: # When extracting out a single job, run one iteration past the end of the job - time_offset = config['UI_UPDATE_FREQ'] + time_submit = config['UI_UPDATE_FREQ'] - if fastforward: time_offset -= fastforward + if fastforward: + time_start -= fastforward + time_submit -= fastforward if reschedule == 'poisson': # Let the scheduler reschedule the jobs scheduled_nodes = None - time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) + time_submit = next_arrival(1/config['JOB_ARRIVAL_TIME']) + time_start = None elif reschedule == 'submit-time': raise NotImplementedError else: # Prescribed replay 
            scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist()
 
-        if gpu_trace.size > 0 and time_offset >= 0:
+        if gpu_trace.size > 0 and time_submit >= 0:
             job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], wall_time,
-                                end_state, scheduled_nodes, time_offset, job_id, priority)
+                                end_state, scheduled_nodes, time_submit, job_id, priority, time_start)
             jobs.append(job_info)
 
     return jobs
--
GitLab

From e8425cdfe613b688b533f74102834d9942943a6d Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 12 Feb 2025 12:24:19 -0500
Subject: [PATCH 003/388] Fix bug `if synthetic_bool or telemetry_bool:` ->
 `if nodes_available:`

---
 raps/schedulers/default.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py
index 50e1a8d..3aaf9ce 100644
--- a/raps/schedulers/default.py
+++ b/raps/schedulers/default.py
@@ -48,17 +48,18 @@ class Scheduler:
 
         # Iterate over a copy of the queue since we might remove items
         for job in queue[:]:
-            # For synthetic jobs the number of requested nodes is given.
-            # Make sure the available nodes count meets job.nodes_required.
-            synthetic_bool = len(self.resource_manager.available_nodes) >= job.nodes_required
-
-            # For telemetry replay jobs a list of requested nodes is provided.
             # Make sure the requested nodes are available.
-            telemetry_bool = False
-            if job.requested_nodes:
-                telemetry_bool = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
+            nodes_available = False
+            if job.requested_nodes: # nodes specified, i.e., telemetry replay
+                nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
+            else: # synthetic
+                nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required
+
+            if self.policy == PolicyType.REPLAY and current_time < job.start_time:
+                # Don't start replay jobs until they reach their start_time
+                nodes_available = False
 
-            if synthetic_bool or telemetry_bool:
+            if nodes_available:
                 self.resource_manager.assign_nodes_to_job(job, current_time)
                 running.append(job)
                 queue.remove(job)
--
GitLab

From 478c0b8f56ea0028ebf76ffbacfb54ecb94904a4 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 12 Feb 2025 23:02:43 -0500
Subject: [PATCH 004/388] Fix a number of issues with replaying jobs including
 both submit_time and start_time. Some features of accounts have been
 disabled b/c of a bug found when loading .npz files.

---
 README.md                        |  2 +-
 args.py                          |  4 ++--
 config/frontier/scheduler.json   |  2 +-
 main.py                          | 12 ++++++------
 raps/dataloaders/adastraMI250.py |  2 --
 raps/dataloaders/frontier.py     | 12 +++++-------
 raps/dataloaders/fugaku.py       |  2 --
 raps/dataloaders/lassen.py       |  2 --
 raps/dataloaders/marconi100.py   |  4 +---
 raps/engine.py                   | 20 ++++++++++----------
 raps/telemetry.py                | 11 +++++------
 11 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index d54ff3d..3d4db55 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ This will simulate synthetic workloads on two partitions as defined in `config/s
 
 This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this file to pm100.npz for clarity. Note: can control-C when the simulation starts.
Now, this pm100.npz file can be used with `multi-part-sim.py` as follows: - python multi-part-sim.py -x setonix/* -f pm100.npz --reschedule poisson --scale 192 + python multi-part-sim.py -x setonix/* -f pm100.npz --reschedule --scale 192 The `--reschedule` flag will use the internal scheduler to determine what nodes to schedule for each job, and the `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition). diff --git a/args.py b/args.py index 8cfcd77..497997b 100644 --- a/args.py +++ b/args.py @@ -14,8 +14,8 @@ parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose parser.add_argument('--seed', action='store_true', help='Set random number seed for deterministic simulation') parser.add_argument('-f', '--replay', nargs='+', type=str, help='Either: path/to/joblive path/to/jobprofile' + \ ' -or- filename.npz (overrides --workload option)') -choices = ['poisson', 'submit-time'] -parser.add_argument('--reschedule', type=str, choices=choices, help='Reschedule the telemetry workload') +choices = ['poisson'] # to allow for future additional options +parser.add_argument('--reschedule', nargs='?', const='poisson', type=str, choices=choices, help='Reschedule the telemetry workload') parser.add_argument('-u', '--uncertainties', action='store_true', help='Change from floating point units to floating point units with uncertainties.' + \ ' Very expensive w.r.t simulation time!') diff --git a/config/frontier/scheduler.json b/config/frontier/scheduler.json index 5caf890..3cc1744 100644 --- a/config/frontier/scheduler.json +++ b/config/frontier/scheduler.json @@ -1,6 +1,6 @@ { "SEED": 42, - "JOB_ARRIVAL_TIME": 900, + "JOB_ARRIVAL_TIME": 100, "MTBF": 11, "TRACE_QUANTA": 15, "MIN_WALL_TIME": 3600, diff --git a/main.py b/main.py index 7e8919d..7163732 100644 --- a/main.py +++ b/main.py @@ -72,6 +72,9 @@ layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, **confi if args.replay: + if not args.reschedule: + args.policy = "replay" + if args.fastforward: args.fastforward = convert_to_seconds(args.fastforward) @@ -89,7 +92,7 @@ if args.replay: # Read telemetry data (either npz file or via custom data loader) if args.replay[0].endswith(".npz"): # Replay .npz file print(f"Loading {args.replay[0]}...") - jobs, accounts = td.load_snapshot(args.replay[0]) + jobs = td.load_snapshot(args.replay[0]) if args.scale: for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): @@ -101,16 +104,13 @@ if args.replay: for job in tqdm(jobs, desc="Rescheduling jobs"): job['requested_nodes'] = None job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME']) - elif args.reschedule == 'submit-time': - raise NotImplementedError else: # custom data loader print(*args.replay) jobs = td.load_data(args.replay) accounts = Accounts(jobs) - sc.accounts = accounts accounts_dict = accounts.to_dict() - td.save_snapshot(jobs, accounts, filename=DIR_NAME) + td.save_snapshot(jobs, filename=DIR_NAME) # Set number of timesteps based on the last job running which we assume # is the maximum value of submit_time + wall_time of all the jobs @@ -148,7 +148,7 @@ else: # Synthetic jobs OPATH = OUTPUT_PATH / DIR_NAME print("Output directory is: ", OPATH) sc.opath = OPATH -sc.accounts = accounts +#sc.accounts = accounts if args.plot or args.output: try: diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 58eaec7..7f5c6af 100644 --- a/raps/dataloaders/adastraMI250.py +++ 
b/raps/dataloaders/adastraMI250.py @@ -160,8 +160,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): if reschedule == 'poisson': # Let the scheduler reschedule the jobs scheduled_nodes = None time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) - elif reschedule == 'submit-time': - raise NotImplementedError else: # Prescribed replay scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index c5d457f..a243a01 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -140,7 +140,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar diff = time_submit - time_zero time_submit = max(diff.total_seconds(), 0) - time_start = jobs_df.loc[jidx+1, 'time_start'] + time_start = jobs_df.loc[jidx, 'time_start'] diff = time_start - time_zero time_start = max(diff.total_seconds(), 0) @@ -159,11 +159,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar time_start = None priority = aging_boost(nodes_required) - elif reschedule == 'submit-time': - scheduled_nodes = None - priority = aging_boost(nodes_required) - #raise NotImplementedError - else: # Prescribed replay scheduled_nodes = [] priority = 0 # not used for replay @@ -171,7 +166,10 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar indices = xname_to_index(xname, config) scheduled_nodes.append(indices) - if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_submit > 0: + if gpu_trace.size == 0: + print("ignoring job b/c zero trace:", jidx, time_submit, time_start, nodes_required) + + if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_submit >= 0: job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], wall_time, end_state, scheduled_nodes, time_submit, job_id, priority, time_start) jobs.append(job_info) diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index bc28ec3..fec9125 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -86,8 +86,6 @@ def load_data_from_df(df, **kwargs): submit_time = row['adt'] if 'adt' in df.columns else min_time if reschedule == 'poisson': # Let the scheduler reschedule the jobs time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) - elif reschedule == 'submit-time': - raise NotImplementedError else: time_offset = (submit_time - min_time).total_seconds() # Compute time offset in seconds diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index b14cabe..5aaa193 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -124,8 +124,6 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): if reschedule == 'poisson': # Let the scheduler reschedule the jobs scheduled_nodes = None time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) - elif reschedule == 'submit-time': - raise NotImplementedError else: scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df) time_offset = compute_time_offset(row['begin_time'], min_time) diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index ea8696b..af3d1fb 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -133,7 +133,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): wall_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds end_state = jobs_df.loc[jidx, 'job_state'] - time_start = jobs_df.loc[jidx+1, 'start_time'] + time_start = jobs_df.loc[jidx, 'start_time'] 
time_start = time_start - time_zero time_submit = jobs_df.loc[jidx, 'submit_time'] @@ -153,8 +153,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): scheduled_nodes = None time_submit = next_arrival(1/config['JOB_ARRIVAL_TIME']) time_start = None - elif reschedule == 'submit-time': - raise NotImplementedError else: # Prescribed replay scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() diff --git a/raps/engine.py b/raps/engine.py index 0886b38..82b8ce5 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -37,7 +37,6 @@ class Engine: total_nodes=self.config['TOTAL_NODES'], down_nodes=self.config['DOWN_NODES'] ) - # Initialize running and queue, etc. self.running = [] self.queue = [] @@ -63,12 +62,16 @@ class Engine: ) print(f"Using scheduler: {scheduler_type}") - def eligible_jobs(self,jobs_to_submit): + def eligible_jobs(self, jobs_to_submit): + # Build a list of jobs whose submit_time is <= current_time. + eligible = [job for job in jobs_to_submit if job['submit_time'] <= self.current_time] + # Remove those jobs from jobs_to_submit: + jobs_to_submit[:] = [job for job in jobs_to_submit if job['submit_time'] > self.current_time] + # Convert them to Job instances and build list of eligible jobs. eligible_jobs_list = [] - while jobs_to_submit and jobs_to_submit[0]['submit_time'] <= self.current_time: - job_info = jobs_to_submit.pop(0) - job = Job(job_info, self.current_time) - eligible_jobs_list.append(job) + for job_data in eligible: + job_instance = Job(job_data, self.current_time) + eligible_jobs_list.append(job_instance) return eligible_jobs_list def tick(self): @@ -130,14 +133,11 @@ class Engine: self.running.remove(job) self.jobs_completed += 1 job_stats = job.statistics() - self.accounts.update_account_statistics(job_stats) + #self.accounts.update_account_statistics(job_stats) self.job_history_dict.append(job_stats.__dict__) # Free the nodes via the resource manager. self.resource_manager.free_nodes_from_job(job) - # Ask scheduler to schedule any jobs waiting in queue - self.scheduler.schedule(self.queue, self.running, self.current_time, self.accounts) - # Update the power array UI component rack_power, rect_losses = self.power_manager.compute_rack_power() sivoc_losses = self.power_manager.compute_sivoc_losses() diff --git a/raps/telemetry.py b/raps/telemetry.py index 0355562..9d2fd83 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -44,15 +44,14 @@ class Telemetry: except: print("WARNING: Failed to load dataloader") - def save_snapshot(self, jobs: list, accounts: dict, filename: str): + def save_snapshot(self, jobs: list, filename: str): """Saves a snapshot of the jobs to a compressed file. 
""" - np.savez_compressed(filename, jobs=jobs, accounts=accounts) + np.savez_compressed(filename, jobs=jobs) - def load_snapshot(self, snapshot: str) -> (list, dict): + def load_snapshot(self, snapshot: str) -> list: """Reads a snapshot from a compressed file and returns the jobs.""" - jobs, accounts_dict = np.load(snapshot, allow_pickle=True, mmap_mode='r') - print(jobs) - return jobs['jobs'].tolist(), Accounts.initialize_accounts_from_dict(accounts_dict) + jobs = np.load(snapshot, allow_pickle=True, mmap_mode='r') + return jobs['jobs'].tolist() def load_data(self, files): """Load telemetry data using custom data loaders.""" -- GitLab From 62c746141d37aaae3a18d03700ec14bb85a98d1e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 13 Feb 2025 14:53:01 -0500 Subject: [PATCH 005/388] Move args.policy smart logic from main.py to args.py --- args.py | 18 ++++++++++++++++-- main.py | 3 --- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/args.py b/args.py index 497997b..b354d4f 100644 --- a/args.py +++ b/args.py @@ -1,4 +1,5 @@ import argparse +import sys from raps.schedulers.default import PolicyType parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)') @@ -30,8 +31,8 @@ parser.add_argument('--scale', type=int, default=0, help='Scale telemetry to max parser.add_argument('--system', type=str, default='frontier', help='System config to use') choices = ['default', 'nrel', 'anl', 'flux'] parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler') -choices = [policy.value for policy in PolicyType] -parser.add_argument('--policy', type=str, choices=choices, default=choices[0], help='Schedule policy to use') +policies = [policy.value for policy in PolicyType] +parser.add_argument('--policy', type=str, choices=policies, default=None, help='Schedule policy to use') choices = ['random', 'benchmark', 'peak', 'idle'] parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') choices = ['layout1', 'layout2'] @@ -42,3 +43,16 @@ parser.add_argument('--accounts-json', type=str, help='Json of account stats gen args = parser.parse_args() args_dict = vars(args) print(args_dict) + +# Determine the default policy based on --replay +policy_specified = args.policy is not None # was policy set explicitly + +if not policy_specified: + if args.replay: # if --replay is provided, default to "replay" + args.policy = "replay" + print(f"No policy specified, using default for replay: {args.policy}") + else: # otherwise, default to "fcfs" + args.policy = policies[0] + print(f"No policy specified, using default: {args.policy}") + +print("Final policy:", args.policy) diff --git a/main.py b/main.py index 7163732..deda9f9 100644 --- a/main.py +++ b/main.py @@ -72,9 +72,6 @@ layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, **confi if args.replay: - if not args.reschedule: - args.policy = "replay" - if args.fastforward: args.fastforward = convert_to_seconds(args.fastforward) -- GitLab From 8faede422b6bb91ab878207d832408688ac53526 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 13 Feb 2025 15:00:40 -0500 Subject: [PATCH 006/388] Change --reschedule to --arrival (need to make additional changes in raps/*) --- args.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/args.py b/args.py index b354d4f..98de2cc 100644 --- a/args.py +++ b/args.py @@ -15,8 +15,8 @@ parser.add_argument('-v', '--verbose', action='store_true', 
help='Enable verbose parser.add_argument('--seed', action='store_true', help='Set random number seed for deterministic simulation') parser.add_argument('-f', '--replay', nargs='+', type=str, help='Either: path/to/joblive path/to/jobprofile' + \ ' -or- filename.npz (overrides --workload option)') -choices = ['poisson'] # to allow for future additional options -parser.add_argument('--reschedule', nargs='?', const='poisson', type=str, choices=choices, help='Reschedule the telemetry workload') +choices = ['prescribed', 'poisson'] +parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})') parser.add_argument('-u', '--uncertainties', action='store_true', help='Change from floating point units to floating point units with uncertainties.' + \ ' Very expensive w.r.t simulation time!') -- GitLab From 239b99d46a442ff250d7485ab6630ddf35f17a90 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 13 Feb 2025 15:01:53 -0500 Subject: [PATCH 007/388] Moving PolicyType out of schedulers/default to policy.py for use with other schedulers. --- raps/policy.py | 10 ++++++++++ raps/schedulers/default.py | 14 ++------------ 2 files changed, 12 insertions(+), 12 deletions(-) create mode 100644 raps/policy.py diff --git a/raps/policy.py b/raps/policy.py new file mode 100644 index 0000000..d1bcbf2 --- /dev/null +++ b/raps/policy.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class PolicyType(Enum): + """Supported scheduling policies.""" + FCFS = 'fcfs' + BACKFILL = 'backfill' + PRIORITY = 'priority' + FUGAKU_PTS = 'fugaku_pts' + REPLAY = 'replay' diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index 3aaf9ce..aa6954f 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -3,15 +3,7 @@ from ..utils import summarize_ranges from ..workload import MAX_PRIORITY - -class PolicyType(Enum): - """Supported scheduling policies.""" - FCFS = 'fcfs' - BACKFILL = 'backfill' - PRIORITY = 'priority' - FUGAKU_PTS = 'fugaku_pts' - SJF = 'sjf' - REPLAY = 'replay' +from ..policy import PolicyType class Scheduler: @@ -29,8 +21,6 @@ class Scheduler: """Sort jobs based on the selected scheduling policy.""" if self.policy == PolicyType.FCFS or self.policy == PolicyType.BACKFILL: return sorted(queue, key=lambda job: job.submit_time) - elif self.policy == PolicyType.SJF: - return sorted(queue, key=lambda job: job.wall_time) elif self.policy == PolicyType.PRIORITY: return sorted(queue, key=lambda job: job.priority, reverse=True) elif self.policy == PolicyType.FUGAKU_PTS: @@ -38,7 +28,7 @@ class Scheduler: elif self.policy == PolicyType.REPLAY: return sorted(queue, key=lambda job: job.start_time) else: - raise ValueError(f"Unknown policy type: {self.policy}") + raise ValueError(f"Policy not implemented: {self.policy}") def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): # Sort the queue in place. 
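
The replay behavior assembled across patches 001, 003, and 007 has two parts: under PolicyType.REPLAY the queue is sorted by each job's recorded start_time, and a queued replay job is additionally held until the simulation clock reaches that start_time. A minimal sketch of that logic in isolation, using SimpleNamespace stand-ins with made-up times rather than the real raps.job.Job objects:

    from types import SimpleNamespace

    # Stand-in jobs with recorded submit/start times (hypothetical values)
    queue = [SimpleNamespace(id='a', submit_time=0, start_time=300),
             SimpleNamespace(id='b', submit_time=10, start_time=60),
             SimpleNamespace(id='c', submit_time=20, start_time=120)]

    # PolicyType.REPLAY sort order: by recorded start_time, not submission order
    replay_order = sorted(queue, key=lambda job: job.start_time)
    assert [job.id for job in replay_order] == ['b', 'c', 'a']

    # Hold from patch 003: a replay job must not start before its start_time
    current_time = 100
    startable = [job for job in replay_order if not current_time < job.start_time]
    assert [job.id for job in startable] == ['b']
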
-- GitLab From bce245e7e81caf77ffa04c6869231a341530a5c9 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 13 Feb 2025 16:09:11 -0500 Subject: [PATCH 008/388] Split stats output to system_stats and job_stats --- main.py | 9 ++++++--- raps/engine.py | 55 +++++++++++++++++++++++++++++++++++++++++--------- raps/job.py | 1 + raps/utils.py | 12 +++++++++++ 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/main.py b/main.py index deda9f9..da28fe5 100644 --- a/main.py +++ b/main.py @@ -158,13 +158,16 @@ if args.verbose: layout_manager.run(jobs, timesteps=timesteps) -output_stats = sc.get_stats() +system_stats = sc.get_system_stats() +job_stats = sc.get_job_stats() # Following b/c we get the following error when we use PM100 telemetry dataset # TypeError: Object of type int64 is not JSON serializable try: - print(json.dumps(output_stats, indent=4)) + print(json.dumps(system_stats, indent=4)) + print(json.dumps(job_stats, indent=4)) except: - print(output_stats) + print(system_stats) + print(job_stats) if args.plot: diff --git a/raps/engine.py b/raps/engine.py index 82b8ce5..6cc7812 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -2,9 +2,12 @@ from typing import Optional import dataclasses import pandas as pd +import sys + from .job import Job, JobState from .network import network_utilization from .utils import summarize_ranges, expand_ranges, get_utilization +from .utils import sum_values, min_value, max_value from .resmgr import ResourceManager from .schedulers import load_scheduler @@ -227,14 +230,10 @@ class Engine: yield self.tick() - def get_stats(self): + def get_system_stats(self): """ Return output statistics """ - sum_values = lambda values: sum(x[1] for x in values) if values else 0 - min_value = lambda values: min(x[1] for x in values) if values else 0 - max_value = lambda values: max(x[1] for x in values) if values else 0 num_samples = len(self.power_manager.history) if self.power_manager else 0 - throughput = self.jobs_completed / self.timesteps * 3600 if self.timesteps else 0 # Jobs per hour average_power_mw = sum_values(self.power_manager.history) / num_samples / 1000 if num_samples else 0 average_loss_mw = sum_values(self.power_manager.loss_history) / num_samples / 1000 if num_samples else 0 min_loss_mw = min_value(self.power_manager.loss_history) / 1000 if num_samples else 0 @@ -248,10 +247,6 @@ class Engine: stats = { 'num_samples': num_samples, - 'jobs completed': self.jobs_completed, - 'throughput': f'{throughput:.2f} jobs/hour', - 'jobs still running': [job.id for job in self.running], - 'jobs still in queue': [job.id for job in self.queue], 'average power': f'{average_power_mw:.2f} MW', 'min loss': f'{min_loss_mw:.2f} MW', 'average loss': f'{average_loss_mw:.2f} MW', @@ -266,3 +261,45 @@ class Engine: def get_job_history_dict(self): return self.job_history_dict + + def get_job_stats(self): + throughput = self.jobs_completed / self.timesteps * 3600 if self.timesteps else 0 # Jobs per hour + min_wait_time = sys.maxsize + max_wait_time = -sys.maxsize - 1 + aggregate_wait_time = 0 + min_turnaround_time = sys.maxsize + max_turnaround_time = -sys.maxsize - 1 + aggregate_turnaround_time = 0 + for job in self.job_history_dict: + wait_time = job["start_time"] - job["submit_time"] + aggregate_wait_time += wait_time + turnaround_time = job["end_time"] - job["submit_time"] + aggregate_turnaround_time += turnaround_time + if wait_time < min_wait_time: + min_wait_time = wait_time + if wait_time > max_wait_time: + max_wait_time = wait_time + if turnaround_time < 
min_turnaround_time: + min_turnaround_time = turnaround_time + if turnaround_time > max_turnaround_time: + max_turnaround_time = turnaround_time + + if len(self.job_history_dict) != 0: + average_wait_time = aggregate_wait_time / len(self.job_history_dict) + average_turnaround_time = aggregate_turnaround_time / len(self.job_history_dict) + else: + average_wait_time = -1 + average_turnaround_time = -1 + job_stats = { + 'jobs completed': self.jobs_completed, + 'throughput': f'{throughput:.2f} jobs/hour', + 'jobs still running': [job.id for job in self.running], + 'jobs still in queue': [job.id for job in self.queue], + 'min_wait_time': min_wait_time, + 'max_wait_time': max_wait_time, + 'average_wait_time': average_wait_time, + 'min_turnaround_time': min_turnaround_time, + 'max_turnaround_time': max_turnaround_time, + 'average_turnaround_time': average_turnaround_time + } + return job_stats diff --git a/raps/job.py b/raps/job.py index a896d77..35ff32d 100644 --- a/raps/job.py +++ b/raps/job.py @@ -116,6 +116,7 @@ class JobStatistics: self.account = job.account self.num_nodes = len(job.scheduled_nodes) self.run_time = job.running_time + self.submit_time = job.submit_time self.start_time = job.start_time self.end_time = job.end_time self.state = job._state diff --git a/raps/utils.py b/raps/utils.py index 5ead3d1..ce9dc57 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -18,6 +18,18 @@ import uuid import json +def sum_values(values): + return sum(x[1] for x in values) if values else 0 + + +def min_value(values): + return min(x[1] for x in values) if values else 0 + + +def max_value(values): + return max(x[1] for x in values) if values else 0 + + def convert_seconds(seconds): """Convert seconds to time format: 3661s -> 01:01""" td = timedelta(seconds=seconds) -- GitLab From bbf1a4e865ec17303456b00b8395bf66037fce50 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 13 Feb 2025 16:12:26 -0500 Subject: [PATCH 009/388] If no jobs completed, set defaults to -1 --- raps/engine.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/raps/engine.py b/raps/engine.py index 6cc7812..7a66fb8 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -288,6 +288,11 @@ class Engine: average_wait_time = aggregate_wait_time / len(self.job_history_dict) average_turnaround_time = aggregate_turnaround_time / len(self.job_history_dict) else: + # Set these to -1 to indicate nothing ran + min_wait_time = -1 + max_wait_time = -1 + min_turnaround_time = -1 + max_turnaround_time = -1 average_wait_time = -1 average_turnaround_time = -1 job_stats = { -- GitLab From a8f2613eb51210f4da5dad28d7919b6471cc8929 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 13 Feb 2025 16:32:15 -0500 Subject: [PATCH 010/388] Fixed renaming of args.reschedule to args.policy in main.py --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index da28fe5..89f572f 100644 --- a/main.py +++ b/main.py @@ -96,7 +96,7 @@ if args.replay: job['nodes_required'] = random.randint(1, args.scale) job['requested_nodes'] = None # Setting to None triggers scheduler to assign nodes - if args.reschedule == 'poisson': + if args.policy == 'poisson': print("available nodes:", config['AVAILABLE_NODES']) for job in tqdm(jobs, desc="Rescheduling jobs"): job['requested_nodes'] = None -- GitLab From e4586949ca52fdd8ff3d2136de87d1fda6df02fa Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 13 Feb 2025 19:07:43 -0500 Subject: [PATCH 011/388] Added additional stats, and move them to raps/stats.py --- main.py | 
15 +++--- raps/engine.py | 76 --------------------------- raps/stats.py | 136 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 82 deletions(-) create mode 100644 raps/stats.py diff --git a/main.py b/main.py index 89f572f..4cb3081 100644 --- a/main.py +++ b/main.py @@ -32,6 +32,7 @@ from raps.workload import Workload from raps.account import Accounts from raps.weather import Weather from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, next_arrival +from raps.stats import get_engine_stats, get_job_stats config = ConfigManager(system_name=args.system).get_config() @@ -158,15 +159,15 @@ if args.verbose: layout_manager.run(jobs, timesteps=timesteps) -system_stats = sc.get_system_stats() -job_stats = sc.get_job_stats() +engine_stats = get_engine_stats(sc) +job_stats = get_job_stats(sc) # Following b/c we get the following error when we use PM100 telemetry dataset # TypeError: Object of type int64 is not JSON serializable try: - print(json.dumps(system_stats, indent=4)) + print(json.dumps(engine_stats, indent=4)) print(json.dumps(job_stats, indent=4)) except: - print(system_stats) + print(engine_stats) print(job_stats) @@ -245,9 +246,11 @@ if args.output: try: with open(OPATH / 'stats.out', 'w') as f: - json.dump(output_stats, f, indent=4) + json.dump(engine_stats, f, indent=4) + json.dump(job_stats, f, indent=4) except: - write_dict_to_file(output_stats, OPATH / 'stats.out') + write_dict_to_file(engine_stats, OPATH / 'stats.out') + write_dict_to_file(job_stats, OPATH / 'stats.out') try: with open(OPATH / 'accounts.json', 'w') as f: diff --git a/raps/engine.py b/raps/engine.py index 7a66fb8..fff515f 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -230,81 +230,5 @@ class Engine: yield self.tick() - def get_system_stats(self): - """ Return output statistics """ - num_samples = len(self.power_manager.history) if self.power_manager else 0 - - average_power_mw = sum_values(self.power_manager.history) / num_samples / 1000 if num_samples else 0 - average_loss_mw = sum_values(self.power_manager.loss_history) / num_samples / 1000 if num_samples else 0 - min_loss_mw = min_value(self.power_manager.loss_history) / 1000 if num_samples else 0 - max_loss_mw = max_value(self.power_manager.loss_history) / 1000 if num_samples else 0 - - loss_fraction = average_loss_mw / average_power_mw if average_power_mw else 0 - efficiency = 1 - loss_fraction if loss_fraction else 0 - total_energy_consumed = average_power_mw * self.timesteps / 3600 if self.timesteps else 0 # MW-hr - emissions = total_energy_consumed * 852.3 / 2204.6 / efficiency if efficiency else 0 - total_cost = total_energy_consumed * 1000 * self.config.get('POWER_COST', 0) # Total cost in dollars - - stats = { - 'num_samples': num_samples, - 'average power': f'{average_power_mw:.2f} MW', - 'min loss': f'{min_loss_mw:.2f} MW', - 'average loss': f'{average_loss_mw:.2f} MW', - 'max loss': f'{max_loss_mw:.2f} MW', - 'system power efficiency': f'{efficiency * 100:.2f}%', - 'total energy consumed': f'{total_energy_consumed:.2f} MW-hr', - 'carbon emissions': f'{emissions:.2f} metric tons CO2', - 'total cost': f'${total_cost:.2f}' - } - - return stats - def get_job_history_dict(self): return self.job_history_dict - - def get_job_stats(self): - throughput = self.jobs_completed / self.timesteps * 3600 if self.timesteps else 0 # Jobs per hour - min_wait_time = sys.maxsize - max_wait_time = -sys.maxsize - 1 - aggregate_wait_time = 0 - min_turnaround_time = sys.maxsize - max_turnaround_time = 
-sys.maxsize - 1
-        aggregate_turnaround_time = 0
-        for job in self.job_history_dict:
-            wait_time = job["start_time"] - job["submit_time"]
-            aggregate_wait_time += wait_time
-            turnaround_time = job["end_time"] - job["submit_time"]
-            aggregate_turnaround_time += turnaround_time
-            if wait_time < min_wait_time:
-                min_wait_time = wait_time
-            if wait_time > max_wait_time:
-                max_wait_time = wait_time
-            if turnaround_time < min_turnaround_time:
-                min_turnaround_time = turnaround_time
-            if turnaround_time > max_turnaround_time:
-                max_turnaround_time = turnaround_time
-
-        if len(self.job_history_dict) != 0:
-            average_wait_time = aggregate_wait_time / len(self.job_history_dict)
-            average_turnaround_time = aggregate_turnaround_time / len(self.job_history_dict)
-        else:
-            # Set these to -1 to indicate nothing ran
-            min_wait_time = -1
-            max_wait_time = -1
-            min_turnaround_time = -1
-            max_turnaround_time = -1
-            average_wait_time = -1
-            average_turnaround_time = -1
-        job_stats = {
-            'jobs completed': self.jobs_completed,
-            'throughput': f'{throughput:.2f} jobs/hour',
-            'jobs still running': [job.id for job in self.running],
-            'jobs still in queue': [job.id for job in self.queue],
-            'min_wait_time': min_wait_time,
-            'max_wait_time': max_wait_time,
-            'average_wait_time': average_wait_time,
-            'min_turnaround_time': min_turnaround_time,
-            'max_turnaround_time': max_turnaround_time,
-            'average_turnaround_time': average_turnaround_time
-        }
-        return job_stats
diff --git a/raps/stats.py b/raps/stats.py
new file mode 100644
index 0000000..b067429
--- /dev/null
+++ b/raps/stats.py
@@ -0,0 +1,136 @@
+"""
+This module provides functionality for generating statistics.
+These are statistics on
+the engine
+the jobs
+
+Both could be part of the engine or jobs class, but as they are very verbose, try to keep statistics consolidated in this file.
+""" +import sys +from .utils import sum_values, min_value, max_value + +from .engine import Engine + + +def get_engine_stats(engine: Engine): + """ Return engine statistics """ + num_samples = len(engine.power_manager.history) if engine.power_manager else 0 + + average_power_mw = sum_values(engine.power_manager.history) / num_samples / 1000 if num_samples else 0 + average_loss_mw = sum_values(engine.power_manager.loss_history) / num_samples / 1000 if num_samples else 0 + min_loss_mw = min_value(engine.power_manager.loss_history) / 1000 if num_samples else 0 + max_loss_mw = max_value(engine.power_manager.loss_history) / 1000 if num_samples else 0 + + loss_fraction = average_loss_mw / average_power_mw if average_power_mw else 0 + efficiency = 1 - loss_fraction if loss_fraction else 0 + total_energy_consumed = average_power_mw * engine.timesteps / 3600 if engine.timesteps else 0 # MW-hr + emissions = total_energy_consumed * 852.3 / 2204.6 / efficiency if efficiency else 0 + total_cost = total_energy_consumed * 1000 * engine.config.get('POWER_COST', 0) # Total cost in dollars + + stats = { + 'num_samples': num_samples, + 'average power': f'{average_power_mw:.2f} MW', + 'min loss': f'{min_loss_mw:.2f} MW', + 'average loss': f'{average_loss_mw:.2f} MW', + 'max loss': f'{max_loss_mw:.2f} MW', + 'system power efficiency': f'{efficiency * 100:.2f}%', + 'total energy consumed': f'{total_energy_consumed:.2f} MW-hr', + 'carbon emissions': f'{emissions:.2f} metric tons CO2', + 'total cost': f'${total_cost:.2f}' + } + + return stats + + +def min_max_sum(value,min,max,sum): + if value < min: + min = value + if value > max: + max = value + sum += value + return min,max,sum + + +def get_job_stats(engine: Engine): + """ Return job statistics processed over the engine execution""" + # Information on Job-Mix + min_job_size, max_job_size, sum_job_size = sys.maxsize, -sys.maxsize - 1, 0 + min_runtime, max_runtime, sum_runtime = sys.maxsize, -sys.maxsize - 1, 0 + min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = sys.maxsize, -sys.maxsize - 1, 0 + # Completion statistics + throughput = engine.jobs_completed / engine.timesteps * 3600 if engine.timesteps else 0 # Jobs per hour + + min_wait_time, max_wait_time, sum_wait_time = sys.maxsize, -sys.maxsize - 1, 0 + min_turnaround_time, max_turnaround_time, sum_turnaround_time = sys.maxsize, -sys.maxsize - 1, 0 + + min_awrt, max_awrt, sum_awrt = sys.maxsize, -sys.maxsize - 1, 0 + + # Information on Job-Mix + for job in engine.job_history_dict: + job_size = job['num_nodes'] + min_job_size,max_job_size,sum_job_size = \ + min_max_sum(job_size, min_job_size, max_job_size, sum_job_size) + + runtime = job['end_time'] - job['start_time'] + min_runtime, max_runtime, sum_runtime = \ + min_max_sum(runtime, min_runtime, max_runtime, sum_runtime) + + agg_node_hours = runtime * job_size # Aggreagte node hours + min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = \ + min_max_sum(agg_node_hours, min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours) + + # Completion statistics + wait_time = job["start_time"] - job["submit_time"] + min_wait_time,max_wait_time,sum_wait_time = \ + min_max_sum(wait_time, min_wait_time, max_wait_time, sum_wait_time) + + turnaround_time = job["end_time"] - job["submit_time"] + min_turnaround_time, max_turnaround_time, sum_turnaround_time = \ + min_max_sum(turnaround_time, min_turnaround_time, max_turnaround_time, sum_turnaround_time) + + awrt = agg_node_hours * turnaround_time # Area Weighted Response Time + min_awrt, max_awrt, 
+
+    if len(engine.job_history_dict) != 0:
+        avg_job_size = sum_job_size / len(engine.job_history_dict)
+        avg_runtime = sum_runtime / len(engine.job_history_dict)
+        avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict)
+        avg_wait_time = sum_wait_time / len(engine.job_history_dict)
+        avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict)
+        avg_awrt = sum_awrt / len(engine.job_history_dict)
+    else:
+        # Set these to -1 to indicate nothing ran
+        min_job_size, max_job_size, avg_job_size = -1,-1,-1
+        min_runtime, max_runtime, avg_runtime = -1,-1,-1
+        min_agg_node_hours, max_agg_node_hours, avg_agg_node_hours = -1,-1,-1
+        min_wait_time, max_wait_time, avg_wait_time = -1,-1,-1
+        min_turnaround_time, max_turnaround_time, avg_turnaround_time = -1,-1,-1
+        min_awrt, max_awrt, avg_awrt = -1,-1,-1
+
+    job_stats = {
+        'jobs completed': engine.jobs_completed,
+        'throughput': f'{throughput:.2f} jobs/hour',
+        'jobs still running': [job.id for job in engine.running],
+        'jobs still in queue': [job.id for job in engine.queue],
+        # Information on job-mix executed
+        'min job size': min_job_size,
+        'max job size': max_job_size,
+        'average job size': avg_job_size,
+        'min runtime': min_runtime,
+        'max runtime': max_runtime,
+        'average runtime': avg_runtime,
+        'min_aggregate_node_hours': min_agg_node_hours,
+        'max_aggregate_node_hours': max_agg_node_hours,
+        'avg_aggregate_node_hours': avg_agg_node_hours,
+        # Completion statistics
+        'min_wait_time': min_wait_time,
+        'max_wait_time': max_wait_time,
+        'average_wait_time': avg_wait_time,
+        'min_turnaround_time': min_turnaround_time,
+        'max_turnaround_time': max_turnaround_time,
+        'average_turnaround_time': avg_turnaround_time,
+        'min_area_weighted_response_time': min_awrt,
+        'max_area_weighted_response_time': max_awrt,
+        'avg_area_weighted_response_time': avg_awrt
+    }
+    return job_stats
--
GitLab

From e51d93032cd7ea0aacd47d79f9be2aa73d7302af Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Thu, 13 Feb 2025 19:31:04 -0500
Subject: [PATCH 012/388] Added --accounts option, to track the accounts. Note
 --accounts and --accounts-json are now options, --accounts enables the
 tracking of accounts, while --accounts-json provides a file path to load
 previously dumped accounts.json files. --accounts in combination with -o
 writes said accounts.json to the output directory.

---
 args.py        |  1 +
 main.py        | 33 +++++++++++++++++-----------------
 raps/engine.py |  3 ++-
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/args.py b/args.py
index 98de2cc..9703a85 100644
--- a/args.py
+++ b/args.py
@@ -38,6 +38,7 @@ parser.add_argument('-w', '--workload', type=str, choices=choices, default=choic
 choices = ['layout1', 'layout2']
 parser.add_argument('-x', '--partitions', nargs='+', default=None, help='List of machine configurations to use, e.g., -x setonix-cpu setonix-gpu')
 parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI')
+parser.add_argument('--accounts', action='store_true', help='Flag indicating if accounts should be tracked')
 parser.add_argument('--accounts-json', type=str, help='Json of account stats generated in previous run.
see raps/accounts.py') args = parser.parse_args() diff --git a/main.py b/main.py index 4cb3081..fad83a1 100644 --- a/main.py +++ b/main.py @@ -106,8 +106,6 @@ if args.replay: else: # custom data loader print(*args.replay) jobs = td.load_data(args.replay) - accounts = Accounts(jobs) - accounts_dict = accounts.to_dict() td.save_snapshot(jobs, filename=DIR_NAME) # Set number of timesteps based on the last job running which we assume @@ -123,12 +121,6 @@ if args.replay: else: # Synthetic jobs wl = Workload(config) jobs = getattr(wl, args.workload)(num_jobs=args.numjobs) - job_accounts = Accounts(jobs) - if args.accounts_json: - loaded_accounts = Accounts.from_json_filename(args.accounts_json) - accounts = Accounts.merge(loaded_accounts,job_accounts) - else: - accounts = job_accounts if args.verbose: for job_vector in jobs: @@ -146,7 +138,15 @@ else: # Synthetic jobs OPATH = OUTPUT_PATH / DIR_NAME print("Output directory is: ", OPATH) sc.opath = OPATH -#sc.accounts = accounts + +if args.accounts: + job_accounts = Accounts(jobs) + if args.accounts_json: + loaded_accounts = Accounts.from_json_filename(args.accounts_json) + accounts = Accounts.merge(loaded_accounts,job_accounts) + else: + accounts = job_accounts + sc.accounts = accounts if args.plot or args.output: try: @@ -248,14 +248,15 @@ if args.output: with open(OPATH / 'stats.out', 'w') as f: json.dump(engine_stats, f, indent=4) json.dump(job_stats, f, indent=4) - except: + except TypeError: # Is this the correct error code? write_dict_to_file(engine_stats, OPATH / 'stats.out') write_dict_to_file(job_stats, OPATH / 'stats.out') - try: - with open(OPATH / 'accounts.json', 'w') as f: - json_string = json.dumps(sc.accounts.to_dict()) - f.write(json_string) - except TypeError: - raise TypeError(f"{sc.accounts} could not be parsed by json.dump") + if args.accounts: + try: + with open(OPATH / 'accounts.json', 'w') as f: + json_string = json.dumps(sc.accounts.to_dict()) + f.write(json_string) + except TypeError: + raise TypeError(f"{sc.accounts} could not be parsed by json.dump") print("Output directory is: ", OPATH) # If output is enabled, the user wants this information as last output diff --git a/raps/engine.py b/raps/engine.py index fff515f..39ec7f0 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -136,7 +136,8 @@ class Engine: self.running.remove(job) self.jobs_completed += 1 job_stats = job.statistics() - #self.accounts.update_account_statistics(job_stats) + if self.accounts: + self.accounts.update_account_statistics(job_stats) self.job_history_dict.append(job_stats.__dict__) # Free the nodes via the resource manager. 
self.resource_manager.free_nodes_from_job(job) -- GitLab From 64fea8785a452711bc37369112b2af8e813b3697 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 13 Feb 2025 22:08:10 -0500 Subject: [PATCH 013/388] Change args.reschedule == "poisson" to args.arrival == "poisson" --- multi-part-sim.py | 5 +++-- raps/dataloaders/adastraMI250.py | 13 ++++++------- raps/dataloaders/frontier.py | 4 ++-- raps/dataloaders/fugaku.py | 8 ++++---- raps/dataloaders/lassen.py | 27 ++++++++++++++++----------- raps/dataloaders/marconi100.py | 8 ++++---- raps/resmgr.py | 2 +- raps/telemetry.py | 5 +++-- 8 files changed, 39 insertions(+), 33 deletions(-) diff --git a/multi-part-sim.py b/multi-part-sim.py index 342d463..37a032a 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -49,13 +49,14 @@ if args.replay: job['nodes_required'] = random.randint(1, args.scale) job['requested_nodes'] = None # Setting to None triggers scheduler to assign nodes - if args.reschedule == 'poisson': + if args.arrival == 'poisson': for job in tqdm(jobs, desc="Rescheduling jobs"): partition = job['partition'] partition_config = configs[partition_names.index(partition)] job['requested_nodes'] = None job['submit_time'] = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME']) - elif args.reschedule == 'submit-time': + + elif args.arrival == 'prescribed': raise NotImplementedError else: # Synthetic workload diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 7f5c6af..1dcc95f 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -7,8 +7,8 @@ # to simulate the dataset python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastra - # to reschedule - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastra --reschedule poisson + # to replay with different arrival distribution + python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastra --arrival poisson # to fast-forward 60 days and replay for 1 day python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastra -ff 60d -t 1d @@ -56,7 +56,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): count_jobs_notOK = 0 config = kwargs.get('config') min_time = kwargs.get('min_time', None) - reschedule = kwargs.get('reschedule') + arrival = kwargs.get('arrival') fastforward = kwargs.get('fastforward') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') @@ -157,21 +157,20 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): if fastforward: time_offset -= fastforward - if reschedule == 'poisson': # Let the scheduler reschedule the jobs + if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution scheduled_nodes = None time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) else: # Prescribed replay scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() if time_offset >= 0 and wall_time > 0: - #print("start_time",time_start,"\tend_time",time_end,"\twall_time",wall_time,"\tquanta wall time",gpu_trace.size * TRACE_QUANTA ) job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [],[],wall_time, end_state, scheduled_nodes, time_offset, job_id, priority) jobs.append(job_info) else: - count_jobs_notOK = count_jobs_notOK + 1 + count_jobs_notOK += 1 - print("many jobs not OK !!!!!!!!!!!!!!! 
: ",count_jobs_notOK) + print("jobs not added: ", count_jobs_notOK) return jobs def xname_to_index(xname: str, config: dict): diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index a243a01..ab4d47d 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -61,7 +61,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar config = kwargs.get('config') encrypt_bool = kwargs.get('encrypt') fastforward = kwargs.get('fastforward') - reschedule = kwargs.get('reschedule') + arrival = kwargs.get('arrival') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') @@ -153,7 +153,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar if '' in xnames: continue - if reschedule == 'poisson': # Let the scheduler reschedule the jobs + if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution scheduled_nodes = None time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) time_start = None diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index fec9125..601119f 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -7,10 +7,10 @@ Also, power in F-Data is only given at node-level. We can use node-level power by adding the --validate option. - The '--reschedule poisson' will compute submit times from Poisson distribution, instead of using + The '--arrival poisson' will compute submit times from Poisson distribution, instead of using the submit times given in F-Data. - python main.py --system fugaku -f /path/to/21_04.parquet --reschedule poisson --validate + python main.py --system fugaku -f /path/to/21_04.parquet --arrival poisson --validate """ import pandas as pd @@ -50,7 +50,7 @@ def load_data_from_df(df, **kwargs): """ encrypt_bool = kwargs.get('encrypt') fastforward = kwargs.get('fastforward') - reschedule = kwargs.get('reschedule') + arrival = kwargs.get('arrival') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') config = kwargs.get('config') @@ -84,7 +84,7 @@ def load_data_from_df(df, **kwargs): #scheduled_nodes = row['nnuma'] if 'nnuma' in df.columns else 0 scheduled_nodes = None submit_time = row['adt'] if 'adt' in df.columns else min_time - if reschedule == 'poisson': # Let the scheduler reschedule the jobs + if arrival == 'poisson': # Modify the arrival times of according to Poisson distribution time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) else: time_offset = (submit_time - min_time).total_seconds() # Compute time offset in seconds diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 5aaa193..8e2c37f 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -19,8 +19,8 @@ Usage Instructions: # to simulate the dataset as submitted python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen - # to reschedule - python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --reschedule poisson + # to modify the submit times of the telemetry according to Poisson distribution + python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson # to fast-forward 37 days and replay for 1 day python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 37d -t 1d @@ -56,7 +56,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): """ config = kwargs.get('config') jid = kwargs.get('jid', '*') - reschedule = kwargs.get('reschedule') + arrival = 
     fastforward = kwargs.get('fastforward')
     verbose = kwargs.get('verbose')
     min_time = kwargs.get('min_time', None)
@@ -64,6 +64,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
     if fastforward:
         print(f"fast-forwarding {fastforward} seconds")
 
+    allocation_df['job_submit_time'] = pd.to_datetime(allocation_df['job_submit_time'], format='mixed', errors='coerce')
     allocation_df['begin_time'] = pd.to_datetime(allocation_df['begin_time'], format='mixed', errors='coerce')
     allocation_df['end_time'] = pd.to_datetime(allocation_df['end_time'], format='mixed', errors='coerce')
@@ -121,14 +122,17 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
 
         net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3)
 
-        if reschedule == 'poisson': # Let the scheduler reschedule the jobs
+        if arrival == 'poisson': # Modify the submit times according to Poisson process
             scheduled_nodes = None
-            time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME'])
-        else:
+            time_submit = next_arrival(1/config['JOB_ARRIVAL_TIME'])
+            time_start = None # Scheduler will determine start time
+        else: # Prescribed replay
             scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df)
-            time_offset = compute_time_offset(row['begin_time'], min_time)
+            time_submit = compute_time_offset(row['job_submit_time'], min_time)
+            time_start = compute_time_offset(row['begin_time'], min_time)
         if fastforward:
-            time_offset -= fastforward
+            time_submit -= fastforward
+            time_start -= fastforward
 
         if verbose:
             print('ib_tx, ib_rx, samples:', ib_tx, ib_rx, samples)
@@ -136,7 +140,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
             print('rx:', net_rx)
             print('scheduled_nodes:', nodes_required, scheduled_nodes)
 
-        if time_offset >= 0:
+        if time_submit >= 0:
             job_info = job_dict(nodes_required,
                                 row['hashed_user_id'],
@@ -144,9 +148,10 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
                                 cpu_trace, gpu_trace, net_tx, net_rx, wall_time,
                                 row['exit_status'], scheduled_nodes,
-                                time_offset,
+                                time_submit,
                                 job_id,
-                                row.get('priority', 0))
+                                row.get('priority', 0),
+                                time_start)
 
             job_list.append(job_info)

diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py
index af3d1fb..65de113 100644
--- a/raps/dataloaders/marconi100.py
+++ b/raps/dataloaders/marconi100.py
@@ -11,8 +11,8 @@
     # to simulate the dataset
     python main.py -f /path/to/job_table.parquet --system marconi100
 
-    # to reschedule
-    python main.py -f /path/to/job_table.parquet --system marconi100 --reschedule poisson
+    # to replay using modified arrival times
+    python main.py -f /path/to/job_table.parquet --system marconi100 --arrival poisson
 
     # to fast-forward 60 days and replay for 1 day
     python main.py -f /path/to/job_table.parquet --system marconi100 -ff 60d -t 1d
@@ -59,7 +59,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
     """
     config = kwargs.get('config')
     min_time = kwargs.get('min_time', None)
-    reschedule = kwargs.get('reschedule')
+    arrival = kwargs.get('arrival')
     fastforward = kwargs.get('fastforward')
     validate = kwargs.get('validate')
     jid = kwargs.get('jid', '*')
@@ -149,7 +149,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
             time_start -= fastforward
             time_submit -= fastforward
 
-        if reschedule == 'poisson': # Let the scheduler reschedule the jobs
+        if arrival == 'poisson': # Modify the arrival times according to Poisson distribution
             scheduled_nodes = None
             time_submit = next_arrival(1/config['JOB_ARRIVAL_TIME'])
             time_start = None
diff --git a/raps/resmgr.py b/raps/resmgr.py
a/raps/resmgr.py b/raps/resmgr.py
index 8abce81..fab490f 100644
--- a/raps/resmgr.py
+++ b/raps/resmgr.py
@@ -21,7 +21,7 @@ class ResourceManager:
         if job.requested_nodes:  # Telemetry replay case
             job.scheduled_nodes = job.requested_nodes
             self.available_nodes = [n for n in self.available_nodes if n not in job.scheduled_nodes]
-        else:  # Synthetic or reschedule case
+        else:  # Synthetic or case using modified/poisson arrival times
             job.scheduled_nodes = self.available_nodes[:job.nodes_required]
             self.available_nodes = self.available_nodes[job.nodes_required:]

diff --git a/raps/telemetry.py b/raps/telemetry.py
index 9d2fd83..ab4f237 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -17,7 +17,8 @@ if __name__ == "__main__":
                         ' -or- filename.npz (overrides --workload option)')
     parser.add_argument('-p', '--plot', action='store_true', help='Output plots')
     parser.add_argument('--system', type=str, default='frontier', help='System config to use')
-    parser.add_argument('--reschedule', action='store_true', help='Reschedule the telemetry workload')
+    choices = ['prescribed', 'poisson']
+    parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
     args = parser.parse_args()
@@ -84,7 +85,7 @@ if __name__ == "__main__":
     if args.replay[0].endswith(".npz"):
         print(f"Loading {args.replay[0]}...")
         jobs = td.load_snapshot(args.replay[0])
-        if args.reschedule:
+        if args.arrival == "poisson":
             for job in tqdm(jobs, desc="Updating requested_nodes"):
                 job['requested_nodes'] = None
                 job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME'])
--
GitLab

From b3dcd88b5d8fcd94769a2810f5392a0f07cfd123 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Fri, 14 Feb 2025 11:16:45 -0500
Subject: [PATCH 014/388] Improve documentation of different ways to modify
 telemetry replay, and better annotations and restructuring of args.py

---
 README.md | 14 ++++++++++++--
 args.py   | 46 +++++++++++++++++++++++++++++-----------------
 2 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 3d4db55..5b953b9 100644
--- a/README.md
+++ b/README.md
@@ -70,9 +70,19 @@ This will simulate synthetic workloads on two partitions as defined in `config/s
 This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this file to pm100.npz for clarity. Note: can control-C when the simulation starts. Now, this pm100.npz file can be used with `multi-part-sim.py` as follows:

-    python multi-part-sim.py -x setonix/* -f pm100.npz --reschedule --scale 192
+    python multi-part-sim.py -x setonix/* -f pm100.npz --arrival poisson --scale 192

-The `--reschedule` flag will use the internal scheduler to determine what nodes to schedule for each job, and the `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition).
+## Modifications to telemetry replay
+
+There are four ways to modify replaying of telemetry data:
+
+ 1. `--arrival`. Changing the arrival time distribution - replay cases will default to `--arrival prescribed`, where the jobs will be submitted exactly as they were submitted on the physical machine.
This can be changed to `--arrival poisson` to change when the jobs arrive, which is especially useful in cases where there may be gaps in time, e.g., when the system goes down for several days, or the system is underutilized.
+
+ 2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler.
+
+ 3. `--scale`. Changing the scale of each job in the telemetry data. The `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition), and randomly select the number of nodes for each job from one to the max. This flag is useful when replaying telemetry from a larger system onto a smaller system.
+
+ 4. `--shuffle`. Shuffle the jobs before replaying.

 ## Job-level power output example for replay of single job

diff --git a/args.py b/args.py
index 9703a85..855e244 100644
--- a/args.py
+++ b/args.py
@@ -3,41 +3,53 @@ import sys
 from raps.schedulers.default import PolicyType

 parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)')
+
+# System configurations
+parser.add_argument('--system', type=str, default='frontier', help='System config to use')
+parser.add_argument('-x', '--partitions', nargs='+', default=None, help='List of machine configurations to use, e.g., -x setonix-cpu setonix-gpu')
 parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model')
-parser.add_argument('--start', type=str, help='ISO8601 string for start of simulation')
-parser.add_argument('--end', type=str, help='ISO8601 string for end of simulation')
+
+# Simulation runtime options
+parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
 parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout')
-parser.add_argument('-e', '--encrypt', action='store_true', help='Encrypt any sensitive data in telemetry')
 parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule')
-parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
-parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
 parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
+choices = ['layout1', 'layout2']
+parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI')
+parser.add_argument('--start', type=str, help='ISO8601 string for start of simulation')
+parser.add_argument('--end', type=str, help='ISO8601 string for end of simulation')
 parser.add_argument('--seed', action='store_true', help='Set random number seed for deterministic simulation')
-parser.add_argument('-f', '--replay', nargs='+', type=str, help='Either: path/to/joblive path/to/jobprofile' + \
-                    ' -or- filename.npz (overrides --workload option)')
-choices = ['prescribed', 'poisson']
-parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})')
 parser.add_argument('-u', '--uncertainties', action='store_true', help='Change from
floating point units to floating point units with uncertainties.' + \
                    ' Very expensive w.r.t. simulation time!')
+
+# Output options
 parser.add_argument('-o', '--output', action='store_true', help='Output power, cooling, and loss models for later analysis')
 parser.add_argument('-p', '--plot', nargs='+', choices=['power', 'loss', 'pue', 'temp', 'util'],
                     help='Specify one or more types of plots to generate: power, loss, pue, util, temp')
 choices = ['png', 'svg', 'jpg', 'pdf', 'eps']
 parser.add_argument('--imtype', type=str, choices=choices, default=choices[0], help='Plot image type')
+
+# Telemetry data
+parser.add_argument('-f', '--replay', nargs='+', type=str, help='Either: path/to/joblive path/to/jobprofile' + \
+                    ' -or- filename.npz (overrides --workload option)')
+parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
+parser.add_argument('-e', '--encrypt', action='store_true', help='Encrypt any sensitive data in telemetry')
+parser.add_argument('--validate', action='store_true', help='Use node power instead of CPU/GPU utilizations')
+parser.add_argument('--jid', type=str, default='*', help='Replay job id')
 parser.add_argument('--scale', type=int, default=0, help='Scale telemetry to max nodes specified in order to run telemetry on a smaller target system/partition, e.g., --scale 192')
-parser.add_argument('--system', type=str, default='frontier', help='System config to use')
+
+# Synthetic workloads
+choices = ['random', 'benchmark', 'peak', 'idle']
+parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')
+
+# Scheduling options
 choices = ['default', 'nrel', 'anl', 'flux']
 parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler')
 policies = [policy.value for policy in PolicyType]
+choices = ['prescribed', 'poisson']
+parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})')
 parser.add_argument('--policy', type=str, choices=policies, default=None, help='Schedule policy to use')
-choices = ['random', 'benchmark', 'peak', 'idle']
-parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')
-choices = ['layout1', 'layout2']
-parser.add_argument('-x', '--partitions', nargs='+', default=None, help='List of machine configurations to use, e.g., -x setonix-cpu setonix-gpu')
-parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI')
 parser.add_argument('--accounts', action='store_true', help='Flag indicating if accounts should be tracked')
 parser.add_argument('--accounts-json', type=str, help='Json of account stats generated in previous run. see raps/accounts.py')
--
GitLab

From 398b8fc1b5743e2f8ea70c29e6bb1a9ed66fc46d Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 14 Feb 2025 15:22:23 -0500
Subject: [PATCH 015/388] Early check on available nodes compared to number of
 requested nodes.
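(Editor's illustration: a minimal, self-contained sketch of the check this commit introduces. The names below are assumed for illustration only, not the project's API; the real change lives in raps/schedulers/default.py and, after the follow-up commit, uses the `<=` form.)

    # Hypothetical sketch: answer cheaply when the request cannot possibly fit,
    # and only then pay for building a set and testing subset membership.
    def requested_nodes_available(requested: list, available: list) -> bool:
        if len(requested) <= len(available):
            return set(requested).issubset(available)
        return False  # more nodes requested than are free

    assert requested_nodes_available([0, 1], [0, 1, 2])
    assert not requested_nodes_available([0, 1, 2, 3], [0, 1, 2])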
---
 raps/engine.py             | 8 ++++++--
 raps/schedulers/default.py | 5 ++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index 39ec7f0..57088b5 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, List
 import dataclasses
 import pandas as pd
@@ -65,7 +65,11 @@ class Engine:
         )
         print(f"Using scheduler: {scheduler_type}")

-    def eligible_jobs(self, jobs_to_submit):
+    def eligible_jobs(self, jobs_to_submit: List):
+        """
+        Returns the list of eligible jobs and
+        modifies jobs_to_submit, removing them from the passed list (mutable)!
+        """
         # Build a list of jobs whose submit_time is <= current_time.
         eligible = [job for job in jobs_to_submit if job['submit_time'] <= self.current_time]
         # Remove those jobs from jobs_to_submit:
diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py
index aa6954f..4435da7 100644
--- a/raps/schedulers/default.py
+++ b/raps/schedulers/default.py
@@ -41,7 +41,10 @@ class Scheduler:
             # Make sure the requested nodes are available.
             nodes_available = False
             if job.requested_nodes:  # nodes specified, i.e., telemetry replay
-                nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
+                if len(job.requested_nodes) < len(self.resource_manager.available_nodes):
+                    nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
+                else:
+                    break
             else:  # synthetic
                 nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required
--
GitLab

From cfef006e68a9434b6302ce78cfc182b4e2d5c968 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 14 Feb 2025 15:32:38 -0500
Subject: [PATCH 016/388] Fixed < to <= (exact fits of nodes available should
 still schedule)

---
 raps/schedulers/default.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py
index 4435da7..fc2de0e 100644
--- a/raps/schedulers/default.py
+++ b/raps/schedulers/default.py
@@ -30,6 +30,7 @@ class Scheduler:
         else:
             raise ValueError(f"Policy not implemented: {self.policy}")

+    @profile
     def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
         # Sort the queue in place.
        if not sorted:
@@ -41,7 +42,7 @@ class Scheduler:
             # Make sure the requested nodes are available.
             nodes_available = False
             if job.requested_nodes:  # nodes specified, i.e., telemetry replay
-                if len(job.requested_nodes) < len(self.resource_manager.available_nodes):
+                if len(job.requested_nodes) <= len(self.resource_manager.available_nodes):
                     nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
                 else:
                     break
--
GitLab

From 7ba6d4d8501e43104ca255e8cec2eb19b4298660 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 14 Feb 2025 15:34:54 -0500
Subject: [PATCH 017/388] Removing @profile hint

---
 raps/schedulers/default.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py
index fc2de0e..a070e4b 100644
--- a/raps/schedulers/default.py
+++ b/raps/schedulers/default.py
@@ -30,7 +30,6 @@ class Scheduler:
         else:
             raise ValueError(f"Policy not implemented: {self.policy}")

-    @profile
     def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
         # Sort the queue in place.
         if not sorted:
--
GitLab

From 567ff14dc58584481ddec70fc60029f6eb8dd5e0 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 14 Feb 2025 19:37:35 -0500
Subject: [PATCH 018/388] First try to implement the old replay scheduler.

Both schedule versions raise errors.
With the given dataset both placement checks try to schedule on unavailable
nodes. This needs to be debugged before continuing and potentially merging.

---
 args.py                   |  2 +-
 raps/engine.py            |  4 +--
 raps/schedulers/replay.py | 70 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 4 deletions(-)
 create mode 100644 raps/schedulers/replay.py

diff --git a/args.py b/args.py
index 855e244..ddc2a6e 100644
--- a/args.py
+++ b/args.py
@@ -44,7 +44,7 @@
 choices = ['random', 'benchmark', 'peak', 'idle']
 parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')

 # Scheduling options
-choices = ['default', 'nrel', 'anl', 'flux']
+choices = ['default', 'replay', 'nrel', 'anl', 'flux']
 parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler')
diff --git a/raps/engine.py b/raps/engine.py
index 57088b5..8e214be 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -220,10 +220,8 @@ class Engine:
             # Identify eligible jobs and add them to the queue.
             self.queue += self.eligible_jobs(jobs_to_submit)

-            # Sort the queue according to the policy
-            self.queue = self.scheduler.sort_jobs(self.queue, self.accounts)
             # Schedule jobs that are now in the queue.
-            self.scheduler.schedule(self.queue, self.running, self.current_time, sorted=True)
+            self.scheduler.schedule(self.queue, self.running, self.current_time, sorted=False)

             # Stop the simulation if no more jobs are running or in the queue.
             if autoshutdown and not self.queue and not self.running and not self.replay:
diff --git a/raps/schedulers/replay.py b/raps/schedulers/replay.py
new file mode 100644
index 0000000..a4fc918
--- /dev/null
+++ b/raps/schedulers/replay.py
@@ -0,0 +1,70 @@
+from ..policy import PolicyType
+
+
+class Scheduler:
+    """
+    Mock Scheduler only considering start time.
+    There is no scheduling going on but job placement according to start time.
+
+    Default job scheduler with various scheduling policies.
+    """
+
+    def __init__(self, config, policy, resource_manager=None):
+        self.config = config
+        self.policy = PolicyType(policy)
+        if resource_manager is None:
+            raise ValueError("Scheduler requires a ResourceManager instance")
+        self.resource_manager = resource_manager
+        self.debug = False
+
+    def sort_jobs(self, queue, accounts=None):
+        """Sort jobs based on the selected scheduling policy."""
+        return sorted(queue, key=lambda job: job.start_time)
+
+### NOTE:
+# Both schedule and schedule_v2 do not work, as the resource_manager claims nodes not available.
+# This needs to be fixed.
+
+    def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
+        #### DOES NOT WORK, Nodes are not available! in resource_manager.assign_nodes_to_job!
+        # Sort the queue in place.
+        if not sorted:
+            queue[:] = self.sort_jobs(queue, accounts)
+
+        # Filter Jobs with start_time in this epoch
+        queue[:] = [job for job in queue if job.start_time <= current_time]
+
+        # Iterate over a copy of the queue since we might remove items
+        for job in queue[:]:
+            nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
+            self.resource_manager.assign_nodes_to_job(job, current_time)
+            running.append(job)
+            queue.remove(job)
+            continue
+
+    def schedule_v2(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
+        #### DOES NOT WORK, Nodes are not available!
+        # Sort the queue in place.
+        if not sorted:
+            queue[:] = self.sort_jobs(queue, accounts)
+
+        # Filter Jobs with start_time in this epoch
+        queue[:] = [job for job in queue if job.start_time <= current_time]
+
+        for job in queue[:]:
+            nodes_available = False
+            if job.requested_nodes:  # nodes specified, i.e., telemetry replay
+                if len(job.requested_nodes) <= len(self.resource_manager.available_nodes):
+                    nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
+                else:
+                    continue  # continue instead of break, as later job with specific nodes may still be placed!
+            else:  # synthetic
+                raise ValueError("No jobs requested?")
+
+            if nodes_available:
+                self.resource_manager.assign_nodes_to_job(job, current_time)
+                running.append(job)
+                queue.remove(job)
+            else:
+                raise ValueError("Nodes not available!")  # Jobs may be queued
+                pass  # Try next time
--
GitLab

From c25b3c8f3d87823dc76c4d17b9c354a147308565 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 18 Feb 2025 16:25:12 -0500
Subject: [PATCH 019/388] Added a replay scheduler, for testing and reworked
 Job class tick

Modified Job so that jobs have:
time_submit time_limit time_start time_end wall_time trace_time
and current_time

Modified engine:
tick: introduced new prepare_timestep function consisting of two tasks from tick:
1. moved newly downed nodes out of tick
2. moved completed jobs out of tick
prepare_timestep is now called at the beginning of each run_simulation loop

Added the replay scheduler

Modified the default scheduler to be able to use the replay policy, matching
the replay scheduler functionality

Finally modified workload.py to match the new job description.

Note: this is not yet fully tested; this breaks everything but frontier telemetry.
Next: Testing, and fixing the related issues!
---
 main.py                      |   4 +--
 raps/dataloaders/frontier.py |  58 +++++++++++++++++++++++-------
 raps/engine.py               |  68 ++++++++++++++++++++++++++----------
 raps/job.py                  |  41 +++++++++++++++-------
 raps/resmgr.py               |   3 +-
 raps/schedulers/default.py   |  37 +++++++++++++++-----
 raps/schedulers/replay.py    |  31 +++------------
 raps/telemetry.py            |   2 +-
 raps/workload.py             |  60 +++++++++++++++++++------------
 9 files changed, 198 insertions(+), 106 deletions(-)

diff --git a/main.py b/main.py
index fad83a1..cd0b3c4 100644
--- a/main.py
+++ b/main.py
@@ -113,7 +113,7 @@ if args.replay:
     if args.time:
         timesteps = convert_to_seconds(args.time)
     else:
-        timesteps = int(max(job['wall_time'] + job['submit_time'] for job in jobs)) + 1
+        timesteps = int(max(job['wall_time'] + job['start_time'] for job in jobs)) + 1

     print(f'Simulating {len(jobs)} jobs for {timesteps} seconds')
     time.sleep(1)
@@ -124,7 +124,7 @@ else: # Synthetic jobs

     if args.verbose:
         for job_vector in jobs:
-            job = Job(job_vector, 0)
+            job = Job(job_vector, 0) # What does 0 stand for here?
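            # [editor's note] The 0 is the current_time argument: Job() never reads it
            # (raps/telemetry.py notes "current_time is never used in Job()" below), and
            # PATCH 020 drops it from the constructor.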
print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace), '\twall_time(s):', job.wall_time)
         time.sleep(2)
diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py
index ab4d47d..9214fa8 100644
--- a/raps/dataloaders/frontier.py
+++ b/raps/dataloaders/frontier.py
@@ -67,6 +67,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar

     if fastforward:
         print(f"fast-forwarding {fastforward} seconds")
+    else:
+        fastforward = 0

     min_time = kwargs.get('min_time', None)
@@ -87,7 +89,10 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
     if min_time:
         time_zero = min_time
     else:
-        time_zero = jobs_df['time_snapshot'].min()
+        time_zero = jobs_df['time_snapshot'].min() # Earliest time snapshot within the day!
+        first_start_time = jobs_df['time_start'].min()
+        diff = time_zero - first_start_time # Check if fast forward makes sense!
+        fastforward += diff.total_seconds()

     num_jobs = len(jobs_df)
     print("time_zero:", time_zero, "num_jobs", num_jobs)
@@ -134,19 +139,41 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
         cpu_trace[np.isnan(cpu_trace)] = 0
         gpu_trace[np.isnan(gpu_trace)] = 0

-        wall_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds

-        time_submit = jobs_df.loc[jidx, 'time_submission']
-        diff = time_submit - time_zero
-        time_submit = max(diff.total_seconds(), 0)
+        time_submit_timestamp = jobs_df.loc[jidx, 'time_submission']
+        diff = time_submit_timestamp - time_zero
+        # time_submit = max(diff.total_seconds(), 0)
+        time_submit = diff.total_seconds()
+
+        time_limit = jobs_df.loc[jidx, 'time_limit'] # time limit in seconds
+
+        time_start_timestamp = jobs_df.loc[jidx, 'time_start']
+        diff = time_start_timestamp - time_zero
+        # time_start = max(diff.total_seconds(), 0)
+        time_start = diff.total_seconds()
+
+        time_end_timestamp = jobs_df.loc[jidx, 'time_end']
+        diff = time_end_timestamp - time_zero
+        time_end = diff.total_seconds()
+
+        wall_time = time_end - time_start
+        if np.isnan(wall_time):
+            wall_time = 0
+
+        trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds
+        if wall_time > trace_time:
+            missing_steps = int(wall_time - trace_time)
+            cpu_trace = np.concatenate((cpu_trace,np.array([cpu_min_power] * missing_steps)))
+            gpu_trace = np.concatenate((gpu_trace,np.array([cpu_min_power] * missing_steps)))
+            wall_time = trace_time # Pretending to have a full trace
+            print(f"Job: {job_id} extended {missing_steps} Values with idle power!")
+            #raise ValueError(f"Job: {job_id} {wall_time} > {trace_time}")

-        time_start = jobs_df.loc[jidx, 'time_start']
-        diff = time_start - time_zero
-        time_start = max(diff.total_seconds(), 0)

         if fastforward:
-            time_start -= fastforward
             time_submit -= fastforward
+            time_start -= fastforward
+            time_end -= fastforward

         xnames = jobs_df.loc[jidx, 'xnames']
         # Don't replay any job with an empty set of xnames
@@ -155,8 +182,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar

         if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution
             scheduled_nodes = None
-            time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME'])
+            time_offset = next_arrival(1 / config['JOB_ARRIVAL_TIME'])
+            time_start = None # ?
+            time_end = None # ?
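            # [editor's note] The None values appear intentional: with poisson arrivals
            # the scheduler determines the start and end times, as the other dataloaders
            # put it ("time_start = None  # Scheduler will determine start time").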
priority = aging_boost(nodes_required) else: # Prescribed replay @@ -170,8 +198,12 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar print("ignoring job b/c zero trace:", jidx, time_submit, time_start, nodes_required) if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_submit >= 0: - job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], wall_time, - end_state, scheduled_nodes, time_submit, job_id, priority, time_start) + job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], + end_state, scheduled_nodes, + job_id, priority, # partition missing + submit_time=time_submit, time_limit=time_limit, + start_time=time_start, end_time=time_end, + wall_time=wall_time, trace_time=trace_time) jobs.append(job_info) return jobs diff --git a/raps/engine.py b/raps/engine.py index 8e214be..54e136e 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -5,6 +5,7 @@ import pandas as pd import sys from .job import Job, JobState +from .policy import PolicyType from .network import network_utilization from .utils import summarize_ranges, expand_ranges, get_utilization from .utils import sum_values, min_value, max_value @@ -77,18 +78,30 @@ class Engine: # Convert them to Job instances and build list of eligible jobs. eligible_jobs_list = [] for job_data in eligible: - job_instance = Job(job_data, self.current_time) + job_instance = Job(job_data, self.current_time) # current_time is not used in Job() eligible_jobs_list.append(job_instance) return eligible_jobs_list - def tick(self): - """Simulate a timestep.""" + def prepare_timestep(self, replay:bool = True): completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time] - # Simulate node failure - newly_downed_nodes = self.resource_manager.node_failure(self.config['MTBF']) - for node in newly_downed_nodes: - self.power_manager.set_idle(node) + for job in completed_jobs: + self.running.remove(job) + self.jobs_completed += 1 + job_stats = job.statistics() + if self.accounts: + self.accounts.update_account_statistics(job_stats) + self.job_history_dict.append(job_stats.__dict__) + # Free the nodes via the resource manager. + self.resource_manager.free_nodes_from_job(job) + + if not replay: + # Simulate node failure + newly_downed_nodes = self.resource_manager.node_failure(self.config['MTBF']) + for node in newly_downed_nodes: + self.power_manager.set_idle(node) + else: + newly_downed_nodes = [] # Update active/free nodes self.num_free_nodes = len(self.resource_manager.available_nodes) @@ -96,17 +109,29 @@ class Engine: - len(self.resource_manager.available_nodes) \ - len(self.resource_manager.down_nodes) + + return completed_jobs, newly_downed_nodes + + + def tick(self): + """Simulate a timestep.""" + # Update running time for all running jobs scheduled_nodes = [] cpu_utils = [] gpu_utils = [] net_utils = [] for job in self.running: - if job.end_time == self.current_time: + if job.end_time <= self.current_time: job.state = JobState.COMPLETED if job.state == JobState.RUNNING: job.running_time = self.current_time - job.start_time + if job.running_time > job.trace_time: + raise ValueError(f"Trace Ended before job ended!\n\ + {job.running_time} > {job.trace_time}\n\ + {len(job.cpu_trace)} vs. 
{job.running_time // self.config['TRACE_QUANTA']}\
+                    ")
             time_quanta_index = (self.current_time - job.start_time) // self.config['TRACE_QUANTA']
             cpu_util = get_utilization(job.cpu_trace, time_quanta_index)
             gpu_util = get_utilization(job.gpu_trace, time_quanta_index)
@@ -145,9 +170,11 @@ class Engine:
                 else:
                     net_utils.append(0)

-                scheduled_nodes.append(job.scheduled_nodes)
+                scheduled_nodes.append(job.scheduled_nodes) # ?
                 cpu_utils.append(cpu_util)
                 gpu_utils.append(gpu_util)
+            else:
+                raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}")

         if len(scheduled_nodes) > 0:
             self.flops_manager.update_flop_state(scheduled_nodes, cpu_utils, gpu_utils)
@@ -136,15 +161,6 @@ class Engine:
             job.power_history.append(jobs_power[i] * len(job.scheduled_nodes))
         del _running_jobs

-        for job in completed_jobs:
-            self.running.remove(job)
-            self.jobs_completed += 1
-            job_stats = job.statistics()
-            if self.accounts:
-                self.accounts.update_account_statistics(job_stats)
-            self.job_history_dict.append(job_stats.__dict__)
-            # Free the nodes via the resource manager.
-            self.resource_manager.free_nodes_from_job(job)

         # Update the power array UI component
         rack_power, rect_losses = self.power_manager.compute_rack_power()
@@ -192,7 +208,7 @@ class Engine:
         tick_data = TickData(
             current_time=self.current_time,
-            completed=completed_jobs,
+            completed=None,
             running=self.running,
             queue=self.queue,
             down_nodes=expand_ranges(self.down_nodes[1:]),
@@ -216,7 +232,17 @@ class Engine:
         # Sort pending jobs by submit_time.
         jobs_to_submit = sorted(jobs, key=lambda j: j['submit_time'])

+        # Missing preparation:
+        # Remove Jobs that have already ended.
+        # Place jobs that are currently running.
+
+        if self.scheduler.policy == PolicyType.REPLAY:
+            replay = True
+        else:
+            replay = False
+
         for timestep in range(timesteps):
+            completed_jobs, newly_downed_nodes = self.prepare_timestep(replay)
             # Identify eligible jobs and add them to the queue.
             self.queue += self.eligible_jobs(jobs_to_submit)
@@ -231,7 +257,11 @@ class Engine:
             if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0:
                 print(".", end="", flush=True)

-            yield self.tick()
+
+
+            tick_data = self.tick()
+            tick_data.completed = completed_jobs
+            yield tick_data

     def get_job_history_dict(self):
         return self.job_history_dict
diff --git a/raps/job.py b/raps/job.py
index 35ff32d..d1524d7 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -1,7 +1,10 @@
 from enum import Enum

-def job_dict(nodes_required, name, account, cpu_trace, gpu_trace, ntx_trace, nrx_trace, \
-             wall_time, end_state, scheduled_nodes, time_offset, job_id, priority=0, partition=0, start_time=0):
+def job_dict(nodes_required, name, account, \
+             cpu_trace, gpu_trace, ntx_trace, nrx_trace, \
+             end_state, scheduled_nodes, job_id, priority=0, partition=0,
+             submit_time=0, time_limit=0, start_time=0, end_time=0,
+             wall_time=0, trace_time=0):
     """ Return job info dictionary """
     return {
         'nodes_required': nodes_required,
@@ -11,14 +14,18 @@ def job_dict(nodes_required, name, account, cpu_trace, gpu_trace, ntx_trace, nrx
         'gpu_trace': gpu_trace,
         'ntx_trace': ntx_trace,
         'nrx_trace': nrx_trace,
-        'wall_time': wall_time,
         'end_state': end_state,
         'requested_nodes': scheduled_nodes,
-        'submit_time': time_offset,
         'id': job_id,
         'priority': priority,
-        'partition': partition,
-        'start_time': start_time
+        'partition': partition,
+        # Times:
+        'submit_time': submit_time,
+        'time_limit': time_limit,
+        'start_time': start_time,
+        'end_time': end_time,
+        'wall_time': wall_time,
+        'trace_time': trace_time
     }
@@ -36,22 +43,29 @@ class Job:
     """Represents a job to be scheduled and executed in the distributed computing system.

     Each job consists of various attributes such as the number of nodes required for execution,
-    CPU and GPU utilization, wall time, and other relevant parameters (see utils.job_dict).
+    CPU and GPU utilization, trace time, and other relevant parameters (see utils.job_dict).
The job can transition through different states during its lifecycle, including PENDING, RUNNING, COMPLETED, CANCELLED, FAILED, or TIMEOUT. """ _id_counter = 0 def __init__(self, job_dict, current_time, state=JobState.PENDING, account=None): + # # current_time unused! # Initializations: - self.start_time = None - self.end_time = None - self.running_time = 0 self.power = 0 self.scheduled_nodes = [] self.power_history = [] self._state = state self.account = account + # Times: + self.submit_time = None # Actual submit time + self.time_limit = None # Time limit set at submission + self.start_time = None # Actual start time when executing or from telemetry + self.end_time = None # Actual end time when executing or from telemetry + self.wall_time = None # end_time - start_time + self.trace_time = None # Time period for which traces are available + self.running_time = 0 # Current running time updated when simulating + # If a job dict was given, override the values from the job_dict: for key, value in job_dict.items(): setattr(self, key, value) @@ -63,10 +77,13 @@ class Job: """Return a string representation of the job.""" return (f"Job(id={self.id}, name={self.name}, account={self.account}, " f"nodes_required={self.nodes_required}, " - f"cpu_trace={self.cpu_trace}, gpu_trace={self.gpu_trace}, wall_time={self.wall_time}, " + f"cpu_trace={self.cpu_trace}, gpu_trace={self.gpu_trace}, " f"end_state={self.end_state}, requested_nodes={self.requested_nodes}, " - f"submit_time={self.submit_time}, start_time={self.start_time}, " - f"end_time={self.end_time}, running_time={self.running_time}, state={self._state}, " + f"submit_time={self.submit_time}, time_limit={self.time_limit}, " + f"start_time={self.start_time}, end_time={self.end_time}, " + f"wall_time={self.wall_time}, " + f"trace_time={self.trace_time}, " + f"running_time={self.running_time}, state={self._state}, " f"scheduled_nodes={self.scheduled_nodes}, power={self.power}, " f"power_history={self.power_history})") diff --git a/raps/resmgr.py b/raps/resmgr.py index fab490f..ee3caef 100644 --- a/raps/resmgr.py +++ b/raps/resmgr.py @@ -25,7 +25,8 @@ class ResourceManager: job.scheduled_nodes = self.available_nodes[:job.nodes_required] self.available_nodes = self.available_nodes[job.nodes_required:] - # Set job start and end times + # Set job start and end times according to simulation + # This overrides actual times from telemetry and set state to 'running' job.start_time = current_time job.end_time = current_time + job.wall_time job.state = JobState.RUNNING # Mark job as running diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index a070e4b..b930d49 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -37,21 +37,39 @@ class Scheduler: # Iterate over a copy of the queue since we might remove items for job in queue[:]: - + if self.policy == PolicyType.REPLAY: + if job.start_time >= current_time: + continue + else: + pass + else: + pass # Make sure the requested nodes are available. nodes_available = False if job.requested_nodes: # nodes specified, i.e., telemetry replay if len(job.requested_nodes) <= len(self.resource_manager.available_nodes): - nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes)) + if self.policy == PolicyType.REPLAY: # Check if exact set is available: + nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes)) + else: + # Sufficiently large number of nodes available + # but no exact set is required! 
+                        nodes_available = True
+                        # remove the request for specific nodes and ask for n nodes
+                        job.nodes_required = len(job.requested_nodes)
+                        job.requested_nodes = []
                 else:
-                    break
-            else:  # synthetic
+                    # Next we check if we continue or abort.
+                    # This may be policy dependent. I break by default but this may not be correct.
+                    if self.policy == PolicyType.FCFS or \
+                       self.policy == PolicyType.PRIORITY or \
+                       self.policy == PolicyType.FUGAKU_PTS:  # self.policy == PolicyType ??
+                        break  # The job at the front of the queue doesn't fit, wait until it fits.
+                    elif self.policy == PolicyType.REPLAY:
+                        continue  # The job at the front of the queue doesn't fit, but there are other jobs that may fit, look at the next one.
+                    else:
+                        raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!")
+            else:  # synthetic jobs don't have nodes assigned:
                 nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required

-            if self.policy == PolicyType.REPLAY and current_time < job.start_time:
-                # Don't start replay jobs until they reach their start_time
-                nodes_available = False
-
             if nodes_available:
                 self.resource_manager.assign_nodes_to_job(job, current_time)
                 running.append(job)
@@ -60,6 +78,7 @@
                     scheduled_nodes = summarize_ranges(job.scheduled_nodes)
                     print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}")
             else:
+                # not sure if this does what it should!
                 if self.policy == PolicyType.BACKFILL:
                     # Try to find a backfill candidate from the entire queue.
                     backfill_job = self.find_backfill_job(queue, len(self.resource_manager.available_nodes), current_time)
diff --git a/raps/schedulers/replay.py b/raps/schedulers/replay.py
index a4fc918..d02a1c5 100644
--- a/raps/schedulers/replay.py
+++ b/raps/schedulers/replay.py
@@ -21,37 +21,16 @@ class Scheduler:
         """Sort jobs based on the selected scheduling policy."""
         return sorted(queue, key=lambda job: job.start_time)

-### NOTE:
-# Both schedule and schedule_v2 do not work, as the resource_manager claims nodes not available.
-# This needs to be fixed.
-
-    def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
-        #### DOES NOT WORK, Nodes are not available! in resource_manager.assign_nodes_to_job!
+    def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
         # Sort the queue in place.
- if not sorted: - queue[:] = self.sort_jobs(queue, accounts) + # Skip jobs in queue with start time in the future + if job.start_time >= current_time: + continue - # Filter Jobs with start_time in this epoch - queue[:] = [job for job in queue if job.start_time <= current_time] - - for job in queue[:]: nodes_available = False if job.requested_nodes: # nodes specified, i.e., telemetry replay if len(job.requested_nodes) <= len(self.resource_manager.available_nodes): @@ -66,5 +45,5 @@ class Scheduler: running.append(job) queue.remove(job) else: - raise ValueError("Nodes not available!") # Jobs may be queued - pass # Try next time + # This is a replay so this should not happen + raise ValueError(f"Nodes not available!\nRequested:{job.requested_nodes}\nAvailable:{self.resource_manager.available_nodes}\n{job.__dict__}") diff --git a/raps/telemetry.py b/raps/telemetry.py index ab4f237..da99d64 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -100,7 +100,7 @@ if __name__ == "__main__": submit_times = [] last = 0 for job_vector in jobs: - job = Job(job_vector, 0) + job = Job(job_vector, 0) # current_time is never used in Job() wt_list.append(job.wall_time) nr_list.append(job.nodes_required) submit_times.append(job.submit_time) diff --git a/raps/workload.py b/raps/workload.py index c5dc898..d4d9c54 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -74,6 +74,7 @@ class Workload: mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 + time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 end_state = determine_state(config['JOB_END_PROBS']) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) priority = random.randint(0, MAX_PRIORITY) @@ -83,7 +84,8 @@ class Workload: time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) jobs.append(job_dict(nodes_required, name, account, cpu_trace, gpu_trace, net_tx, net_rx, \ - wall_time, end_state, None, time_to_next_job, None, priority, partition)) + end_state, None, job_index, priority, partition, + time_to_next_job, time_limit, time_to_next_job, time_to_next_job + wall_time, wall_time, wall_time)) return jobs @@ -116,13 +118,17 @@ class Workload: gpu_trace, # GPU trace net_tx, # Network transmit trace net_rx, # Network receive trace - len(gpu_trace) * config['TRACE_QUANTA'], # Wall time 'COMPLETED', # End state - None, # Scheduled nodes - 0, # Time to next job + None, # Requested Nodes ?! This needs to be fixed! 
job_dict and Job class are inconsistent, see Job()
            None,            # Job ID
            100,             # Priority
            partition,       # Partition name
+           0,               # Submit time
+           len(gpu_trace) * config['TRACE_QUANTA'] + 1,  # Time limit
+           0,               # Start time / or None
+           len(gpu_trace) * config['TRACE_QUANTA'],  # End time / or None
+           len(gpu_trace) * config['TRACE_QUANTA'],  # Wall time
+           len(gpu_trace) * config['TRACE_QUANTA']   # Trace time
         )
         print(job_info)
         jobs.append(job_info)  # Add job to the list
@@ -148,19 +154,23 @@
         # Create job info for this partition
         job_info = job_dict(
             config['AVAILABLE_NODES'],  # Nodes required
-            f"Idle Test {partition}",  # Name with partition label
+            f"Idle Test {partition}",   # Name with partition label
             ACCT_NAMES[0],   # User account
             cpu_trace,       # CPU trace
             gpu_trace,       # GPU trace
             net_tx,          # Network transmit trace
             net_rx,          # Network receive trace
-            len(gpu_trace) * config['TRACE_QUANTA'],  # Wall time
             'COMPLETED',     # End state
-            None,            # Scheduled nodes
-            0,               # Time to next job
+            None,            # Requested Nodes ?! This needs to be fixed! job_dict and Job class are inconsistent, see Job()
             None,            # Job ID
             100,             # Priority
-            partition        # Partition name
+            partition,       # Partition name
+            0,               # Submit time
+            len(gpu_trace) * config['TRACE_QUANTA'] + 1,  # Time limit
+            0,               # Start time / or None
+            len(gpu_trace) * config['TRACE_QUANTA'],  # End time / or None
+            len(gpu_trace) * config['TRACE_QUANTA'],  # Wall time
+            len(gpu_trace) * config['TRACE_QUANTA']   # Trace time
         )
         jobs.append(job_info)  # Add job to the list
@@ -182,11 +192,13 @@
         # Max test
         cpu_util, gpu_util = 1, 4
         cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA'])
+
         job_info = job_dict(
             config['AVAILABLE_NODES'],
-            f"Max Test {partition}", account,
-            cpu_trace, gpu_trace, net_tx, net_rx,
-            len(gpu_trace) * config['TRACE_QUANTA'], 'COMPLETED', None, 100, None, 0, partition
+            f"Max Test {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx,
+            'COMPLETED', None, None, 100, partition,
+            0, len(gpu_trace) * config['TRACE_QUANTA'] + 1,
+            0, 10800, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA']
         )
         jobs.append(job_info)
@@ -195,9 +207,10 @@
         cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA'])
         job_info = job_dict(
             config['AVAILABLE_NODES'],
-            f"OpenMxP {partition}", account,
-            cpu_trace, gpu_trace, net_tx, net_rx,
-            len(gpu_trace) * config['TRACE_QUANTA'], 'COMPLETED', None, 300, None, 0, partition
+            f"OpenMxP {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx,
+            'COMPLETED', None, None, 100, partition,
+            0, len(gpu_trace) * config['TRACE_QUANTA'] + 1,
+            10800, 14200, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA']
         )
         jobs.append(job_info)
@@ -206,9 +219,10 @@
         cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA'])
         job_info = job_dict(
             config['AVAILABLE_NODES'],
-            f"HPL {partition}", account,
-            cpu_trace, gpu_trace, net_tx, net_rx,
-            len(gpu_trace) * config['TRACE_QUANTA'], 'COMPLETED', None, 200, None, 0, partition
+            f"HPL {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx,
+            'COMPLETED', None, None, 100, partition,
+            0, len(gpu_trace) * config['TRACE_QUANTA'] + 1,
+            14200, 17800, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA']
         )
         jobs.append(job_info)
@@ -217,11 +231,11 @@
         cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600,
config['TRACE_QUANTA'])
         job_info = job_dict(
             config['AVAILABLE_NODES'],
-            f"Idle Test {partition}", account,
-            cpu_trace, gpu_trace, net_tx, net_rx,
-            len(gpu_trace) * config['TRACE_QUANTA'], 'COMPLETED', None, 0, None, 0, partition
+            f"Idle Test {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx,
+            'COMPLETED', None, None, 100, partition,
+            0, len(gpu_trace) * config['TRACE_QUANTA'] + 1,
+            17800, 21400, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA']
         )
         jobs.append(job_info)

         return jobs
-
--
GitLab

From 111fbc40699a431106de4fe6ca46be2072db9c69 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Thu, 20 Feb 2025 16:03:08 -0500
Subject: [PATCH 020/388] Reworked simulation loop. Added start and end of the
 simulation

The simulation now has a start time and an end time.
Everything runs relative to the reference time zero.
The simulation loop and jobs are adjusted accordingly.
Additionally, a setup stage is added to prepare the running queue.
The data loader is rewritten to adjust the job's related times.
This is reflected in the frontier data loader with extensive comments
on the time layout. (See below)

------
This is an explanation of the frontier data loader:

These form the maximum extent of the simulation time.
telemetry_start and telemetry_end.

[                     ]
^                     ^
telemetry_start       telemetry_end

These values form the maximum extent of the simulation.
Telemetry start == 0! This means that any time before that is negative,
while anything after this is positive.

Next is the actual extent of the simulation:

   [                  ]
   ^                  ^
   simulation_start   simulation_end

The start of the simulation, simulation_start, and telemetry_start are only
the same when fastforward is 0.
In general simulation_end and telemetry_end are the same, as this is the
last time step we can simulate.
Both simulation_start and _end are set in engine.py

Additionally, jobs can have started before telemetry_start,
and can have a recorded ending after simulation_end,

[                         ]
^                         ^
first_start_timestamp     last_end_timestamp

This means that the time between first_start_timestamp and telemetry_start
has no associated values in the traces!
The missing values after simulation_end can be ignored, as the simulation
will have stopped before.

However, the times before telemetry_start have to be padded to generate
correct offsets within their data!
Within the simulation a job's current time is specified as the difference
between its start_time and the current timestep of the simulation.
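(Editor's illustration: a minimal, self-contained sketch of this time layout. The names mirror the description above, but all values are assumed examples and do not come from the repository.)

    # All times below are in seconds relative to telemetry_start == 0.
    TRACE_QUANTA = 20                                   # assumed trace sampling interval
    telemetry_start, telemetry_end = 0, 86400
    fastforward = 3600
    simulation_start = telemetry_start + fastforward    # 3600
    simulation_end = telemetry_end                      # 86400

    # A job that started before telemetry_start and ends inside the window:
    start_time, end_time = -40, 7200
    wall_time = end_time - start_time                   # 7240 s of actual runtime
    # Samples to pad so trace offsets line up with the simulation clock:
    pad_samples = (telemetry_start - start_time) // TRACE_QUANTA    # 2
    # The job's current time at simulation timestep t, and its trace index:
    t = 4000
    running_time = t - start_time                       # 4040
    trace_index = running_time // TRACE_QUANTA          # 202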
With this each job's
- submit_time
- time_limit
- start_time
- end_time
- wall_time (end_time-start_time, actual runtime in seconds)
- trace_time (length of each trace in seconds)
has to be set for use within the simulation
---
 main.py                      |  20 ++--
 raps/dataloaders/frontier.py | 186 +++++++++++++++++++++++++----------
 raps/engine.py               |  84 +++++++++++-----
 raps/job.py                  |  22 +++--
 raps/policy.py               |   2 +
 raps/schedulers/default.py   |  47 ++++++++-
 raps/schedulers/replay.py    |   9 +-
 raps/telemetry.py            |   2 +-
 raps/ui.py                   |   8 +-
 raps/workload.py             |   7 +-
 10 files changed, 281 insertions(+), 106 deletions(-)

diff --git a/main.py b/main.py
index cd0b3c4..c34b78c 100644
--- a/main.py
+++ b/main.py
@@ -75,6 +75,7 @@ if args.replay:
     if args.fastforward:
         args.fastforward = convert_to_seconds(args.fastforward)
+        timestep_start = args.fastforward

     td = Telemetry(**args_dict)
@@ -105,18 +106,16 @@ if args.replay:
     else: # custom data loader
         print(*args.replay)
-        jobs = td.load_data(args.replay)
+        jobs, timestep_start, timestep_end = td.load_data(args.replay)
         td.save_snapshot(jobs, filename=DIR_NAME)

     # Set number of timesteps based on the last job running which we assume
     # is the maximum value of submit_time + wall_time of all the jobs
     if args.time:
-        timesteps = convert_to_seconds(args.time)
-    else:
-        timesteps = int(max(job['wall_time'] + job['start_time'] for job in jobs)) + 1
+        timestep_end = convert_to_seconds(args.time)
+    elif not timestep_end:
+        timestep_end = int(max(job['wall_time'] + job['start_time'] for job in jobs)) + 1

-    print(f'Simulating {len(jobs)} jobs for {timesteps} seconds')
-    time.sleep(1)

 else: # Synthetic jobs
     wl = Workload(config)
@@ -124,14 +123,14 @@ else: # Synthetic jobs

     if args.verbose:
         for job_vector in jobs:
-            job = Job(job_vector, 0) # What does 0 stand for here?
+            job = Job(job_vector)
             print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace), '\twall_time(s):', job.wall_time)
         time.sleep(2)

     if args.time:
-        timesteps = convert_to_seconds(args.time)
+        timestep_end = convert_to_seconds(args.time)
     else:
-        timesteps = 88200 # 24.5 hours
+        timestep_end = 88200 # 24.5 hours

 DIR_NAME = create_casename()
@@ -157,7 +156,8 @@ if args.plot or args.output:
 if args.verbose:
     print(jobs)

-layout_manager.run(jobs, timesteps=timesteps)
+print(f'Simulating {len(jobs)} jobs for {timestep_end - timestep_start} seconds')
+layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end)

 engine_stats = get_engine_stats(sc)
 job_stats = get_job_stats(sc)
diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py
index 9214fa8..61fcc5f 100644
--- a/raps/dataloaders/frontier.py
+++ b/raps/dataloaders/frontier.py
@@ -57,20 +57,78 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
     -------
     list
         The list of parsed jobs.
+
+    telemetry_start
+        the first timestep in which the simulation can be executed.
+
+    telemetry_end
+        the last timestep in which the simulation can be executed.
+    ----
+    Explanation regarding times:
+
+    The loaded dataframe contains
+    a first timestamp with associated data
+    and a last timestamp with associated data
+
+    These form the maximum extent of the simulation time.
+    telemetry_start and telemetry_end.
+
+    [                     ]
+    ^                     ^
+    telemetry_start       telemetry_end
+
+    These values form the maximum extent of the simulation.
+    Telemetry start == 0! This means that any time before that is negative,
+    while anything after this is positive.
+    Next is the actual extent of the simulation:
+
+       [                  ]
+       ^                  ^
+       simulation_start   simulation_end
+
+    The start of the simulation, simulation_start, and telemetry_start are only
+    the same when fastforward is 0.
+    In general simulation_end and telemetry_end are the same, as this is the
+    last time step we can simulate.
+    Both simulation_start and _end are set in engine.py
+
+    Additionally, jobs can have started before telemetry_start,
+    and can have a recorded ending after simulation_end,
+    [                         ]
+    ^                         ^
+    first_start_timestamp     last_end_timestamp
+
+    This means that the time between first_start_timestamp and telemetry_start
+    has no associated values in the traces!
+    The missing values after simulation_end can be ignored, as the simulation will have stopped before.
+
+    However, the times before telemetry_start have to be padded to generate
+    correct offsets within their data!
+    Within the simulation a job's current time is specified as the difference
+    between its start_time and the current timestep of the simulation.
+
+    With this each job's
+    - submit_time
+    - time_limit
+    - start_time
+    - end_time
+    - wall_time (end_time-start_time, actual runtime in seconds)
+    - trace_time (length of each trace in seconds)
+    has to be set for use within the simulation
+
+    The returned values are these three:
+    - The list of parsed jobs. (as a job_dict)
+    - telemetry_start: int (in seconds)
+    - telemetry_end: int (in seconds)
+
+    The implementation follows:
     """
     config = kwargs.get('config')
     encrypt_bool = kwargs.get('encrypt')
-    fastforward = kwargs.get('fastforward')
     arrival = kwargs.get('arrival')
     validate = kwargs.get('validate')
     jid = kwargs.get('jid', '*')
-
-    if fastforward:
-        print(f"fast-forwarding {fastforward} seconds")
-    else:
-        fastforward = 0
-
-    min_time = kwargs.get('min_time', None)
+    debug = kwargs.get('debug')

     # Sort jobs dataframe based on values in time_start column, adjust indices after sorting
     jobs_df = jobs_df[jobs_df['time_start'].notna()]
@@ -85,17 +143,27 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
     jobprofile_df = jobprofile_df.sort_values(by='timestamp')
     jobprofile_df = jobprofile_df.reset_index(drop=True)

-    # Take earliest time as baseline reference
-    if min_time:
-        time_zero = min_time
-    else:
-        time_zero = jobs_df['time_snapshot'].min() # Earliest time snapshot within the day!
-        first_start_time = jobs_df['time_start'].min()
-        diff = time_zero - first_start_time # Check if fast forward makes sense!
-        fastforward += diff.total_seconds()
+    #telemetry_start_timestamp = jobs_df['time_snapshot'].min() # Earliest time snapshot within the day!
+    telemetry_start_timestamp = jobprofile_df['timestamp'].min() # Earliest timestamp within the day!
+    #telemetry_end_timestamp = jobs_df['time_snapshot'].max() # This time has nothing to do with the jobs!
+    telemetry_end_timestamp = jobprofile_df['timestamp'].max() # Latest timestamp within the day!
+
+    # Time that can be simulated  # Take earliest time as baseline reference
+    telemetry_start = 0 # second 0 of the simulation
+    diff = telemetry_end_timestamp - telemetry_start_timestamp
+    telemetry_end = int(diff.total_seconds())
+
+    first_start_timestamp = jobs_df['time_start'].min()
+    diff = first_start_timestamp - telemetry_start_timestamp
+    first_start = int(diff.total_seconds()) # negative seconds or 0

     num_jobs = len(jobs_df)
-    print("time_zero:", time_zero, "num_jobs", num_jobs)
+    if debug:
+        print("num_jobs:", num_jobs)
+        print("telemetry_start:", telemetry_start, "telemetry_end:", telemetry_end)
+        print("telemetry_start_timestamp:", telemetry_start_timestamp, "telemetry_end_timestamp", telemetry_end_timestamp)
+        print("first_start_timestamp:",first_start_timestamp, "last start timestamp:", jobs_df['time_start'].max())

     jobs = []
     # Map dataframe to job state. Add results to jobs list
@@ -112,13 +180,13 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
             name = encrypt(name)

         if validate:
-            cpu_power = jobprofile_df[jobprofile_df['allocation_id']
+            cpu_power = jobprofile_df[jobprofile_df['allocation_id'] \
                                       == allocation_id]['mean_node_power']
             cpu_trace = cpu_power.values
             gpu_trace = cpu_trace

         else:
-            cpu_power = jobprofile_df[jobprofile_df['allocation_id']
+            cpu_power = jobprofile_df[jobprofile_df['allocation_id'] \
                                       == allocation_id]['sum_cpu0_power']
             cpu_power_array = cpu_power.values
             cpu_min_power = nodes_required * config['POWER_CPU_IDLE'] * config['CPUS_PER_NODE']
             cpu_max_power = nodes_required * config['POWER_CPU_MAX'] * config['CPUS_PER_NODE']
             cpu_util = power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power)
             cpu_trace = cpu_util * config['CPUS_PER_NODE']

-            gpu_power = jobprofile_df[jobprofile_df['allocation_id']
+            gpu_power = jobprofile_df[jobprofile_df['allocation_id'] \
                                       == allocation_id]['sum_gpu_power']
             gpu_power_array = gpu_power.values
@@ -139,41 +207,43 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
         cpu_trace[np.isnan(cpu_trace)] = 0
         gpu_trace[np.isnan(gpu_trace)] = 0

-
-        time_submit_timestamp = jobs_df.loc[jidx, 'time_submission']
-        diff = time_submit_timestamp - time_zero
-        # time_submit = max(diff.total_seconds(), 0)
-        time_submit = diff.total_seconds()
+        # Times:
+        submit_timestamp = jobs_df.loc[jidx, 'time_submission']
+        diff = submit_timestamp - telemetry_start_timestamp
+        submit_time = diff.total_seconds()

         time_limit = jobs_df.loc[jidx, 'time_limit'] # time limit in seconds

-        time_start_timestamp = jobs_df.loc[jidx, 'time_start']
-        diff = time_start_timestamp - time_zero
-        # time_start = max(diff.total_seconds(), 0)
-        time_start = diff.total_seconds()
+        start_timestamp = jobs_df.loc[jidx, 'time_start']
+        diff = start_timestamp - telemetry_start_timestamp
+        start_time = diff.total_seconds()

-        time_end_timestamp = jobs_df.loc[jidx, 'time_end']
-        diff = time_end_timestamp - time_zero
-        time_end = diff.total_seconds()
+        end_time_timestamp = jobs_df.loc[jidx, 'time_end']
+        diff = end_time_timestamp - telemetry_start_timestamp
+        end_time = diff.total_seconds()
+
+        wall_time = end_time - start_time
         if np.isnan(wall_time):
             wall_time = 0

         trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds
         if wall_time > trace_time:
             missing_steps = int(wall_time - trace_time)
-            cpu_trace = np.concatenate((cpu_trace,np.array([cpu_min_power] * missing_steps)))
-            gpu_trace = np.concatenate((gpu_trace,np.array([cpu_min_power] * missing_steps)))
-
wall_time = trace_time # Pretending to have a full trace
-            print(f"Job: {job_id} extended {missing_steps} Values with idle power!")
-            #raise ValueError(f"Job: {job_id} {wall_time} > {trace_time}")
-
-
-        if fastforward:
-            time_submit -= fastforward
-            time_start -= fastforward
-            time_end -= fastforward
+            if start_time < 0:
+                cpu_trace = np.concatenate((np.array([cpu_min_power] * missing_steps),cpu_trace))
+                gpu_trace = np.concatenate((np.array([cpu_min_power] * missing_steps),gpu_trace))
+                print(f"Job: {job_id} prepended {missing_steps} Values with idle power!")
+                print(f"{start_time} - {end_time}")
+            elif end_time > telemetry_end:
+                cpu_trace = np.concatenate((cpu_trace,np.array([cpu_min_power] * missing_steps)))
+                gpu_trace = np.concatenate((gpu_trace,np.array([cpu_min_power] * missing_steps)))
+                print(f"Job: {job_id} appended {missing_steps} Values with idle power!")
+                print(f"{start_time} - {end_time}")
+            else:
+                print(f"Job: {job_id} {start_time} - {end_time}!")
+                raise ValueError("Missing values neither at start nor end.")
+            trace_time = wall_time # Pretending to have a full trace, this may not be needed!

         xnames = jobs_df.loc[jidx, 'xnames']
         # Don't replay any job with an empty set of xnames
@@ -182,9 +252,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar

         if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution
             scheduled_nodes = None
-            time_offset = next_arrival(1 / config['JOB_ARRIVAL_TIME'])
-            time_start = None # ?
-            time_end = None # ?
+            submit_time = next_arrival(1 / config['JOB_ARRIVAL_TIME'])
+            start_time = None # ?
+            end_time = None # ?
             priority = aging_boost(nodes_required)

         else: # Prescribed replay
@@ -194,19 +264,31 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
                 indices = xname_to_index(xname, config)
                 scheduled_nodes.append(indices)

+        # Throw out jobs that are not valid!
         if gpu_trace.size == 0:
-            print("ignoring job b/c zero trace:", jidx, time_submit, time_start, nodes_required)
+            print("ignoring job b/c zero trace:", jidx, submit_time, start_time, nodes_required)
+            continue # SKIP!
+        if end_time < telemetry_start:
+            # raise ValueError("Job ends before first recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
+            print("Job ends before first recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
+            continue # SKIP!
+        if start_time > telemetry_end:
+            # raise ValueError("Job starts after last recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
+            print("Job starts after last recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
+            continue # SKIP!
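            # [editor's note] Net effect of the three checks above: only jobs whose
            # [start_time, end_time] span overlaps the telemetry window
            # [telemetry_start, telemetry_end] are kept for replay.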
+
+
-        if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_submit >= 0:
+        if gpu_trace.size > 0 and (jid == job_id or jid == '*'):  # and time_submit >= 0:
             job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [],
                                 end_state, scheduled_nodes, job_id, priority,  # partition missing
-                                submit_time=time_submit, time_limit=time_limit,
-                                start_time=time_start, end_time=time_end,
+                                submit_time=submit_time, time_limit=time_limit,
+                                start_time=start_time, end_time=end_time,
                                 wall_time=wall_time, trace_time=trace_time)
             jobs.append(job_info)

-    return jobs
+    return jobs, telemetry_start, telemetry_end

 def xname_to_index(xname: str, config: dict):
diff --git a/raps/engine.py b/raps/engine.py
index 54e136e..f056022 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -66,10 +66,36 @@ class Engine:
         )
         print(f"Using scheduler: {scheduler_type}")

-    def eligible_jobs(self, jobs_to_submit: List):
+
+    def add_running_jobs_to_queue(self, jobs_to_submit: List):
+        """
+        Modifies jobs_to_submit
+        and self.queue
+
+        This is a preparatory step and should only be called before the main
+        loop of run_simulation.
+        Adds running jobs to the queue, and removes them from the jobs_to_submit.
+        jobs_to_submit still holds the jobs that need to be submitted in the future.
+        """
+        # Build a list of jobs whose start_time is <= current_time.
+        eligible = [job for job in jobs_to_submit if job['start_time'] < self.current_time]
+        # Remove those jobs from jobs_to_submit:
+        jobs_to_submit[:] = [job for job in jobs_to_submit if job['start_time'] >= self.current_time]
+        # Convert them to Job instances and build list of eligible jobs.
+        eligible_jobs_list = []
+        for job_data in eligible:
+            job_instance = Job(job_data)
+            eligible_jobs_list.append(job_instance)
+        self.queue += eligible_jobs_list
+
+
+    def add_eligible_jobs_to_queue(self, jobs_to_submit: List):
         """
-        Returns list of eligible jobs and:
-        modifies the jobs_to_submit removing them from the passed list (Mutable)!
+        Modifies jobs_to_submit
+        and self.queue
+
+        Adds eligible jobs to the queue, and removes them from the jobs_to_submit.
+        jobs_to_submit still holds the jobs that need to be submitted in the future.
         """
         # Build a list of jobs whose submit_time is <= current_time.
         eligible = [job for job in jobs_to_submit if job['submit_time'] <= self.current_time]
@@ -78,14 +104,16 @@ class Engine:
         # Convert them to Job instances and build list of eligible jobs.
         eligible_jobs_list = []
         for job_data in eligible:
-            job_instance = Job(job_data, self.current_time)  # current_time is not used in Job()
+            job_instance = Job(job_data)
             eligible_jobs_list.append(job_instance)
-        return eligible_jobs_list
+        self.queue += eligible_jobs_list

     def prepare_timestep(self, replay:bool = True):
         completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time]
         for job in completed_jobs:
+            job.state = JobState.COMPLETED
+
             self.running.remove(job)
             self.jobs_completed += 1
             job_stats = job.statistics()
@@ -109,7 +137,6 @@ class Engine:
             - len(self.resource_manager.available_nodes) \
             - len(self.resource_manager.down_nodes)

-
         return completed_jobs, newly_downed_nodes

@@ -122,8 +149,6 @@ class Engine:
         gpu_utils = []
         net_utils = []
         for job in self.running:
-            if job.end_time <= self.current_time:
-                job.state = JobState.COMPLETED

             if job.state == JobState.RUNNING:
                 job.running_time = self.current_time - job.start_time
@@ -132,7 +157,7 @@ class Engine:
                        {job.running_time} > {job.trace_time}\n\
                        {len(job.cpu_trace)} vs.
{self.running_time // self.config['TRACE_QUANTA']}\ ") - time_quanta_index = (self.current_time - job.start_time) // self.config['TRACE_QUANTA'] + time_quanta_index = int(job.running_time // self.config['TRACE_QUANTA']) cpu_util = get_utilization(job.cpu_trace, time_quanta_index) gpu_util = get_utilization(job.gpu_trace, time_quanta_index) net_util = 0 @@ -145,9 +170,11 @@ class Engine: else: net_utils.append(0) - scheduled_nodes.append(job.scheduled_nodes) + scheduled_nodes.append(job.scheduled_nodes) # ? cpu_utils.append(cpu_util) gpu_utils.append(gpu_util) + else: + raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}") if len(scheduled_nodes) > 0: self.flops_manager.update_flop_state(scheduled_nodes, cpu_utils, gpu_utils) @@ -225,27 +252,42 @@ class Engine: self.current_time += 1 return tick_data - def run_simulation(self, jobs, timesteps, autoshutdown=False): - """Generator that yields after each simulation tick.""" - self.timesteps = timesteps + def prepare_system_state(self, all_jobs:List, timestep_start): + # Modifies Jobs object + self.current_time = timestep_start + + #keep only jobs that have not yet ended + all_jobs[:] = [job for job in all_jobs if job['end_time'] >= timestep_start] - # Sort pending jobs by submit_time. - jobs_to_submit = sorted(jobs, key=lambda j: j['submit_time']) + all_jobs.sort(key=lambda j: j['submit_time']) - # Missing prepareation: - # Remove Jobs that have already ended. - # Place jobs that are currently running. + self.add_running_jobs_to_queue(all_jobs) + # Now process job queue one by one (needed to get the start_time right!) + for job in self.queue: + self.scheduler.schedule([job], self.running, job.start_time, sorted=True) + if len(self.queue) != len(self.running): + raise ValueError(f"Something went wrong! Not all jobs could be placed!\nPotential confligt in queue:\n{self.queue}") + self.queue = [] # Empty queue needed as addition one by one does not empty the queue! + + def run_simulation(self, jobs, timestep_start, timestep_end, autoshutdown=False): + """Generator that yields after each simulation tick.""" + self.timesteps = timestep_end - timestep_start # Where is this used? + + # Place jobs that are currently running, onto the system. + self.prepare_system_state(jobs, timestep_start) if self.scheduler.policy == PolicyType.REPLAY: replay = True else: replay = False - for timestep in range(timesteps): + for timestep in range(timestep_start,timestep_end): completed_jobs, newly_downed_nodes = self.prepare_timestep(replay) # Identify eligible jobs and add them to the queue. - self.queue += self.eligible_jobs(jobs_to_submit) + #self.queue += self.eligible_jobs(jobs, self.current_time) + #jobs = self.add_eligible_jobs_to_queue(jobs) + self.add_eligible_jobs_to_queue(jobs) # Schedule jobs that are now in the queue. self.scheduler.schedule(self.queue, self.running, self.current_time, sorted=False) @@ -257,8 +299,6 @@ class Engine: if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0: print(".", end="", flush=True) - - tick_data = self.tick() tick_data.completed = completed_jobs yield tick_data diff --git a/raps/job.py b/raps/job.py index d1524d7..1492da2 100644 --- a/raps/job.py +++ b/raps/job.py @@ -49,22 +49,23 @@ class Job: """ _id_counter = 0 - def __init__(self, job_dict, current_time, state=JobState.PENDING, account=None): + def __init__(self, job_dict, state=JobState.PENDING, account=None): # # current_time unused! 
# Initializations: self.power = 0 - self.scheduled_nodes = [] + self.scheduled_nodes = [] # Explicit list of requested nodes + self.nodes_required = 0 # If scheduled_nodes is set this can be derived. self.power_history = [] self._state = state self.account = account # Times: - self.submit_time = None # Actual submit time - self.time_limit = None # Time limit set at submission - self.start_time = None # Actual start time when executing or from telemetry - self.end_time = None # Actual end time when executing or from telemetry - self.wall_time = None # end_time - start_time - self.trace_time = None # Time period for which traces are available - self.running_time = 0 # Current running time updated when simulating + self.submit_time = None # Actual submit time + self.time_limit = None # Time limit set at submission + self.start_time = None # Actual start time when executing or from telemetry + self.end_time = None # Actual end time when executing or from telemetry + self.wall_time = None # end_time - start_time + self.trace_time = None # Time period for which traces are available + self.running_time = 0 # Current running time updated when simulating # If a job dict was given, override the values from the job_dict: for key, value in job_dict.items(): @@ -73,6 +74,9 @@ class Job: if not self.id: self.id = Job._get_next_id() + if self.scheduled_nodes and self.nodes_required == 0: + self.nodes_required = len(self.scheduled_nodes) + def __repr__(self): """Return a string representation of the job.""" return (f"Job(id={self.id}, name={self.name}, account={self.account}, " diff --git a/raps/policy.py b/raps/policy.py index d1bcbf2..5a98838 100644 --- a/raps/policy.py +++ b/raps/policy.py @@ -8,3 +8,5 @@ class PolicyType(Enum): PRIORITY = 'priority' FUGAKU_PTS = 'fugaku_pts' REPLAY = 'replay' + SJF = 'sjf' + LJF = 'ljf' diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index b930d49..0ba986c 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -1,3 +1,4 @@ +from typing import List from enum import Enum from ..utils import summarize_ranges @@ -25,6 +26,10 @@ class Scheduler: return sorted(queue, key=lambda job: job.priority, reverse=True) elif self.policy == PolicyType.FUGAKU_PTS: return self.sort_fugaku_redeeming(queue, accounts) + if self.policy == PolicyType.SJF: + return sorted(queue, key=lambda job: job.time_limit) + if self.policy == PolicyType.LJF: + return sorted(queue, key=lambda job: job.nodes_required) elif self.policy == PolicyType.REPLAY: return sorted(queue, key=lambda job: job.start_time) else: @@ -38,7 +43,7 @@ class Scheduler: # Iterate over a copy of the queue since we might remove items for job in queue[:]: if self.policy == PolicyType.REPLAY: - if job.start_time >= current_time: + if job.start_time > current_time: continue else: pass @@ -61,10 +66,15 @@ class Scheduler: # Next we check if we continue or abort. # This may be policy dependent. I break by default but this may not be correct. if self.policy == PolicyType.FCFS or \ - self.policy == PolicyType.PRIORITY or \ - self.policy == PolicyType.FUGAKU_PTS: # self.policy == PolicyType ?? + self.policy == PolicyType.PRIORITY or\ + self.policy == PolicyType.FUGAKU_PTS or \ + self.policy == PolicyType.LJF or \ + False: # self.policy == PolicyType ?? break # The job at the front of the queue doesnt fit, wait until it fits. 
-                        elif self.policy == PolicyType.REPLAY:
+                        elif self.policy == PolicyType.REPLAY or \
+                             self.policy == PolicyType.BACKFILL or \
+                             self.policy == PolicyType.SJF or\
+                             False:
                             continue  # The job at the front of the queue doesn't fit, but there are other jobs that may fit, look at the next one.
                         else:
                             raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!")
@@ -90,6 +100,35 @@ class Scheduler:
                     scheduled_nodes = summarize_ranges(backfill_job.scheduled_nodes)
                     print(f"t={current_time}: Backfilling job {backfill_job.id} with wall time {backfill_job.wall_time} on nodes {scheduled_nodes}")

+    def prepare_system_state(self,jobs_to_submit:List, running, timestep_start):
+    # def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
+        """
+        In the case of replay and fast forward, previously placed jobs should be present.
+
+        """
+        if self.policy == PolicyType.REPLAY:
+            total_jobs = len(jobs_to_submit)
+            print(f"All jobs: {total_jobs}")
+
+            # Keep only jobs that have an end time in the future.
+            jobs_to_submit[:] = [job for job in jobs_to_submit if job['end_time'] >= timestep_start]
+            print(f"Num jobs in the past: {total_jobs - len(jobs_to_submit)}")
+
+            # Identify jobs that started in the past and split them from the jobs that will start in the future:
+            jobs_to_start_now = [job for job in jobs_to_submit if job['start_time'] < timestep_start]
+            print(f"Num jobs that started in the past: {len(jobs_to_start_now)}")
+
+            jobs_to_submit[:] = [job for job in jobs_to_submit if job['start_time'] >= timestep_start]
+            print(f"Num jobs to be scheduled in the simulation: {len(jobs_to_submit)}")
+
+            # Now schedule them with their original start time.
+            # This has to be done one by one!
+            for job in jobs_to_start_now:
+                self.schedule([job], running, job['start_time'], sorted=True)
+            # self.schedule(jobs_to_start_now, running, 0, False)
+            return jobs_to_submit
+        else:
+            return jobs_to_submit

     def find_backfill_job(self, queue, num_free_nodes, current_time):
         """Finds a backfill job based on available nodes and estimated completion times.
diff --git a/raps/schedulers/replay.py b/raps/schedulers/replay.py
index d02a1c5..00b1283 100644
--- a/raps/schedulers/replay.py
+++ b/raps/schedulers/replay.py
@@ -21,6 +21,9 @@ class Scheduler:
         """Sort jobs based on the selected scheduling policy."""
         return sorted(queue, key=lambda job: job.start_time)

+    def prepare_system_state(self,queue,running):
+        return queue
+
     def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False):
         # Sort the queue in place.
         if not sorted:
@@ -38,7 +41,11 @@ class Scheduler:
                 else:
                     continue  # continue instead of break, as later job with specific nodes may still be placed!
else:  # synthetic
-                raise ValueError("No jobs requested?")
+                if job.nodes_required:
+                    pass
+                else:
+                    raise ValueError("No number of nodes specified.")
+
             if nodes_available:
                 self.resource_manager.assign_nodes_to_job(job, current_time)
diff --git a/raps/telemetry.py b/raps/telemetry.py
index da99d64..ce78b5b 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -100,7 +100,7 @@ if __name__ == "__main__":
     submit_times = []
     last = 0
     for job_vector in jobs:
-        job = Job(job_vector, 0)  # current_time is never used in Job()
+        job = Job(job_vector)
         wt_list.append(job.wall_time)
         nr_list.append(job.nodes_required)
         submit_times.append(job.submit_time)
diff --git a/raps/ui.py b/raps/ui.py
index d71caf5..d7af1b3 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -398,13 +398,13 @@ class LayoutManager:
             self.console.clear()
             self.console.print(self.layout)

-    def run(self, jobs, timesteps):
+    def run(self, jobs, timestep_start, timestep_end):
         """ Runs the UI, blocking until the simulation is complete """
-        for data in self.engine.run_simulation(jobs, timesteps):
+        for data in self.engine.run_simulation(jobs, timestep_start, timestep_end):
             if data.current_time % self.config['UI_UPDATE_FREQ'] == 0:
                 self.update(data)
                 self.render()

-    def run_stepwise(self, jobs, timesteps):
+    def run_stepwise(self, jobs, timestep_start, timestep_end):
         """ Prepares the UI and returns a generator for the simulation """
-        return self.engine.run_simulation(jobs, timesteps)
+        return self.engine.run_simulation(jobs, timestep_start, timestep_end)
diff --git a/raps/workload.py b/raps/workload.py
index d4d9c54..6b20495 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -119,7 +119,7 @@ class Workload:
                 net_tx,  # Network transmit trace
                 net_rx,  # Network receive trace
                 'COMPLETED',  # End state
-                None,  # Requested Nodes ?! This needs to be fixed! job_dict and Job class are inconsitent Job()
+                list(range(config['AVAILABLE_NODES'])),  # Explicitly all nodes to test replay
                 None,  # Job ID
                 100,  # Priority
                 partition,  # Partition name
@@ -161,7 +161,7 @@ class Workload:
                 net_tx,  # Network transmit trace
                 net_rx,  # Network receive trace
                 'COMPLETED',  # End state
-                None,  # Requested Nodes ?! This needs to be fixed! job_dict and Job class are inconsitent Job()
+                list(range(config['AVAILABLE_NODES'])),  # Explicitly all nodes to test replay
                 None,  # Job ID
                 100,  # Priority
                 partition,  # Partition name
@@ -182,13 +182,14 @@ class Workload:
         # List to hold jobs for all partitions
         jobs = []
         account = ACCT_NAMES[0]
-
         # Iterate through each partition and its config
         for partition in self.partitions:
             # Fetch partition-specific configuration
             config = self.config_map[partition]
             net_tx, net_rx = [], []

+            list_of_all_nodes = list(range(config['AVAILABLE_NODES']))
+
             # Max test
             cpu_util, gpu_util = 1, 4
             cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA'])
-- 
GitLab

From 1d3013a1930b14548549b2192c7283b940c975d7 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Thu, 20 Feb 2025 18:48:43 -0500
Subject: [PATCH 021/388] Fixed traces showing high power! But the previous error
 shows that the wrong index was read. This needs to be fixed.
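A note on the "high power" named in this subject: by the time the padding code runs, cpu_trace and gpu_trace hold utilization values in [0, CPUS_PER_NODE] (or GPUS_PER_NODE), while cpu_min_power is an aggregate wattage across all allocated nodes, so padding a utilization trace with it injects numbers many orders of magnitude too large. A back-of-the-envelope illustration with made-up values (not the project's configuration):

    # Hypothetical numbers, for illustration only.
    CPUS_PER_NODE = 2
    POWER_CPU_IDLE = 90            # watts per CPU (assumed)
    nodes_required = 512

    cpu_min_power = nodes_required * POWER_CPU_IDLE * CPUS_PER_NODE   # 92160 W
    typical_util_sample = 1.7      # a legitimate trace value lies in [0, 2]
    print(cpu_min_power / typical_util_sample)   # ~54000x larger than a valid sample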
---
 raps/dataloaders/frontier.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py
index 61fcc5f..e4158c0 100644
--- a/raps/dataloaders/frontier.py
+++ b/raps/dataloaders/frontier.py
@@ -231,13 +231,13 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
         if wall_time > trace_time:
             missing_steps = int(wall_time - trace_time)
             if start_time < 0:
-                cpu_trace = np.concatenate((np.array([cpu_min_power] * missing_steps),cpu_trace))
-                gpu_trace = np.concatenate((np.array([cpu_min_power] * missing_steps),gpu_trace))
+                cpu_trace = np.concatenate((np.array([0] * missing_steps),cpu_trace))
+                gpu_trace = np.concatenate((np.array([0] * missing_steps),gpu_trace))
                 print(f"Job: {job_id} prepended {missing_steps} Values with idle power!")
                 print(f"{start_time} - {end_time}")
             elif end_time > telemetry_end:
-                cpu_trace = np.concatenate((cpu_trace,np.array([cpu_min_power] * missing_steps)))
-                gpu_trace = np.concatenate((gpu_trace,np.array([cpu_min_power] * missing_steps)))
+                cpu_trace = np.concatenate((cpu_trace,np.array([0] * missing_steps)))
+                gpu_trace = np.concatenate((gpu_trace,np.array([0] * missing_steps)))
                 print(f"Job: {job_id} appended {missing_steps} Values with idle power!")
                 print(f"{start_time} - {end_time}")
             else:
-- 
GitLab

From 3e8fbb29335802a03881d2bb1a34dbf03748a6cc Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Thu, 20 Feb 2025 18:53:12 -0500
Subject: [PATCH 022/388] This change makes it possible to reproduce a logic error
 that needs to be fixed!

The previous change added padding to the job telemetry if the job started before the simulation.
Original code:
cpu_trace = np.concatenate((np.array([cpu_min_power] * missing_steps),cpu_trace))
This resulted in GW power, as the values should be between 0 and NUM_PROC.
This was fixed by setting the values to 0:
cpu_trace = np.concatenate((np.array([0] * missing_steps),cpu_trace))
However: the initial code showed that the error is not only in the number produced, but in the offset chosen within the cpu_trace (and gpu_trace).
The newly committed code adds
cpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),cpu_trace))
which triggers this error.
The np.NaN should remain in the code, and the logic error needs to be fixed!
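What this message describes is NaN poisoning used as a tripwire: unlike 0, NaN propagates through arithmetic, so any accidental read from the padded region surfaces immediately instead of silently contributing "idle" values. A generic sketch of the pattern (illustrative names, not the project's code):

    import numpy as np

    def pad_with_tripwire(trace, missing_steps, at_front):
        """Pad a trace with NaN so reads from the padded region can be caught."""
        pad = np.full(missing_steps, np.nan)
        return np.concatenate((pad, trace) if at_front else (trace, pad))

    def read_checked(trace, index):
        """Read one sample; raise if the index lands in the padded region."""
        value = trace[index]
        if np.isnan(value):
            raise IndexError(f"read from padded region at index {index}")
        return float(value)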
--- raps/dataloaders/frontier.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index e4158c0..6e59686 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -231,13 +231,13 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar if wall_time > trace_time: missing_steps = int(wall_time - trace_time) if start_time < 0: - cpu_trace = np.concatenate((np.array([0] * missing_steps),cpu_trace)) - gpu_trace = np.concatenate((np.array([0] * missing_steps),gpu_trace)) + cpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),cpu_trace)) + gpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),gpu_trace)) print(f"Job: {job_id} prepended {missing_steps} Values with idle power!") print(f"{start_time} - {end_time}") elif end_time > telemetry_end: - cpu_trace = np.concatenate((cpu_trace,np.array([0] * missing_steps))) - gpu_trace = np.concatenate((gpu_trace,np.array([0] * missing_steps))) + cpu_trace = np.concatenate((cpu_trace,np.array([np.NaN] * missing_steps))) + gpu_trace = np.concatenate((gpu_trace,np.array([np.NaN] * missing_steps))) print(f"Job: {job_id} appended {missing_steps} Values with idle power!") print(f"{start_time} - {end_time}") else: -- GitLab From 8def503eccd880570626981ed05587a843235b0b Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 25 Feb 2025 14:04:46 -0500 Subject: [PATCH 023/388] Initial fix to fast-forward missing timestep calculation and queue cleanup in prepare timestep --- main.py | 13 ++++++++----- raps/dataloaders/frontier.py | 4 ++-- raps/engine.py | 21 ++++++++++++++------- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index c34b78c..b2ca73a 100644 --- a/main.py +++ b/main.py @@ -71,11 +71,13 @@ sc = Engine( ) layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, **config) -if args.replay: +timestep_start = 0 +if args.fastforward: + args.fastforward = convert_to_seconds(args.fastforward) + timestep_start = args.fastforward + - if args.fastforward: - args.fastforward = convert_to_seconds(args.fastforward) - timestep_start = args.fastforward +if args.replay: td = Telemetry(**args_dict) @@ -106,8 +108,9 @@ if args.replay: else: # custom data loader print(*args.replay) - jobs, timestep_start, timestep_end = td.load_data(args.replay) + jobs, timestep_start_from_data, timestep_end = td.load_data(args.replay) td.save_snapshot(jobs, filename=DIR_NAME) + timestep_start += timestep_start_from_data # + timestep_start_from_data # Set number of timesteps based on the last job running which we assume # is the maximum value of submit_time + wall_time of all the jobs diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 6e59686..123707f 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -191,7 +191,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar cpu_power_array = cpu_power.values cpu_min_power = nodes_required * config['POWER_CPU_IDLE'] * config['CPUS_PER_NODE'] cpu_max_power = nodes_required * config['POWER_CPU_MAX'] * config['CPUS_PER_NODE'] - cpu_util = power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) + cpu_util = power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) # Will be negative! 
as cpu_power_array[i] can be smaller than cpu_min_power cpu_trace = cpu_util * config['CPUS_PER_NODE'] gpu_power = jobprofile_df[jobprofile_df['allocation_id'] \ @@ -229,7 +229,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds if wall_time > trace_time: - missing_steps = int(wall_time - trace_time) + missing_steps = int((wall_time - trace_time) // config['TRACE_QUANTA']) if start_time < 0: cpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),cpu_trace)) gpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),gpu_trace)) diff --git a/raps/engine.py b/raps/engine.py index f056022..f8c3917 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -148,8 +148,13 @@ class Engine: cpu_utils = [] gpu_utils = [] net_utils = [] + if self.debug: + print(f"Current Time: {self.current_time}") + for job in self.running: + if self.debug: + print(f"JobID: {job.id}") if job.state == JobState.RUNNING: job.running_time = self.current_time - job.start_time if job.running_time > job.trace_time: @@ -252,7 +257,7 @@ class Engine: self.current_time += 1 return tick_data - def prepare_system_state(self, all_jobs:List, timestep_start): + def prepare_system_state(self, all_jobs:List, timestep_start, replay:bool): # Modifies Jobs object self.current_time = timestep_start @@ -263,24 +268,26 @@ class Engine: self.add_running_jobs_to_queue(all_jobs) # Now process job queue one by one (needed to get the start_time right!) - for job in self.queue: + for job in self.queue[:]: # operate over a slice copy to be able to remove jobs from queue if placed. self.scheduler.schedule([job], self.running, job.start_time, sorted=True) - if len(self.queue) != len(self.running): + self.queue.remove(job) + if replay and len(self.queue) != 0: raise ValueError(f"Something went wrong! Not all jobs could be placed!\nPotential confligt in queue:\n{self.queue}") - self.queue = [] # Empty queue needed as addition one by one does not empty the queue! + def run_simulation(self, jobs, timestep_start, timestep_end, autoshutdown=False): """Generator that yields after each simulation tick.""" self.timesteps = timestep_end - timestep_start # Where is this used? - # Place jobs that are currently running, onto the system. - self.prepare_system_state(jobs, timestep_start) - if self.scheduler.policy == PolicyType.REPLAY: replay = True else: replay = False + # Place jobs that are currently running, onto the system. 
+ self.prepare_system_state(jobs, timestep_start, replay) + + for timestep in range(timestep_start,timestep_end): completed_jobs, newly_downed_nodes = self.prepare_timestep(replay) -- GitLab From 3e6a3d9b1026708e7648c93a5ad6da284466e62c Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 25 Feb 2025 17:24:51 -0500 Subject: [PATCH 024/388] Rewrite of trace_times using trace_start and trace_end time Commit Before removing comments with offset calculation --- raps/dataloaders/frontier.py | 34 ++++++++++++++----------- raps/engine.py | 48 ++++++++++++++++++++++++++++++------ raps/job.py | 10 ++++++-- raps/workload.py | 23 +++++++++++------ 4 files changed, 84 insertions(+), 31 deletions(-) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 123707f..5ac742e 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -114,6 +114,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar - end_time - wall_time (end_time-start_time, actual runtime in seconds) - trace_time (lenght of each trace in seconds) + - trace_start_time (time offset in seconds after which the trace starts) + - trace_end_time (time offset in seconds after which the trace ends) has to be set for use within the simulation The returned values are these three: @@ -222,28 +224,33 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar diff = end_time_timestamp - telemetry_start_timestamp end_time = diff.total_seconds() - wall_time = end_time - start_time if np.isnan(wall_time): wall_time = 0 trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds + trace_start_time = 0 + trace_end_time = trace_time if wall_time > trace_time: - missing_steps = int((wall_time - trace_time) // config['TRACE_QUANTA']) + missing_trace_time = wall_time - trace_time if start_time < 0: - cpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),cpu_trace)) - gpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),gpu_trace)) - print(f"Job: {job_id} prepended {missing_steps} Values with idle power!") - print(f"{start_time} - {end_time}") + #cpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),cpu_trace)) + #gpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),gpu_trace)) + #print(f"Job: {job_id} prepended {missing_steps} Values with idle power!") + #print(f"{start_time} - {end_time}") + trace_start_time = missing_trace_time + trace_end_time = wall_time elif end_time > telemetry_end: - cpu_trace = np.concatenate((cpu_trace,np.array([np.NaN] * missing_steps))) - gpu_trace = np.concatenate((gpu_trace,np.array([np.NaN] * missing_steps))) - print(f"Job: {job_id} appended {missing_steps} Values with idle power!") - print(f"{start_time} - {end_time}") + #cpu_trace = np.concatenate((cpu_trace,np.array([np.NaN] * missing_steps))) + #gpu_trace = np.concatenate((gpu_trace,np.array([np.NaN] * missing_steps))) + #print(f"Job: {job_id} appended {missing_steps} Values with idle power!") + #print(f"{start_time} - {end_time}") + trace_start_time = 0 + trace_end_time = trace_time else: print(f"Job: {job_id} {start_time} - {end_time}!") raise ValueError("Missing values not at start nor end.") - trace_time = wall_time # Pretending to have a full trace, This may not be needed! 
+ #trace_time = gpu_trace.size * config["TRACE_QUANTA"] # Update trace_time to padded trace xnames = jobs_df.loc[jidx, 'xnames'] # Don't replay any job with an empty set of xnames @@ -277,15 +284,14 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar print("Job starts after last recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.") continue # SKIP! - - if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], end_state, scheduled_nodes, job_id, priority, # partition missing submit_time=submit_time, time_limit=time_limit, start_time=start_time, end_time=end_time, - wall_time=wall_time, trace_time=trace_time) + wall_time=wall_time, trace_time=trace_time, + trace_start_time=trace_start_time, trace_end_time=trace_end_time) jobs.append(job_info) return jobs, telemetry_start, telemetry_end diff --git a/raps/engine.py b/raps/engine.py index f8c3917..34ea533 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -157,15 +157,47 @@ class Engine: print(f"JobID: {job.id}") if job.state == JobState.RUNNING: job.running_time = self.current_time - job.start_time - if job.running_time > job.trace_time: - raise ValueError(f"Trace Ended before job ended!\n\ - {job.running_time} > {job.trace_time}\n\ - {len(job.cpu_trace)} vs. {self.running_time // self.config['TRACE_QUANTA']}\ + + if job.running_time > job.wall_time: + raise Exception(f"Job should have ended already!\n\ + {job.running_time} > {job.wall_time}\n\ + {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ ") - time_quanta_index = int(job.running_time // self.config['TRACE_QUANTA']) - cpu_util = get_utilization(job.cpu_trace, time_quanta_index) - gpu_util = get_utilization(job.gpu_trace, time_quanta_index) - net_util = 0 + # Next: compute the time_quanta_index: + # If the running time is past the last time step in the trace, + # use the last value in the trace. This can happen if the end + # time valid timesteps is e.g. 17%15, the last trace value is + # 15%15 and the next possible trace value 30%15 but was not + # recorded because the job ended before. Instead of using a + # additional padding or raisinig an error, the last valid value + # is used. + #if int(job.trace_time // self.config['TRACE_QUANTA']) \ + # <= int(job.running_time // self.config['TRACE_QUANTA']): + # # Make sure only the last interval is missing if any: + # if job.running_time - job.trace_time < self.config['TRACE_QUANTA']: + # time_quanta_index = len(job.cpu_trace) - 1 + # else: + # raise Exception(f"Job is not padded correctly!\n\ + # {job.running_time} > {job.trace_time}\n\ + # {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ + # ") + #else: + # time_quanta_index = int(job.running_time // self.config['TRACE_QUANTA']) + + if job.running_time < job.trace_start_time or job.running_time > job.trace_end_time: + cpu_util = 0 # get_utilization(job.cpu_trace, time_quanta_index) + gpu_util = 0 # get_utilization(job.gpu_trace, time_quanta_index) + net_util = 0 + if self.debug: + print("No Values in trace, using IDLE.") + if self.scheduler.policy == PolicyType.REPLAY: + print(f"{job.running_time} < {job.trace_start_time} or {job.running_time} > {job.trace_end_time}") + raise Exception("Replay is using IDLE values! 
Something is wrong!") + else: + time_quanta_index = int((job.running_time - job.trace_start_time) // self.config['TRACE_QUANTA']) + cpu_util = get_utilization(job.cpu_trace, time_quanta_index) + gpu_util = get_utilization(job.gpu_trace, time_quanta_index) + net_util = 0 if len(job.ntx_trace) and len(job.nrx_trace): net_tx = get_utilization(job.ntx_trace, time_quanta_index) diff --git a/raps/job.py b/raps/job.py index 1492da2..c992598 100644 --- a/raps/job.py +++ b/raps/job.py @@ -4,7 +4,7 @@ def job_dict(nodes_required, name, account, \ cpu_trace, gpu_trace, ntx_trace, nrx_trace, \ end_state, scheduled_nodes, job_id, priority=0, partition=0, submit_time=0, time_limit=0, start_time=0, end_time=0, - wall_time=0, trace_time=0): + wall_time=0, trace_time=0, trace_start_time=0,trace_end_time=0): """ Return job info dictionary """ return { 'nodes_required': nodes_required, @@ -25,7 +25,9 @@ def job_dict(nodes_required, name, account, \ 'start_time': start_time, 'end_time': end_time, 'wall_time': wall_time, - 'trace_time': trace_time + 'trace_time': trace_time, + 'trace_start_time': trace_start_time, + 'trace_end_time': trace_end_time } @@ -65,6 +67,8 @@ class Job: self.end_time = None # Actual end time when executing or from telemetry self.wall_time = None # end_time - start_time self.trace_time = None # Time period for which traces are available + self.trace_start_time = None # Time period for which traces are available + self.trace_end_time = None # Time period for which traces are available self.running_time = 0 # Current running time updated when simulating # If a job dict was given, override the values from the job_dict: @@ -87,6 +91,8 @@ class Job: f"start_time={self.start_time}, end_time={self.end_time}, " f"wall_time={self.wall_time}, " f"trace_time={self.trace_time}, " + f"trace_start_time={self.trace_start_time}, " + f"trace_end_time={self.trace_end_time}, " f"running_time={self.running_time}, state={self._state}, " f"scheduled_nodes={self.scheduled_nodes}, power={self.power}, " f"power_history={self.power_history})") diff --git a/raps/workload.py b/raps/workload.py index 6b20495..93b091b 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -85,7 +85,8 @@ class Workload: jobs.append(job_dict(nodes_required, name, account, cpu_trace, gpu_trace, net_tx, net_rx, \ end_state, None, job_index, priority, partition, - time_to_next_job, time_limit, time_to_next_job, time_to_next_job + wall_time, wall_time, wall_time)) + time_to_next_job, time_limit, time_to_next_job, time_to_next_job + wall_time, wall_time, + wall_time, 0, wall_time)) return jobs @@ -128,7 +129,9 @@ class Workload: 0, # Start time / or None len(gpu_trace) * config['TRACE_QUANTA'], # End time / or None len(gpu_trace) * config['TRACE_QUANTA'], # Wall time - len(gpu_trace) * config['TRACE_QUANTA'] # Trace time + len(gpu_trace) * config['TRACE_QUANTA'], # Trace time + 0, # Trace start time + len(gpu_trace) * config['TRACE_QUANTA'] # Trace end time ) print(job_info) jobs.append(job_info) # Add job to the list @@ -170,7 +173,9 @@ class Workload: 0, # Start time / or None len(gpu_trace) * config['TRACE_QUANTA'], # End time / or None len(gpu_trace) * config['TRACE_QUANTA'], # Wall time - len(gpu_trace) * config['TRACE_QUANTA'] # Trace time + len(gpu_trace) * config['TRACE_QUANTA'], # Trace time + 0, # Trace start time + len(gpu_trace) * config['TRACE_QUANTA'] # Trace end time ) jobs.append(job_info) # Add job to the list @@ -199,7 +204,8 @@ class Workload: f"Max Test {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, 
'COMPLETED', None, None, 100, partition, 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 0, 10800, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA'] + 0, 10800, len(gpu_trace) * config['TRACE_QUANTA'], + len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] ) jobs.append(job_info) @@ -211,7 +217,8 @@ class Workload: f"OpenMxP {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, 'COMPLETED', None, None, 100, partition, 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 10800, 14200, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA'] + 10800, 14200, len(gpu_trace) * config['TRACE_QUANTA'], + len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] ) jobs.append(job_info) @@ -223,7 +230,8 @@ class Workload: f"HPL {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, 'COMPLETED', None, None, 100, partition, 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 14200, 17800, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA'] + 14200, 17800, len(gpu_trace) * config['TRACE_QUANTA'], + len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] ) jobs.append(job_info) @@ -235,7 +243,8 @@ class Workload: f"Idle Test {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, 'COMPLETED', None, None, 100, partition, 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 17800, 21400, len(gpu_trace) * config['TRACE_QUANTA'], len(gpu_trace) * config['TRACE_QUANTA'] + 17800, 21400, len(gpu_trace) * config['TRACE_QUANTA'], + len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] ) jobs.append(job_info) -- GitLab From e7a6c96a19a1e0d7bece25ca70e616ccc3960c78 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 25 Feb 2025 18:09:10 -0500 Subject: [PATCH 025/388] Rewrote engine, jobs and frontier dataloader to use trace_start_time and trace_end_time. This replaces the nan/0 padded version of the trace files. For replay errors are raised if the simulation tries to access values outside of the recorded data. For re-schedule jobs idle values are used if the re-scheduled part of the job has no associated telemetry. --- raps/dataloaders/frontier.py | 16 +++++--------- raps/engine.py | 42 +++++++++++------------------------- raps/job.py | 4 ++-- 3 files changed, 20 insertions(+), 42 deletions(-) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 5ac742e..510fe4c 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -100,7 +100,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar This means that the time between first_start_timestamp and telemetry_start has no associated values in the traces! - The missing values after simulation_end can be ignored, as the simulatuion will have stoped before. + The missing values after simulation_end can be ignored, as the simulatuion + will have stoped before. However, the times before telemetry_start have to be padded to generate correct offsets within their data! @@ -118,6 +119,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar - trace_end_time (time offset in seconds after which the trace ends) has to be set for use within the simulation + The values trace_start_time are similar to the telemetry_start and + telemetry_stop but job specific. + The returned values are these three: - The list of parsed jobs. 
(as a job_dict) - telemetry_start: int (in seconds) @@ -159,7 +163,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar diff = first_start_timestamp - telemetry_start_timestamp first_start = int(diff.total_seconds()) # negative seconds or 0 - num_jobs = len(jobs_df) if debug: print("num_jobs:", num_jobs) @@ -234,23 +237,14 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar if wall_time > trace_time: missing_trace_time = wall_time - trace_time if start_time < 0: - #cpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),cpu_trace)) - #gpu_trace = np.concatenate((np.array([np.NaN] * missing_steps),gpu_trace)) - #print(f"Job: {job_id} prepended {missing_steps} Values with idle power!") - #print(f"{start_time} - {end_time}") trace_start_time = missing_trace_time trace_end_time = wall_time elif end_time > telemetry_end: - #cpu_trace = np.concatenate((cpu_trace,np.array([np.NaN] * missing_steps))) - #gpu_trace = np.concatenate((gpu_trace,np.array([np.NaN] * missing_steps))) - #print(f"Job: {job_id} appended {missing_steps} Values with idle power!") - #print(f"{start_time} - {end_time}") trace_start_time = 0 trace_end_time = trace_time else: print(f"Job: {job_id} {start_time} - {end_time}!") raise ValueError("Missing values not at start nor end.") - #trace_time = gpu_trace.size * config["TRACE_QUANTA"] # Update trace_time to padded trace xnames = jobs_df.loc[jidx, 'xnames'] # Don't replay any job with an empty set of xnames diff --git a/raps/engine.py b/raps/engine.py index 34ea533..63beea6 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -163,30 +163,9 @@ class Engine: {job.running_time} > {job.wall_time}\n\ {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ ") - # Next: compute the time_quanta_index: - # If the running time is past the last time step in the trace, - # use the last value in the trace. This can happen if the end - # time valid timesteps is e.g. 17%15, the last trace value is - # 15%15 and the next possible trace value 30%15 but was not - # recorded because the job ended before. Instead of using a - # additional padding or raisinig an error, the last valid value - # is used. - #if int(job.trace_time // self.config['TRACE_QUANTA']) \ - # <= int(job.running_time // self.config['TRACE_QUANTA']): - # # Make sure only the last interval is missing if any: - # if job.running_time - job.trace_time < self.config['TRACE_QUANTA']: - # time_quanta_index = len(job.cpu_trace) - 1 - # else: - # raise Exception(f"Job is not padded correctly!\n\ - # {job.running_time} > {job.trace_time}\n\ - # {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ - # ") - #else: - # time_quanta_index = int(job.running_time // self.config['TRACE_QUANTA']) - if job.running_time < job.trace_start_time or job.running_time > job.trace_end_time: - cpu_util = 0 # get_utilization(job.cpu_trace, time_quanta_index) - gpu_util = 0 # get_utilization(job.gpu_trace, time_quanta_index) + cpu_util = 0 # No values available therefore we assume IDLE == 0 + gpu_util = 0 net_util = 0 if self.debug: print("No Values in trace, using IDLE.") @@ -195,6 +174,16 @@ class Engine: raise Exception("Replay is using IDLE values! Something is wrong!") else: time_quanta_index = int((job.running_time - job.trace_start_time) // self.config['TRACE_QUANTA']) + if time_quanta_index == len(job.cpu_trace): + # If the running time is past the last time step in the + # trace, use the last value in the trace. 
This can
+                        # happen if the last valid timestep is e.g. 17%15,
+                        # the last trace value is 15%15 and the next possible
+                        # trace value 30%15 but was not recorded because the
+                        # job ended before.
+                        # For every other error condition trace_start_ and
+                        # _end_time are used!
+                        time_quanta_index -= 1
                 cpu_util = get_utilization(job.cpu_trace, time_quanta_index)
                 gpu_util = get_utilization(job.gpu_trace, time_quanta_index)
                 net_util = 0
diff --git a/raps/job.py b/raps/job.py
index c992598..7fa287e 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -67,8 +67,8 @@ class Job:
         self.end_time = None  # Actual end time when executing or from telemetry
         self.wall_time = None  # end_time - start_time
         self.trace_time = None  # Time period for which traces are available
-        self.trace_start_time = None  # Time period for which traces are available
-        self.trace_end_time = None  # Time period for which traces are available
+        self.trace_start_time = None  # Relative start time of the trace (to running time)
+        self.trace_end_time = None  # Relative end time of the trace
         self.running_time = 0  # Current running time updated when simulating

         # If a job dict was given, override the values from the job_dict:
-- 
GitLab

From 1013492321f9169ed8386a85ef04d01d5d4ee9eb Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 26 Feb 2025 21:37:45 -0500
Subject: [PATCH 026/388] Removed Backfill Scheduling, and added backfill policy
 as BackfillType independent of scheduling policy.

Added argument --backfill
Augmented default scheduler
Implemented Backfill, and refactored.
The following are working: firstfit, EASY
The following are not implemented: bestfit, greedy and conservative; they are left for other schedulers to implement at the moment.
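For reference, the rule behind the two working policies named above: compute the "shadow" time at which the blocked head-of-queue job could start, then backfill any later job that fits on the currently free nodes and is short enough to end before that time. A simplified sketch using (time_limit, nodes) tuples instead of Job objects; the names are illustrative, not the scheduler's API:

    def shadow_start(free_nodes, running, head_nodes, now):
        """Earliest time at which enough nodes free up for the head-of-queue job.
        running: iterable of (end_time, nodes) for currently running jobs."""
        avail, t = free_nodes, now
        for end_time, nodes in sorted(running):
            if avail >= head_nodes:
                break
            avail += nodes          # this job's nodes become free at end_time
            t = end_time
        return t

    def first_fit_backfill(queue, free_nodes, running, now):
        """queue: list of (time_limit, nodes); queue[0] is the blocked head job.
        Returns the first job that fits now and ends before the shadow start."""
        limit = shadow_start(free_nodes, running, queue[0][1], now) - now
        for time_limit, nodes in queue[1:]:
            if nodes <= free_nodes and time_limit <= limit:
                return (time_limit, nodes)
        return None

EASY differs from plain first-fit only in re-sorting the candidates by submit time before scanning, which is what the diff below does with queue[:] = sorted(...).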
--- args.py | 2 + raps/engine.py | 6 +- raps/policy.py | 10 +- raps/schedulers/default.py | 184 +++++++++++++++++++++---------------- 4 files changed, 121 insertions(+), 81 deletions(-) diff --git a/args.py b/args.py index ddc2a6e..4f0ab38 100644 --- a/args.py +++ b/args.py @@ -46,6 +46,8 @@ parser.add_argument('-w', '--workload', type=str, choices=choices, default=choic # Scheduling options choices = ['default', 'replay', 'nrel', 'anl', 'flux'] parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler') +choices = [None, 'firstfit', 'bestfit', 'greedy', 'easy', 'conservative'] +parser.add_argument('--backfill', type=str, choices=choices, default=None, help='Backfill Policy') policies = [policy.value for policy in PolicyType] choices = ['prescribed', 'poisson'] parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})') diff --git a/raps/engine.py b/raps/engine.py index 63beea6..67485cb 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -59,12 +59,16 @@ class Engine: # Get scheduler type from command-line args or default scheduler_type = kwargs.get('scheduler', 'default') + policy_type = kwargs.get('policy', None) + backfill_type = kwargs.get('backfill', None) + self.scheduler = load_scheduler(scheduler_type)( config=self.config, policy=kwargs.get('policy'), + bfpolicy=kwargs.get('backfill'), resource_manager=self.resource_manager ) - print(f"Using scheduler: {scheduler_type}") + print(f"Using scheduler: {scheduler_type}, with policy {policy_type} and backfill {backfill_type}") def add_running_jobs_to_queue(self, jobs_to_submit: List): diff --git a/raps/policy.py b/raps/policy.py index 5a98838..45e9770 100644 --- a/raps/policy.py +++ b/raps/policy.py @@ -4,9 +4,17 @@ from enum import Enum class PolicyType(Enum): """Supported scheduling policies.""" FCFS = 'fcfs' - BACKFILL = 'backfill' PRIORITY = 'priority' FUGAKU_PTS = 'fugaku_pts' REPLAY = 'replay' SJF = 'sjf' LJF = 'ljf' + +class BackfillType(Enum): + """Supported backfilling policies.""" + NONE = None + FIRSTFIT = 'firstfit' + BESTFIT = 'bestfit' + GREEDY = 'greedy' + EASY = 'easy' # Earliest Available Start Time Yielding + CONSERVATIVE = 'conservative' diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index 0ba986c..d5dffc4 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -4,15 +4,16 @@ from ..utils import summarize_ranges from ..workload import MAX_PRIORITY -from ..policy import PolicyType +from ..policy import PolicyType, BackfillType class Scheduler: """ Default job scheduler with various scheduling policies. 
""" - def __init__(self, config, policy, resource_manager=None): + def __init__(self, config, policy, bfpolicy=None, resource_manager=None): self.config = config self.policy = PolicyType(policy) + self.bfpolicy = BackfillType(bfpolicy) if resource_manager is None: raise ValueError("Scheduler requires a ResourceManager instance") self.resource_manager = resource_manager @@ -20,7 +21,7 @@ class Scheduler: def sort_jobs(self, queue, accounts=None): """Sort jobs based on the selected scheduling policy.""" - if self.policy == PolicyType.FCFS or self.policy == PolicyType.BACKFILL: + if self.policy == PolicyType.FCFS: return sorted(queue, key=lambda job: job.submit_time) elif self.policy == PolicyType.PRIORITY: return sorted(queue, key=lambda job: job.priority, reverse=True) @@ -35,7 +36,7 @@ class Scheduler: else: raise ValueError(f"Policy not implemented: {self.policy}") - def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): + def schedule(self, queue, running, current_time, accounts=None, sorted=False): # Sort the queue in place. if not sorted: queue[:] = self.sort_jobs(queue, accounts) @@ -44,61 +45,28 @@ class Scheduler: for job in queue[:]: if self.policy == PolicyType.REPLAY: if job.start_time > current_time: - continue + continue # Replay: Job didn't start yet. Next! else: pass else: pass - # Make sure the requested nodes are available. - nodes_available = False - if job.requested_nodes: # nodes specified, i.e., telemetry replay - if len(job.requested_nodes) <= len(self.resource_manager.available_nodes): - if self.policy == PolicyType.REPLAY: # Check if exact set is available: - nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes)) - else: - # Sufficiently large number of nodes available - # but no exact set is required! - nodes_available = True - # remove the request for specific nodes and ask for n nodes - job.nodes_required = len(job.requested_nodes) - job.requested_nodes = [] - else: - # Next we check if we continue or abort. - # This may be policy dependent. I break by default but this may not be correct. - if self.policy == PolicyType.FCFS or \ - self.policy == PolicyType.PRIORITY or\ - self.policy == PolicyType.FUGAKU_PTS or \ - self.policy == PolicyType.LJF or \ - False: # self.policy == PolicyType ?? - break # The job at the front of the queue doesnt fit, wait until it fits. - elif self.policy == PolicyType.REPLAY or \ - self.policy == PolicyType.BACKFILL or \ - self.policy == PolicyType.SJF or\ - False: - continue # The job at the front of the queue doesn't fit, but there are other jobs that may fit, look at the next one. - else: - raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!") - else: # synthetic jobs dont have nodes assigned: - nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required + + nodes_available = self.check_available_nodes(job) + if nodes_available: - self.resource_manager.assign_nodes_to_job(job, current_time) - running.append(job) - queue.remove(job) - if debug: - scheduled_nodes = summarize_ranges(job.scheduled_nodes) - print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}") - else: - # not sure if this does what it should! - if self.policy == PolicyType.BACKFILL: - # Try to find a backfill candidate from the entire queue. 
- backfill_job = self.find_backfill_job(queue, len(self.resource_manager.available_nodes), current_time) - if backfill_job: - self.assign_nodes_to_job(backfill_job, self.resource_manager.available_nodes, current_time) - running.append(backfill_job) - queue.remove(backfill_job) - if debug: - scheduled_nodes = summarize_ranges(backfill_job.scheduled_nodes) - print(f"t={current_time}: Backfilling job {backfill_job.id} with wall time {backfill_job.wall_time} on nodes {scheduled_nodes}") + self.place_job_and_manage_queues(job, queue, running, current_time) + else: # In case the job was not placed, see how we should continue: + if self.bfpolicy is not None: + self.backfill(queue, running, current_time) + + # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. + if self.policy in [PolicyType.REPLAY]: + continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. + elif self.policy in [PolicyType.FCFS, PolicyType.PRIORITY, + PolicyType.FUGAKU_PTS, PolicyType.LJF]: + break # The job at the front of the queue doesnt fit stop processing the queue. + else: + raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!") def prepare_system_state(self,jobs_to_submit:List, running, timestep_start): # def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): @@ -130,46 +98,104 @@ class Scheduler: else: return jobs_to_submit - def find_backfill_job(self, queue, num_free_nodes, current_time): + def place_job_and_manage_queues(self, job, queue,running, current_time): + self.resource_manager.assign_nodes_to_job(job, current_time) + running.append(job) + queue.remove(job) + if self.debug: + scheduled_nodes = summarize_ranges(job.scheduled_nodes) + print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}") + + def check_available_nodes(self,job): + nodes_available = False + if job.requested_nodes: # nodes specified, i.e., telemetry replay + if len(job.requested_nodes) <= len(self.resource_manager.available_nodes): + if self.policy == PolicyType.REPLAY: # Check if exact set is available: + nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes)) + else: + # Sufficiently large number of nodes available + # but no exact set is required! + nodes_available = True + # remove the request for specific nodes and ask for n nodes + job.nodes_required = len(job.requested_nodes) + job.requested_nodes = [] + else: + pass + else: # Exact nodes not specified (e.g. synthetic jobs dont have nodes assigned) + nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required + + return nodes_available + + def backfill(self,queue:List, running:List, current_time): + # Try to find a backfill candidate from the entire queue. + while queue: + backfill_job = self.find_backfill_job(queue, running, current_time) + if backfill_job: + self.place_job_and_manage_queues(backfill_job, queue, running, current_time) + else: + break + + def find_backfill_job(self, queue, running, current_time): """Finds a backfill job based on available nodes and estimated completion times. - Based on pseudocode from Leonenkov and Zhumatiy, 'Introducing new backfill-based + Loosely based on pseudocode from Leonenkov and Zhumatiy, 'Introducing new backfill-based scheduler for slurm resource manager.' Procedia computer science 66 (2015): 661-669. 
""" - if not queue: return None + # Identify when the nex job in the queue could run as a time limit: first_job = queue[0] + nodes_required = 0 + if first_job.requested_nodes: + nodes_required = len(first_job.requested_nodes) + else: + nodes_required = first_job.nodes_required - for job in queue: - job.end_time = current_time + job.wall_time # Estimate end time - - # Sort jobs according to their termination time (end_time) - sorted_queue = sorted(queue, key=lambda job: job.end_time) - - # Compute shadow time by accumulating nodes - sum_nodes = 0 - shadow_time = None - num_extra_nodes = 0 + sorted_running = sorted(running, key=lambda job: job.end_time) - for job in sorted_queue: - sum_nodes += job.nodes_required - if sum_nodes >= first_job.nodes_required: - shadow_time = current_time + job.wall_time - num_extra_nodes = sum_nodes - job.nodes_required + # Identify when we have enough nodes therefore the start time of the first_job in line + shadow_time_end = 0 + shadow_nodes_avail = len(self.resource_manager.available_nodes) + for job in sorted_running: + if shadow_nodes_avail >= nodes_required: break + else: + shadow_nodes_avail += job.nodes_required + shadow_time_end = job.end_time + + time_limit = shadow_time_end - current_time + # We now have the time_limit after which no backfilled job should end + # as the next job in line has the necessary resrouces after this time limit. + + # Find and return the first job that fits + if self.bfpolicy == BackfillType.EASY: + queue[:] = sorted(queue, key=lambda job: job.submit_time) + return self.return_first_fit(queue,time_limit) + elif self.bfpolicy == BackfillType.FIRSTFIT: + pass # Stay with the prioritization! + return self.return_first_fit(queue,time_limit) + elif self.bfpolicy in [BackfillType.BESTFIT, + BackfillType.GREEDY, + BackfillType.CONSERVATIVE, + ]: + raise NotImplementedError(f"{self.bfpolicy} not implemented! Please implement!") + else: + raise NotImplementedError(f"{self.bfpolicy} not implemented.") - # Find backfill job + def return_first_fit(self, queue, time_limit): for job in queue: - condition1 = job.nodes_required <= num_free_nodes and current_time + job.wall_time < shadow_time - condition2 = job.nodes_required <= min(num_free_nodes, num_extra_nodes) - - if condition1 or condition2: - return job - + if job.time_limit <= time_limit: + nodes_available = self.check_available_nodes(job) + if nodes_available: + return job + else: + continue + else: + continue return None + def sort_fugaku_redeeming(self, queue, accounts=None): if queue == []: return queue -- GitLab From 3147d0d38b15ac73ba842d4db53ce0bc8c6e80f9 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 27 Feb 2025 09:55:40 -0500 Subject: [PATCH 027/388] Modified 'pue' identifier for plotting and grabbing the correct column --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index b2ca73a..8b8f7c2 100644 --- a/main.py +++ b/main.py @@ -203,7 +203,7 @@ if args.plot: if 'pue' in args.plot: if cooling_model: - ylabel = 'PUE_Out[1]' + ylabel = 'pue' title = 'FMU ' + ylabel + 'History' pl = Plotter('Time (s)', ylabel, title, OPATH / f'pue.{args.imtype}', \ uncertainties=args.uncertainties) -- GitLab From dd60f8f19ca5dae40f3cc9c871d4f5f11edcb17e Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 27 Feb 2025 13:07:22 -0500 Subject: [PATCH 028/388] Updated BackfillType.None was not working as expected. Fixed when no Backfill is choosen. 
--- raps/schedulers/default.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index d5dffc4..f29d67f 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -169,7 +169,9 @@ class Scheduler: # as the next job in line has the necessary resrouces after this time limit. # Find and return the first job that fits - if self.bfpolicy == BackfillType.EASY: + if self.bfpolicy == BackfillType.NONE: + pass + elif self.bfpolicy == BackfillType.EASY: queue[:] = sorted(queue, key=lambda job: job.submit_time) return self.return_first_fit(queue,time_limit) elif self.bfpolicy == BackfillType.FIRSTFIT: -- GitLab From 70c7832b9dee1f6315dd18771c1368b55feab4c8 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 27 Feb 2025 18:58:57 -0500 Subject: [PATCH 029/388] Added a job flag for missing telemetry. And updated marconi data loader, not yet fully tested. --- raps/dataloaders/frontier.py | 3 +- raps/dataloaders/marconi100.py | 116 +++++++++++++++++++++++++-------- raps/engine.py | 4 +- raps/job.py | 6 +- 4 files changed, 98 insertions(+), 31 deletions(-) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 510fe4c..13fc4e6 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -260,7 +260,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar else: # Prescribed replay scheduled_nodes = [] - priority = 0 # not used for replay + # priority = 0 # not used for replay + priority = aging_boost(nodes_required) for xname in xnames: indices = xname_to_index(xname, config) scheduled_nodes.append(indices) diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 65de113..7115433 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -23,6 +23,7 @@ """ import uuid import random +import numpy as np import pandas as pd from tqdm import tqdm @@ -60,11 +61,13 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): config = kwargs.get('config') min_time = kwargs.get('min_time', None) arrival = kwargs.get('arrival') - fastforward = kwargs.get('fastforward') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') + debug = kwargs.get('debug') - if fastforward: print(f"fast-forwarding {fastforward} seconds") + #fastforward = kwargs.get('fastforward') + #if fastforward: + # print(f"fast-forwarding {fastforward} seconds") # Sort jobs dataframe based on values in time_start column, adjust indices after sorting jobs_df = jobs_df.sort_values(by='start_time') @@ -77,16 +80,36 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): else: time_zero = jobs_df['start_time'].min() + # Dataset has one value from start to finish. + # Therefore we set telemetry start and end equal to job start and end. 
+    first_start_timestamp = jobs_df['start_time'].min()
+    telemetry_start_timestamp = first_start_timestamp
+
+    last_end_timestamp = jobs_df['end_time'].max()
+    telemetry_end_timestamp = last_end_timestamp
+
+    telemetry_start = 0
+    diff = telemetry_end_timestamp - telemetry_start_timestamp
+    telemetry_end = int(diff.total_seconds())

     num_jobs = len(jobs_df)
-    print("time_zero:", time_zero, "num_jobs", num_jobs)
+
+    if debug:
+        print("num_jobs:", num_jobs)
+        print("telemetry_start:", telemetry_start, "telemetry_end:", telemetry_end)
+        print("telemetry_start_timestamp:", telemetry_start_timestamp, "telemetry_end_timestamp:", telemetry_end_timestamp)
+        print("first_start_timestamp:", first_start_timestamp, "last start timestamp:", jobs_df['start_time'].max())

     jobs = []

     # Map dataframe to job state. Add results to jobs list
     for jidx in tqdm(range(num_jobs - 1), total=num_jobs, desc="Processing Jobs"):

-        account = jobs_df.loc[jidx, 'user_id'] # or 'group_id'
+        account = jobs_df.loc[jidx, 'user_id'] # or 'group_id'?
         job_id = jobs_df.loc[jidx, 'job_id']
+        # allocation_id =
+
         nodes_required = jobs_df.loc[jidx, 'num_nodes_alloc']
+        end_state = jobs_df.loc[jidx, 'job_state']

         if not jid == '*':
             if int(jid) == int(job_id):
@@ -95,10 +118,10 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
             continue

         nodes_required = jobs_df.loc[jidx, 'num_nodes_alloc']
-        name = str(uuid.uuid4())[:6]
+        name = str(uuid.uuid4())[:6] # This generates a random 6 char identifier....

         if validate:
-            cpu_power = jobs_df.loc[jidx, 'node_power_consumption']/jobs_df.loc[jidx, 'num_nodes_alloc']
+            cpu_power = jobs_df.loc[jidx, 'node_power_consumption'] / jobs_df.loc[jidx, 'num_nodes_alloc']
             cpu_trace = cpu_power
             gpu_trace = cpu_trace

@@ -129,25 +152,57 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
             gpu_trace = gpu_util * config['GPUS_PER_NODE']

         priority = int(jobs_df.loc[jidx, 'priority'])
+        partition = int(jobs_df.loc[jidx, 'partition'])
+
+        submit_timestamp = jobs_df.loc[jidx, 'submit_time']
+        diff = submit_timestamp - telemetry_start_timestamp
+        submit_time = int(diff.total_seconds())
+
+        time_limit = jobs_df.loc[jidx, 'time_limit']
+
+        start_timestamp = jobs_df.loc[jidx, 'start_time']
+        diff = start_timestamp - telemetry_start_timestamp
+        start_time = int(diff.total_seconds())
+
+        end_timestamp = jobs_df.loc[jidx, 'end_time']
+        diff = end_timestamp - telemetry_start_timestamp
+        end_time = int(diff.total_seconds())
+
+        wall_time = int(jobs_df.loc[jidx, 'run_time'])
+        if np.isnan(wall_time):
+            wall_time = 0
+        if wall_time != (end_time - start_time):
+            print("wall_time != (end_time - start_time)")
+            print(f"{wall_time} != {(end_time - start_time)}")
+
+        trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds
+        trace_start_time = 0
+        trace_end_time = trace_time
+        if wall_time > trace_time:
+            missing_trace_time = wall_time - trace_time
+            if start_time < 0:
+                trace_start_time = missing_trace_time
+                trace_end_time = wall_time
+            elif end_time > telemetry_end:
+                trace_start_time = 0
+                trace_end_time = trace_time
+            else:
+                # Telemetry missing at the end
+                trace_start_time = 0
+                trace_end_time = trace_time
+        trace_missing_values = True

-        wall_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds
-        end_state = jobs_df.loc[jidx, 'job_state']
-
-        time_start = jobs_df.loc[jidx, 'start_time']
-        time_start = time_start - time_zero
-
-        time_submit = jobs_df.loc[jidx, 'submit_time']
-        time_submit = time_submit - time_zero
+        # What does this do?
+ #if jid == '*': + # # submit_time = max(submit_time.total_seconds(), 0) + # submit_timestamp = jobs_df.loc[jidx, 'submit_time'] + # diff = submit_timestamp - telemetry_start_timestamp + # submit_time = diff.total_seconds() - if jid == '*': - time_submit = max(time_submit.total_seconds(), 0) - else: - # When extracting out a single job, run one iteration past the end of the job - time_submit = config['UI_UPDATE_FREQ'] - if fastforward: - time_start -= fastforward - time_submit -= fastforward + #else: + # # When extracting out a single job, run one iteration past the end of the job + # submit_time = config['UI_UPDATE_FREQ'] if arrival == 'poisson': # Modify the arrival times according to Poisson distribution scheduled_nodes = None @@ -156,12 +211,21 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): else: # Prescribed replay scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() - if gpu_trace.size > 0 and time_submit >= 0: - job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], wall_time, - end_state, scheduled_nodes, time_submit, job_id, priority, time_start) + if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: + + job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], + end_state, scheduled_nodes, + job_id, priority, partition, + submit_time=submit_time, time_limit=time_limit, + start_time=start_time, end_time=end_time, + wall_time=wall_time, trace_time=trace_time, + trace_start_time=trace_start_time, + trace_end_time=trace_end_time, + trace_missing_values=trace_missing_values) + jobs.append(job_info) - return jobs + return jobs, telemetry_start, telemetry_end def node_index_to_name(index: int, config: dict): diff --git a/raps/engine.py b/raps/engine.py index 67485cb..84e947f 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -167,13 +167,13 @@ class Engine: {job.running_time} > {job.wall_time}\n\ {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ ") - if job.running_time < job.trace_start_time or job.running_time > job.trace_end_time: + if job.running_time < job.trace_start_time or job.running_time >= job.trace_end_time: cpu_util = 0 # No values available therefore we assume IDLE == 0 gpu_util = 0 net_util = 0 if self.debug: print("No Values in trace, using IDLE.") - if self.scheduler.policy == PolicyType.REPLAY: + if self.scheduler.policy == PolicyType.REPLAY and not job.trace_missing_values: print(f"{job.running_time} < {job.trace_start_time} or {job.running_time} > {job.trace_end_time}") raise Exception("Replay is using IDLE values! 
Something is wrong!") else: diff --git a/raps/job.py b/raps/job.py index 7fa287e..6f141ad 100644 --- a/raps/job.py +++ b/raps/job.py @@ -4,7 +4,7 @@ def job_dict(nodes_required, name, account, \ cpu_trace, gpu_trace, ntx_trace, nrx_trace, \ end_state, scheduled_nodes, job_id, priority=0, partition=0, submit_time=0, time_limit=0, start_time=0, end_time=0, - wall_time=0, trace_time=0, trace_start_time=0,trace_end_time=0): + wall_time=0, trace_time=0, trace_start_time=0,trace_end_time=0, trace_missing_values=False): """ Return job info dictionary """ return { 'nodes_required': nodes_required, @@ -27,7 +27,9 @@ def job_dict(nodes_required, name, account, \ 'wall_time': wall_time, 'trace_time': trace_time, 'trace_start_time': trace_start_time, - 'trace_end_time': trace_end_time + 'trace_end_time': trace_end_time, + 'trace_missing_values': trace_missing_values + } -- GitLab From 8f78ef03b2fc508b841b49c544e05aea5fd372d0 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 27 Feb 2025 19:17:03 -0500 Subject: [PATCH 030/388] Fixed -t to be a duration and not a time-point. (e.g. -ff 2h -t 1h means fast forward for 2 hours and simulate 1, not fast forward for 2 hours and end after hour 1, i.e. negative runtime.) --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 8b8f7c2..265dd78 100644 --- a/main.py +++ b/main.py @@ -115,7 +115,7 @@ if args.replay: # Set number of timesteps based on the last job running which we assume # is the maximum value of submit_time + wall_time of all the jobs if args.time: - timestep_end = convert_to_seconds(args.time) + timestep_end = timestep_start + convert_to_seconds(args.time) elif not timestep_end: timestep_end = int(max(job['wall_time'] + job['start_time'] for job in jobs)) + 1 -- GitLab From 2b2d4a7f290424451e63277c87b71709cf8d291e Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 27 Feb 2025 20:24:32 -0500 Subject: [PATCH 031/388] Updated the prepare_system_state function to consider the simulation end. This improves simulation time if large telemetry files are loaded, but only a shorter duration is simulated. E.g. By using -t 1d only the data that ended before the start of the day and was submitted before the end of the day is considered. --- raps/engine.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 84e947f..2ca957d 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -281,12 +281,12 @@ class Engine: self.current_time += 1 return tick_data - def prepare_system_state(self, all_jobs:List, timestep_start, replay:bool): + def prepare_system_state(self, all_jobs:List, timestep_start, timestep_end, replay:bool): # Modifies Jobs object self.current_time = timestep_start - # Keep only jobs that have not yet ended - all_jobs[:] = [job for job in all_jobs if job['end_time'] >= timestep_start] + # Keep only jobs that have not yet ended and that have a chance to start + all_jobs[:] = [job for job in all_jobs if job['end_time'] >= timestep_start and job['submit_time'] < timestep_end] all_jobs.sort(key=lambda j: j['submit_time']) @@ -308,7 +308,7 @@ class Engine: replay = False # Place jobs that are currently running, onto the system. 
-        self.prepare_system_state(jobs, timestep_start, replay)
+        self.prepare_system_state(jobs, timestep_start, timestep_end, replay)

         for timestep in range(timestep_start,timestep_end):
             completed_jobs, newly_downed_nodes = self.prepare_timestep(replay)
--
GitLab

From 6b7a842f3d5fa0e2c638e77aba0ecd2801c071ab Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 28 Feb 2025 18:21:35 -0500
Subject: [PATCH 032/388] Batching for run_simulation loop in 6h windows for
 better performance

---
 raps/engine.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index 2ca957d..a8b8836 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -92,7 +92,6 @@ class Engine:
                 eligible_jobs_list.append(job_instance)

         self.queue += eligible_jobs_list
-
     def add_eligible_jobs_to_queue(self, jobs_to_submit: List):
         """
         Mofifies jobs_to_submit
@@ -310,16 +309,30 @@ class Engine:
         # Place jobs that are currently running, onto the system.
         self.prepare_system_state(jobs, timestep_start, timestep_end, replay)

+        # Process jobs in batches for better performance of the timestep loop
+        all_jobs = jobs.copy()
+        jobs = []
+
         for timestep in range(timestep_start,timestep_end):
+
+            # Batch jobs into 6h windows based on submit_time
+            batch_window = 60 * 60 * 6 # 6h
+            if (timestep % batch_window == 0) or (timestep == timestep_start):
+                # Add jobs that are within the batching window and remove them from all_jobs
+                jobs += [job for job in all_jobs if job['submit_time'] <= timestep + batch_window]
+                all_jobs[:] = [job for job in all_jobs if job['submit_time'] > timestep + batch_window]
+
+            # Start simulation loop:
+            # 1. Cleanup old jobs
             completed_jobs, newly_downed_nodes = self.prepare_timestep(replay)

-            # Identify eligible jobs and add them to the queue.
+            # 2. Identify eligible jobs and add them to the queue.
             self.add_eligible_jobs_to_queue(jobs)

-            # Schedule jobs that are now in the queue.
+            # 3. Schedule jobs that are now in the queue.
             self.scheduler.schedule(self.queue, self.running, self.current_time, sorted=False)

-            # Stop the simulation if no more jobs are running or in the queue.
-            if autoshutdown and not self.queue and not self.running and not self.replay:
+            # Stop the simulation if no more jobs are running or in the queue or in the job list.
+            if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs:
                 print(f"[DEBUG] {self.config['system_name']} - Stopping simulation at time {self.current_time}")
                 break
--
GitLab

From 8e5f21365e2e6b341538cd591d4500801fde5472 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Sat, 1 Mar 2025 18:53:19 -0500
Subject: [PATCH 033/388] Updated rich layouts to use Live and added a progress
 bar to see how far the simulation is.
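The core of this UI change is rich's Live/Progress pattern. A minimal,
runnable sketch of that pattern (rich's public API; the layout names and the
task total here are illustrative, not the exact RAPS layout):

    from rich.layout import Layout
    from rich.live import Live
    from rich.progress import Progress, BarColumn, MofNCompleteColumn

    layout = Layout()
    layout.split_column(Layout(name="main"), Layout(name="progress", size=1))

    progress = Progress(BarColumn(bar_width=None), MofNCompleteColumn())
    task = progress.add_task("Progress", total=100)  # total = timesteps to simulate

    with Live(layout, refresh_per_second=5):
        for _ in range(100):
            progress.update(task, advance=1)
            layout["progress"].update(progress.get_renderable())

Live repaints the whole layout at a fixed refresh rate, so the simulation loop
only mutates the layout's panels instead of printing on every tick.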
--- main.py | 5 ++-- raps/ui.py | 71 ++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/main.py b/main.py index 265dd78..674d68e 100644 --- a/main.py +++ b/main.py @@ -69,7 +69,6 @@ sc = Engine( cooling_model=cooling_model, **args_dict, ) -layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, **config) timestep_start = 0 if args.fastforward: @@ -159,7 +158,9 @@ if args.plot or args.output: if args.verbose: print(jobs) -print(f'Simulating {len(jobs)} jobs for {timestep_end - timestep_start} seconds') +total_timesteps = timestep_end - timestep_start +print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds') +layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config) layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end) engine_stats = get_engine_stats(sc) diff --git a/raps/ui.py b/raps/ui.py index d7af1b3..73fbdf7 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -4,13 +4,16 @@ from rich.console import Console from rich.layout import Layout from rich.panel import Panel from rich.table import Table +from rich.live import Live +from rich.progress import Progress,TextColumn,BarColumn,TaskProgressColumn,TimeRemainingColumn, track, TimeElapsedColumn, MofNCompleteColumn + from .utils import summarize_ranges, convert_seconds from .constants import ELLIPSES from .engine import TickData, Engine class LayoutManager: - def __init__(self, layout_type, engine: Engine, debug, **config): + def __init__(self, layout_type, engine: Engine, total_timesteps=0, debug=None, **config): self.engine = engine self.config = config self.console = Console() @@ -22,20 +25,32 @@ class LayoutManager: self.racks_per_cdu = self.config['RACKS_PER_CDU'] self.power_column = self.power_df_header[self.racks_per_cdu + 1] self.loss_column = self.power_df_header[-1] + self.progress = Progress( + TextColumn("Progress: [progress.percentage]{task.percentage:>3.0f}%"), + BarColumn(bar_width=None), + TextColumn("•"), + MofNCompleteColumn(), + TextColumn("•"), + TimeElapsedColumn(), + TextColumn("•"), + TimeRemainingColumn() + ) + self.progress_task = self.progress.add_task("Progress",total=total_timesteps, name="Progress") def setup_layout(self, layout_type): + self.layout.split_column(Layout(name="main"),Layout(name="progress",size=1)) if layout_type == "layout2": - self.layout.split_row(Layout(name="left", ratio=3), Layout(name="right", ratio=2)) - self.layout["left"].split_column( + self.layout["main"].split_row(Layout(name="left", ratio=3), Layout(name="right", ratio=2)) + self.layout["main"]["left"].split_column( Layout(name="pressflow", ratio=6), Layout(name="powertemp", ratio=11), Layout(name="totpower", ratio=3), ) - self.layout["right"].split(Layout(name="scheduled", ratio=17), Layout(name="status", ratio=3)) + self.layout["main"]["right"].split(Layout(name="scheduled", ratio=17), Layout(name="status", ratio=3)) else: - self.layout.split_row(Layout(name="left", ratio=1), Layout(name="right", ratio=1)) - self.layout["left"].split_column(Layout(name="upper", ratio=8), Layout(name="lower", ratio=2)) - self.layout["right"].split_column(Layout(name="scheduled", ratio=8), Layout(name="status", ratio=2)) + self.layout["main"].split_row(Layout(name="left", ratio=1), Layout(name="right", ratio=1)) + self.layout["main"]["left"].split_column(Layout(name="upper", ratio=8), Layout(name="lower", ratio=2)) + 
self.layout["main"]["right"].split_column(Layout(name="scheduled", ratio=8), Layout(name="status", ratio=2)) def create_table(self, title, columns, header_style="bold green"): """ @@ -373,25 +388,34 @@ class LayoutManager: self.layout["lower"].update(Panel(Align(total_table, align="center"), title="Power and Performance")) + def update_progress(self, timestamp): + self.progress.update(self.progress_task, description=f"{timestamp}",advance=timestamp,transient=True) + self.layout["progress"].update(self.progress.get_renderable()) + def update(self, data: TickData): uncertainties = self.engine.power_manager.uncertainties - if self.engine.cooling_model: - self.update_powertemp_array( - data.power_df, data.fmu_outputs, data.p_flops, data.g_flops_w, data.system_util, - uncertainties=uncertainties, + if data.current_time % self.config['UI_UPDATE_FREQ'] == 0: + if self.engine.cooling_model: + self.update_powertemp_array( + data.power_df, data.fmu_outputs, data.p_flops, data.g_flops_w, data.system_util, + uncertainties=uncertainties, + ) + self.update_pressflow_array(data.fmu_outputs) + + self.update_scheduled_jobs(data.running + data.queue) + self.update_status( + data.current_time, len(data.running), len(data.queue), data.num_active_nodes, + data.num_free_nodes, data.down_nodes, + ) + self.update_power_array( + data.power_df, data.p_flops, data.g_flops_w, + data.system_util, uncertainties=uncertainties, ) - self.update_pressflow_array(data.fmu_outputs) + if False: + self.render() + self.update_progress(1) - self.update_scheduled_jobs(data.running + data.queue) - self.update_status( - data.current_time, len(data.running), len(data.queue), data.num_active_nodes, - data.num_free_nodes, data.down_nodes, - ) - self.update_power_array( - data.power_df, data.p_flops, data.g_flops_w, - data.system_util, uncertainties=uncertainties, - ) def render(self): if not self.debug: @@ -400,10 +424,9 @@ class LayoutManager: def run(self, jobs, timestep_start, timestep_end): """ Runs the UI, blocking until the simulation is complete """ - for data in self.engine.run_simulation(jobs, timestep_start, timestep_end): - if data.current_time % self.config['UI_UPDATE_FREQ'] == 0: + with Live(self.layout, refresh_per_second=5): + for data in self.engine.run_simulation(jobs, timestep_start, timestep_end): self.update(data) - self.render() def run_stepwise(self, jobs, timestep_start, timestep_end): """ Prepares the UI and returns a generator for the simulation """ -- GitLab From cd97807a0b98127989b1ebedc03548e9ca283af5 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 3 Mar 2025 15:07:19 -0500 Subject: [PATCH 034/388] Upadted fugaku data loader for new engine loop. Updated engine loop to work with single utilization values again, instead of expecting a list. --- raps/dataloaders/fugaku.py | 83 ++++++++++++++++++++++++++++++-------- raps/engine.py | 9 +++-- 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 601119f..2632863 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -47,6 +47,8 @@ def load_data_from_df(df, **kwargs): Returns: list: List of job dictionaries. 
+ int: Telemetry Start (in seconds 0) + int: Telemetry End (in seconds) """ encrypt_bool = kwargs.get('encrypt') fastforward = kwargs.get('fastforward') @@ -54,44 +56,84 @@ def load_data_from_df(df, **kwargs): validate = kwargs.get('validate') jid = kwargs.get('jid', '*') config = kwargs.get('config') - min_time = kwargs.get('min_time', None) if fastforward: print(f"fast-forwarding {fastforward} seconds") job_list = [] + # Convert all times to datetime and find the min and max thereof for reference use. # Convert 'adt' (submit time) to datetime and find the earliest submission time df['adt'] = pd.to_datetime(df['adt'], errors='coerce') - if not min_time: - min_time = df['adt'].min() + df['sdt'] = pd.to_datetime(df['sdt'], errors='coerce') + df['edt'] = pd.to_datetime(df['edt'], errors='coerce') + + # We only have average power therefore we set the earliest telemetry to the earliest start time + first_start_timestamp = df['sdt'].min() + last_end_timestamp = df['edt'].max() + telemetry_start_timestamp = first_start_timestamp + telemetry_start = 0 + telemetry_end_timestamp = last_end_timestamp + diff = telemetry_end_timestamp - telemetry_start_timestamp + telemetry_end = int(diff.total_seconds()) # Loop through the DataFrame rows to extract job information for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing Jobs"): nodes_required = row['nnumr'] if 'nnumr' in df.columns else 0 - account = row['usr'] name = row['jnam'] if 'jnam' in df.columns else 'unknown' + account = row['usr'] if validate: cpu_trace = row['avgpcon'] gpu_trace = cpu_trace else: - cpu_trace = row['perf1'] if 'perf1' in df.columns else 0 # Assuming some performance metric as cpu_trace + # cpu_trace = row['perf1'] if 'perf1' in df.columns else 0 # Assuming some performance metric as cpu_trace + cpu_trace = row['perf1'] / (row['perf1'] + row['perf6']) if 'perf1' in df.columns else 0 # Total Opts / Total Ops + Idle Ops gpu_trace = 0 # Set to 0 as GPU trace is not explicitly provided - wall_time = row['duration'] if 'duration' in df.columns else 0 + # No network trace + end_state = row['exit state'] if 'exit state' in df.columns else 'unknown' - #scheduled_nodes = row['nnuma'] if 'nnuma' in df.columns else 0 - scheduled_nodes = None - submit_time = row['adt'] if 'adt' in df.columns else min_time - if arrival == 'poisson': # Modify the arrival times of according to Poisson distribution - time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) - else: - time_offset = (submit_time - min_time).total_seconds() # Compute time offset in seconds + scheduled_nodes = None # Only nodes_required is in the trace job_id = row['jid'] if 'jid' in df.columns else 'unknown' priority = row['pri'] if 'pri' in df.columns else 0 + submit_timestamp = pd.to_datetime(row['adt']) if 'adt' in df.columns else -1 # Else job was submitted in the past + diff = submit_timestamp - telemetry_start_timestamp + submit_time = int(diff.total_seconds()) + + time_limit = int(row['elpl']) if 'elpl' in df.columns else 24 * 60 * 60 # in seconds + + start_timestamp = pd.to_datetime(row['sdt']) if 'sdt' in df.columns else 0 + diff = start_timestamp - telemetry_start_timestamp + start_time = int(diff.total_seconds()) + + end_timestamp = pd.to_datetime(row['edt']) if 'edt' in df.columns else 0 + diff = end_timestamp - telemetry_start_timestamp + end_time = int(diff.total_seconds()) + + wall_time = end_time - start_time + #duration = int(row['duration']) if 'duration' in df.columns else 0 # in seconds Recorded duration and wall_time do not match! 
+ #if (wall_time != duration): + # if abs(wall_time - duration) <= 1: # offset is often 1 + # wall_time = min(wall_time,duration) + # else: + # raise ValueError(f"Duration: {row}") # Offset can be as large as 15 minutes! Removed. + + # We only have a single average value, set trace times as if we had all. + trace_time = wall_time + trace_start_time = start_time + trace_end_time = end_time + trace_missing_values = False # Sane Choice? + + # Should we still have this? + # if arrival == 'poisson': # Modify the arrival times of according to Poisson distribution + # time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) + # else: + # time_offset = (submit_time - min_time).total_seconds() # Compute time offset in seconds + # Removed from job_dict: time_offset=time_offset, + # Create job dictionary job_info = job_dict( nodes_required=nodes_required, @@ -101,17 +143,24 @@ def load_data_from_df(df, **kwargs): gpu_trace=gpu_trace, ntx_trace=[], nrx_trace=[], - wall_time=wall_time, end_state=end_state, scheduled_nodes=scheduled_nodes, - time_offset=time_offset, job_id=job_id, - priority=priority + priority=priority, + submit_time=submit_time, + time_limit=time_limit, + start_time=start_time, + end_time=end_time, + wall_time=wall_time, + trace_time=trace_time, + trace_start_time=trace_start_time, + trace_end_time=trace_end_time, + trace_missing_values=trace_missing_values ) job_list.append(job_info) - return job_list + return job_list, telemetry_start, telemetry_end def node_index_to_name(index: int, config: dict): diff --git a/raps/engine.py b/raps/engine.py index a8b8836..cf4e6fa 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -165,8 +165,9 @@ class Engine: raise Exception(f"Job should have ended already!\n\ {job.running_time} > {job.wall_time}\n\ {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ - ") - if job.running_time < job.trace_start_time or job.running_time >= job.trace_end_time: + ") + # job.running_time < job.trace_start_time or + if job.running_time >= job.trace_end_time: cpu_util = 0 # No values available therefore we assume IDLE == 0 gpu_util = 0 net_util = 0 @@ -177,7 +178,7 @@ class Engine: raise Exception("Replay is using IDLE values! Something is wrong!") else: time_quanta_index = int((job.running_time - job.trace_start_time) // self.config['TRACE_QUANTA']) - if time_quanta_index == len(job.cpu_trace): + if isinstance(job.cpu_trace, List) and time_quanta_index == len(job.cpu_trace): # If the running time is past the last time step in the # trace, use the last value in the trace. This can # happen if the last valid timesteps is e.g. 17%15, @@ -191,7 +192,7 @@ class Engine: gpu_util = get_utilization(job.gpu_trace, time_quanta_index) net_util = 0 - if len(job.ntx_trace) and len(job.nrx_trace): + if isinstance(job.ntx_trace,List) and len(job.ntx_trace) and isinstance(job.nrx_trace,List) and len(job.nrx_trace): net_tx = get_utilization(job.ntx_trace, time_quanta_index) net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx) -- GitLab From a35d1f2801e2b6f1085a93d1985f390f3cf1c65f Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 3 Mar 2025 17:09:36 -0500 Subject: [PATCH 035/388] Updated adastra data loader for the new engine/scheduler loop. 
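Common to these loader updates is recovering a utilization estimate from
average power by inverting a linear power model. A small sketch of the
arithmetic (the constants are illustrative; the real loaders pull
POWER_*_IDLE/POWER_*_MAX and device counts from the system config):

    def power_to_util(watts_per_node, p_idle, p_max, devs_per_node):
        # Invert: watts = devs * p_idle + util * (p_max - p_idle)
        util = (watts_per_node / p_idle - devs_per_node) / (p_max / p_idle - 1.0)
        return max(0.0, util)  # below-idle readings would otherwise go negative

    # Example: 4 GPUs, 100 W idle and 500 W max each; node draws 1200 W on GPUs
    print(power_to_util(1200.0, 100.0, 500.0, 4))  # -> 2.0 "busy" GPUs

The clamp at zero mirrors the np.maximum(0, ...) calls in the diffs, since
telemetry can report power below the configured idle floor.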
--- raps/dataloaders/adastraMI250.py | 125 ++++++++++++++++++------------- raps/dataloaders/frontier.py | 2 +- raps/dataloaders/fugaku.py | 8 +- raps/dataloaders/marconi100.py | 12 +-- raps/telemetry.py | 4 +- 5 files changed, 83 insertions(+), 68 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 1dcc95f..894717e 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -5,16 +5,16 @@ # to simulate the dataset - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastra + python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 - # to replay with different arrival distribution - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastra --arrival poisson + # to replay with different scheduling policy + python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --policy priority --backfill easy # to fast-forward 60 days and replay for 1 day - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastra -ff 60d -t 1d + python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 -ff 60d -t 1d # to analyze dataset - python -m raps.telemetry -f /path/to/AdastaJobsMI250_15days.parquet --system adastra -v + python -m raps.telemetry -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 -v """ import uuid @@ -52,74 +52,74 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): ------- list The list of parsed jobs. + telemetry_start + telemetry_end """ count_jobs_notOK = 0 config = kwargs.get('config') - min_time = kwargs.get('min_time', None) arrival = kwargs.get('arrival') - fastforward = kwargs.get('fastforward') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') - if fastforward: print(f"fast-forwarding {fastforward} seconds") - # Sort jobs dataframe based on values in time_start column, adjust indices after sorting jobs_df = jobs_df.sort_values(by='start_time') jobs_df = jobs_df.reset_index(drop=True) - # Take earliest time as baseline reference - # We can use the start time of the first job. - if min_time: - time_zero = min_time - else: - time_zero = jobs_df['start_time'].min() + # We only have average power, therefore use the first start time as the start time for the telemetry + telemetry_start_timestamp = jobs_df['start_time'].min() + telemetry_end_timestamp = jobs_df['end_time'].max() + + telemetry_start_time = 0 + diff = telemetry_end_timestamp - telemetry_start_timestamp + telemetry_end_time = int(diff.total_seconds()) num_jobs = len(jobs_df) - print("time_zero:", time_zero, "num_jobs", num_jobs) + print("First start time:", telemetry_start_timestamp, "num_jobs", num_jobs) jobs = [] # Map dataframe to job state. 
Add results to jobs list for jidx in tqdm(range(num_jobs - 1), total=num_jobs, desc="Processing Jobs"): - account = jobs_df.loc[jidx, 'user_id'] # or 'group_id' job_id = jobs_df.loc[jidx, 'job_id'] - if not jid == '*': if int(jid) == int(job_id): print(f'Extracting {job_id} profile') else: continue - nodes_required = jobs_df.loc[jidx, 'num_nodes_alloc'] + nodes_required = jobs_df.loc[jidx, 'num_nodes_alloc'] name = str(uuid.uuid4())[:6] - wall_time = jobs_df.loc[jidx, 'run_time'] + account = jobs_df.loc[jidx, 'user_id'] + + wall_time = int(jobs_df.loc[jidx, 'run_time']) if wall_time <= 0: print("error wall_time",wall_time) continue if nodes_required <= 0: print("error nodes_required",nodes_required) continue - #wall_time = gpu_trace.size * TRACE_QUANTA # seconds if validate: node_power = jobs_df.loc[jidx, 'node_power_consumption'] node_power_array = node_power.tolist() - node_watts = sum(node_power_array) / (wall_time*nodes_required) + node_watts = sum(node_power_array) / (wall_time * nodes_required) cpu_trace = node_watts gpu_trace = 0.0 # should contain stddev_node_power when --validate flag is used else: cpu_power = jobs_df.loc[jidx, 'cpu_power_consumption'] cpu_power_array = cpu_power.tolist() - cpu_watts = sum(cpu_power_array) / (wall_time*nodes_required) + cpu_watts = sum(cpu_power_array) / (wall_time * nodes_required) cpu_min_power = config['POWER_CPU_IDLE'] * config['CPUS_PER_NODE'] cpu_max_power = config['POWER_CPU_MAX'] * config['CPUS_PER_NODE'] + cpu_util = (cpu_watts / float(config['POWER_CPU_IDLE']) - config['CPUS_PER_NODE']) \ + / ((float(config['POWER_CPU_MAX']) / float(config['POWER_CPU_IDLE'])) - 1.0) + # power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) + # print("cpu_watts",cpu_watts,"cpu_util",cpu_util) - cpu_util = (cpu_watts/float(config['POWER_CPU_IDLE']) - config['CPUS_PER_NODE']) / ((float(config['POWER_CPU_MAX']) / float(config['POWER_CPU_IDLE'])) -1.0) #power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) - # print("cpu_watts",cpu_watts,"cpu_util",cpu_util) cpu_trace = np.maximum(0, cpu_util) node_power = (jobs_df.loc[jidx, 'node_power_consumption']).tolist() @@ -131,31 +131,19 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): cpu_power = cpu_power[:min_length] mem_power = mem_power[:min_length] - gpu_power = (node_power - cpu_power - mem_power - - ([config['NICS_PER_NODE'] * config['POWER_NIC']])) + gpu_power = (node_power - cpu_power - mem_power \ + - ([config['NICS_PER_NODE'] * config['POWER_NIC']])) gpu_power_array = gpu_power.tolist() - gpu_watts = sum(gpu_power_array) / (wall_time*nodes_required) - gpu_min_power = config['POWER_GPU_IDLE'] * config['GPUS_PER_NODE'] - gpu_max_power = config['POWER_GPU_MAX'] * config['GPUS_PER_NODE'] - gpu_util = (gpu_watts/float(config['POWER_GPU_IDLE']) - config['GPUS_PER_NODE']) / ((float(config['POWER_GPU_MAX']) / float(config['POWER_GPU_IDLE'])) -1.0) #power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) - # print("gpu_watts",gpu_watts,"gpu_util",gpu_util) - gpu_trace = np.maximum(0, gpu_util) #gpu_util * GPUS_PER_NODE - - priority = int(jobs_df.loc[jidx, 'priority']) + gpu_watts = sum(gpu_power_array) / (wall_time * nodes_required) + gpu_min_power = config['POWER_GPU_IDLE'] * config['GPUS_PER_NODE'] + gpu_max_power = config['POWER_GPU_MAX'] * config['GPUS_PER_NODE'] + gpu_util = (gpu_watts / float(config['POWER_GPU_IDLE']) - config['GPUS_PER_NODE']) \ + / ((float(config['POWER_GPU_MAX']) / float(config['POWER_GPU_IDLE'])) - 1.0) + # 
power_to_utilization(gpu_power_array, gpu_min_power, gpu_max_power)
+            # print("gpu_watts",gpu_watts,"gpu_util",gpu_util)
+            gpu_trace = np.maximum(0, gpu_util)

         end_state = jobs_df.loc[jidx, 'job_state']
-        time_start = jobs_df.loc[jidx, 'start_time']
-        time_end = jobs_df.loc[jidx, 'end_time']
-        diff = time_start - time_zero
-
-        if jid == '*':
-            time_offset = max(diff.total_seconds(), 0)
-        else:
-            # When extracting out a single job, run one iteration past the end of the job
-            time_offset = config['UI_UPDATE_FREQ']
-
-        if fastforward:
-            time_offset -= fastforward

         if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution
             scheduled_nodes = None
@@ -163,15 +151,50 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
         else: # Prescribed replay
             scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist()

-        if time_offset >= 0 and wall_time > 0:
-            job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [],[],wall_time,
-                                end_state, scheduled_nodes, time_offset, job_id, priority)
+        priority = int(jobs_df.loc[jidx, 'priority'])
+
+        submit_timestamp = jobs_df.loc[jidx, 'submit_time']
+        diff = submit_timestamp - telemetry_start_timestamp
+        submit_time = int(diff.total_seconds())
+
+        time_limit = jobs_df.loc[jidx, 'time_limit'] # in seconds
+
+        start_timestamp = jobs_df.loc[jidx, 'start_time']
+        diff = start_timestamp - telemetry_start_timestamp
+        start_time = int(diff.total_seconds())
+
+        end_timestamp = jobs_df.loc[jidx, 'end_time']
+        diff = end_timestamp - telemetry_start_timestamp
+        end_time = int(diff.total_seconds())
+
+        if wall_time != end_time - start_time:
+            print("wall_time != end_time - start_time")
+            print(f"{wall_time} != {end_time - start_time}")
+            print(jobs_df.loc[jidx])
+
+        trace_time = wall_time
+        trace_start_time = start_time
+        trace_end_time = end_time
+
+        if wall_time > 0:
+            job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [],
+                                end_state, scheduled_nodes, job_id, priority,
+                                submit_time=submit_time,
+                                time_limit=time_limit,
+                                start_time=start_time,
+                                end_time=end_time,
+                                wall_time=wall_time,
+                                trace_time=trace_time,
+                                trace_start_time=trace_start_time,
+                                trace_end_time=trace_end_time,
+                                trace_missing_values=True
+                                )
             jobs.append(job_info)
         else:
             count_jobs_notOK += 1

     print("jobs not added: ", count_jobs_notOK)
-    return jobs
+    return jobs, telemetry_start_time, telemetry_end_time

 def xname_to_index(xname: str, config: dict):
     """
diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py
index 13fc4e6..44c8838 100644
--- a/raps/dataloaders/frontier.py
+++ b/raps/dataloaders/frontier.py
@@ -174,7 +174,7 @@
     # Map dataframe to job state. Add results to jobs list
     for jidx in tqdm(range(num_jobs - 1), total=num_jobs, desc="Processing Jobs"):

-        user = jobs_df.loc[jidx, 'user']
+        # user = jobs_df.loc[jidx, 'user']
         account = jobs_df.loc[jidx, 'account']
         job_id = jobs_df.loc[jidx, 'job_id']
         allocation_id = jobs_df.loc[jidx, 'allocation_id']
diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py
index 2632863..880ed0e 100644
--- a/raps/dataloaders/fugaku.py
+++ b/raps/dataloaders/fugaku.py
@@ -10,8 +10,9 @@
     The '--arrival poisson' will compute submit times from Poisson distribution,
     instead of using the submit times given in F-Data.
-    python main.py --system fugaku -f /path/to/21_04.parquet --arrival poisson --validate
-
+    python main.py --system fugaku -f /path/to/21_04.parquet
+    python main.py --system fugaku -f /path/to/21_04.parquet --validate
+    python main.py --system fugaku -f /path/to/21_04.parquet --policy priority --backfill easy
 """
 import pandas as pd
 from tqdm import tqdm
@@ -51,14 +52,11 @@ def load_data_from_df(df, **kwargs):
         int: Telemetry End (in seconds)
     """
     encrypt_bool = kwargs.get('encrypt')
-    fastforward = kwargs.get('fastforward')
     arrival = kwargs.get('arrival')
     validate = kwargs.get('validate')
     jid = kwargs.get('jid', '*')
     config = kwargs.get('config')

-    if fastforward: print(f"fast-forwarding {fastforward} seconds")
-
     job_list = []

     # Convert all times to datetime and find the min and max thereof for reference use.
diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py
index 7115433..d067d57 100644
--- a/raps/dataloaders/marconi100.py
+++ b/raps/dataloaders/marconi100.py
@@ -11,8 +11,9 @@
     # to simulate the dataset
     python main.py -f /path/to/job_table.parquet --system marconi100

-    # to replay using modified arrival times
-    python main.py -f /path/to/job_table.parquet --system marconi100 --arrival poisson
+    # to replay using different schedulers
+    python main.py -f /path/to/job_table.parquet --system marconi100 --policy fcfs --backfill easy
+    python main.py -f /path/to/job_table.parquet --system marconi100 --policy priority --backfill firstfit

     # to fast-forward 60 days and replay for 1 day
     python main.py -f /path/to/job_table.parquet --system marconi100 -ff 60d -t 1d
@@ -73,13 +74,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
     jobs_df = jobs_df.sort_values(by='start_time')
     jobs_df = jobs_df.reset_index(drop=True)

-    # Take earliest time as baseline reference
-    # We can use the start time of the first job.
-    if min_time:
-        time_zero = min_time
-    else:
-        time_zero = jobs_df['start_time'].min()
-
     # Dataset has one value from start to finish.
     # Therefore we set telemetry start and end equal to job start and end.
diff --git a/raps/telemetry.py b/raps/telemetry.py
index ce78b5b..2d8e330 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -84,13 +84,13 @@ if __name__ == "__main__":

     if args.replay[0].endswith(".npz"):
         print(f"Loading {args.replay[0]}...")
-        jobs = td.load_snapshot(args.replay[0])
+        jobs, _, _ = td.load_snapshot(args.replay[0])
         if args.arrival == "poisson":
             for job in tqdm(jobs, desc="Updating requested_nodes"):
                 job['requested_nodes'] = None
                 job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME'])
     else:
-        jobs = td.load_data(args.replay)
+        jobs, _, _ = td.load_data(args.replay)

     timesteps = int(max(job['wall_time'] + job['submit_time'] for job in jobs))
--
GitLab

From 3338f6ebe1f9a72e5246819b240158cf64fbd1d6 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 3 Mar 2025 18:52:39 -0500
Subject: [PATCH 036/388] Update lassen dataloader for new engine/scheduler
 loop

Next: fix npz files.
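The reworked loaders all share one convention: absolute timestamps become
integer second offsets relative to the earliest telemetry timestamp. A minimal
sketch of that conversion (pandas Timestamps assumed, as in the dataframes
these loaders read; the dates are illustrative):

    import pandas as pd

    def to_offset_seconds(ts: pd.Timestamp, telemetry_start: pd.Timestamp) -> int:
        # Offset in whole seconds from the telemetry start; negative values
        # mean the event happened before telemetry began.
        return int((ts - telemetry_start).total_seconds())

    t0 = pd.Timestamp("2021-04-01 00:00:00")
    print(to_offset_seconds(pd.Timestamp("2021-04-01 06:00:00"), t0))  # 21600

With this convention, submit_time, start_time, and end_time are directly
comparable to the simulation's timestep counter.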
--- config/lassen/system.json | 3 +- raps/dataloaders/lassen.py | 138 ++++++++++++++++++++----------------- 2 files changed, 78 insertions(+), 63 deletions(-) diff --git a/config/lassen/system.json b/config/lassen/system.json index 77d860f..bf739d4 100644 --- a/config/lassen/system.json +++ b/config/lassen/system.json @@ -6,13 +6,14 @@ "CHASSIS_PER_RACK": 1, "NODES_PER_BLADE": 1, "SWITCHES_PER_CHASSIS": 5, - "NICS_PER_NODE": 2, + "NICS_PER_NODE": 2, "RECTIFIERS_PER_CHASSIS": 5, "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [44], "DOWN_NODES": [], "CPUS_PER_NODE": 2, "CORES_PER_CPU": 22, + "CPU_FREQUENCY": 2400000000, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 396.8E9, "GPU_PEAK_FLOPS": 7.8E12, diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 8e2c37f..08ec59c 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -26,24 +26,21 @@ Usage Instructions: python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 37d -t 1d """ import math -import numpy as np import os +import uuid +import numpy as np import pandas as pd from tqdm import tqdm -try: - from ..job import job_dict - from ..utils import power_to_utilization, next_arrival - -except: - pass +from ..job import job_dict +from ..utils import power_to_utilization, next_arrival def load_data(path, **kwargs): """ Loads data from the given file paths and returns job info. """ - nrows = 1E4 + nrows = 1E5 alloc_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_history_hashed.csv'), nrows=nrows) node_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_node_history.csv'), nrows=nrows) step_df = pd.read_csv(os.path.join(path[0], 'final_csm_step_history.csv'), nrows=nrows) @@ -56,25 +53,30 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): """ config = kwargs.get('config') jid = kwargs.get('jid', '*') + validate = kwargs.get('validate') arrival = kwargs.get('arrival') - fastforward = kwargs.get('fastforward') verbose = kwargs.get('verbose') - min_time = kwargs.get('min_time', None) - - if fastforward: - print(f"fast-forwarding {fastforward} seconds") allocation_df['job_submit_time'] = pd.to_datetime(allocation_df['job_submit_time'], format='mixed', errors='coerce') allocation_df['begin_time'] = pd.to_datetime(allocation_df['begin_time'], format='mixed', errors='coerce') allocation_df['end_time'] = pd.to_datetime(allocation_df['end_time'], format='mixed', errors='coerce') - if not min_time: - min_time = pd.to_datetime(allocation_df['begin_time']).min() + telemetry_start_timestamp = allocation_df['begin_time'].min() + telemetry_start_time = 0 + telemetry_end_timestamp = allocation_df['end_time'].max() + diff = telemetry_end_timestamp - telemetry_start_timestamp + telemetry_end_time = int(math.ceil(diff.total_seconds())) job_list = [] for _, row in tqdm(allocation_df.iterrows(), total=len(allocation_df), desc="Processing Jobs"): + + account = row['hashed_user_id'] job_id = row['primary_job_id'] + allocation_id = row['allocation_id'] + nodes_required = row['num_nodes'] + end_state = row['exit_status'] + name = str(uuid.uuid4())[:6] # This generates a random 6 char identifier.... 
if not jid == '*': if int(jid) == int(job_id): @@ -84,55 +86,57 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): node_data = node_df[node_df['allocation_id'] == row['allocation_id']] - nodes_required = row['num_nodes'] - wall_time = compute_wall_time(row['begin_time'], row['end_time']) samples = math.ceil(wall_time / config['TRACE_QUANTA']) - # Compute GPU power - gpu_energy = node_data['gpu_energy'].sum() # Joules - # divide by nodes_required to get average gpu_usage per node - gpu_usage = node_data['gpu_usage'].sum() / 1E6 / nodes_required # seconds - gpu_power = gpu_energy / gpu_usage if gpu_usage > 0 else 0 - #gpu_power = gpu_energy / wall_time - gpu_power_array = np.array([gpu_power] * samples) - - gpu_min_power = nodes_required * config['POWER_GPU_IDLE'] - gpu_max_power = nodes_required * config['POWER_GPU_MAX'] - gpu_util = power_to_utilization(gpu_power_array, gpu_min_power, gpu_max_power) - # GPU power can be 0: - # Utilization is defined in the range of [0 to GPUS_PER_NODE]. - # gpu_util will be negative if power reports 0, which is smaller than POWER_GPU_IDLE - # Therefore: gpu_util should be set to zero if it is smaller than 0. - gpu_trace = np.maximum(0, gpu_util) - - # Compute CPU power from CPU usage time - # CPU usage is reported per core, while we need it in the range [0 to CPUS_PER_NODE] - cpu_usage = node_data['cpu_usage'].sum() / 1E9 / nodes_required / config['CORES_PER_CPU'] # seconds - cpu_usage_array = np.array([cpu_usage] * samples) - cpu_util = cpu_usage_array / wall_time - cpu_trace = cpu_util # * CPUS_PER_NODE - # TODO use total energy for validation - # Only Node Energy and GPU Energy is reported! - # total_energy = node_data['energy'].sum() # Joules + if validate: + # Validate should represent the node power and not split it according to cpu and gpu. + # Not sure if this is correct. + cpu_power = (node_data['energy'].sum() / nodes_required) / wall_time + cpu_trace = cpu_power + gpu_trace = 0 # = cpu_trace # Is this correct? + else: + # Compute GPU power + gpu_power = (node_data['gpu_energy'].sum() / nodes_required) / wall_time + gpu_min_power = config['POWER_GPU_IDLE'] + gpu_max_power = config['POWER_GPU_MAX'] + gpu_util = power_to_utilization(gpu_power,gpu_min_power,gpu_max_power) + gpu_trace = gpu_util + + # Compute CPU power from CPU usage time + # CPU usage is reported per core, while we need it in the range [0 to CPUS_PER_NODE] + cpu_util = node_data['cpu_usage'].sum() / nodes_required / wall_time / config['CPU_FREQUENCY'] / config['CORES_PER_CPU'] + cpu_trace = cpu_util + # TODO use total energy for validation + # Only Node Energy and GPU Energy is reported! + # total_energy = node_data['energy'].sum() # Joules # Network utilization - since values are given in octets / quarter of a byte, multiply by 4 to get bytes - ib_tx = 4 * node_data['ib_tx'].values[0] if node_data['ib_tx'].values.size > 0 else [] - ib_rx = 4 * node_data['ib_rx'].values[0] if node_data['ib_rx'].values.size > 0 else [] + ib_tx = 4 * node_data['ib_tx'].sum() if node_data['ib_tx'].values.size > 0 else [] + ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else [] net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) + # no priorities defined! 
+ priority = row.get('priority', 0) + partition = row.get('partition', "0") + if arrival == 'poisson': # Modify the submit times according to Poisson process scheduled_nodes = None - time_submit = next_arrival(1/config['JOB_ARRIVAL_TIME']) - time_start = None # Scheduler will determine start time + submit_time = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + start_time = None # Scheduler will determine start time + end_time = None # Scheduler will determine end time else: # Prescribed replay scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df) - time_submit = compute_time_offset(row['job_submit_time'], min_time) - time_start = compute_time_offset(row['begin_time'], min_time) - if fastforward: - time_submit -= fastforward - time_start -= fastforward + submit_time = compute_time_offset(row['job_submit_time'], telemetry_start_timestamp) + start_time = compute_time_offset(row['begin_time'], telemetry_start_timestamp) + end_time = compute_time_offset(row['end_time'], telemetry_start_timestamp) + time_limit = row['time_limit'] + + trace_time = wall_time + trace_start_time = start_time + trace_end_time = end_time + trace_missing_values = False if verbose: print('ib_tx, ib_rx, samples:', ib_tx, ib_rx, samples) @@ -140,22 +144,32 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): print('rx:', net_rx) print('scheduled_nodes:', nodes_required, scheduled_nodes) - if time_submit >= 0: - + if wall_time >= 0: job_info = job_dict(nodes_required, - row['hashed_user_id'], - row['hashed_user_group_id'], - cpu_trace, gpu_trace, net_tx, net_rx, wall_time, - row['exit_status'], + name, + account, + cpu_trace, + gpu_trace, + net_tx, + net_rx, + end_state, scheduled_nodes, - time_submit, job_id, - row.get('priority', 0), - time_start) + priority, + partition, + submit_time=submit_time, + time_limit=time_limit, + start_time=start_time, + end_time=end_time, + wall_time=wall_time, + trace_time=trace_time, + trace_start_time=trace_start_time, + trace_end_time=trace_end_time, + trace_missing_values=trace_missing_values) job_list.append(job_info) - return job_list + return job_list, telemetry_start_time, telemetry_end_time def get_scheduled_nodes(allocation_id, node_df): -- GitLab From 98b2ea2275776acf92e0ba30bb26881046bd8b7d Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 4 Mar 2025 13:56:48 -0500 Subject: [PATCH 037/388] Updated lassen dataloader to cut the region of interest to the simulation time. --- raps/dataloaders/lassen.py | 62 ++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 08ec59c..995284f 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -22,8 +22,8 @@ Usage Instructions: # to modify the submit times of the telemetry according to Poisson distribution python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson - # to fast-forward 37 days and replay for 1 day - python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 37d -t 1d + # to fast-forward 365 days and replay for 1 day. This region day has 2250 jobs with 1650 jobs executed. 
+ python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 365d -t 1d """ import math import os @@ -31,19 +31,20 @@ import uuid import numpy as np import pandas as pd from tqdm import tqdm +from datetime import timedelta from ..job import job_dict -from ..utils import power_to_utilization, next_arrival +from ..utils import power_to_utilization, next_arrival, convert_to_seconds def load_data(path, **kwargs): """ Loads data from the given file paths and returns job info. """ - nrows = 1E5 - alloc_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_history_hashed.csv'), nrows=nrows) - node_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_node_history.csv'), nrows=nrows) - step_df = pd.read_csv(os.path.join(path[0], 'final_csm_step_history.csv'), nrows=nrows) + nrows = None + alloc_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_history_hashed.csv'), nrows=nrows, low_memory=False) + node_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_node_history.csv'), nrows=nrows, low_memory=False) + step_df = pd.read_csv(os.path.join(path[0], 'final_csm_step_history.csv'), nrows=nrows, low_memory=False) return load_data_from_df(alloc_df, node_df, step_df, **kwargs) @@ -56,23 +57,44 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): validate = kwargs.get('validate') arrival = kwargs.get('arrival') verbose = kwargs.get('verbose') - - allocation_df['job_submit_time'] = pd.to_datetime(allocation_df['job_submit_time'], format='mixed', errors='coerce') - allocation_df['begin_time'] = pd.to_datetime(allocation_df['begin_time'], format='mixed', errors='coerce') - allocation_df['end_time'] = pd.to_datetime(allocation_df['end_time'], format='mixed', errors='coerce') - - telemetry_start_timestamp = allocation_df['begin_time'].min() + fastforward = kwargs.get('fastforward') # int in seconds + + allocation_df['job_submit_timestamp'] = pd.to_datetime(allocation_df['job_submit_time'], format='mixed', errors='coerce') + allocation_df['begin_timestamp'] = pd.to_datetime(allocation_df['begin_time'], format='mixed', errors='coerce') + allocation_df['end_timestamp'] = pd.to_datetime(allocation_df['end_time'], format='mixed', errors='coerce') + + # Too large dataset! Cut by fastforward and time to simulate! + if fastforward is None: # This is in seconds / int? + fastforward = 0 + fastforward_timedelta = timedelta(seconds=fastforward) # timedelta + else: + fastforward_timedelta = timedelta(seconds=fastforward) # timedelta + time_to_simulate = kwargs.get('time') # int in seconds + if time_to_simulate is None: # This is a string! + time_to_simulate = 31536000 # a year + time_to_simulate_timedelta = timedelta(seconds=time_to_simulate) # timedelta + else: + time_to_simulate_timedelta = timedelta(seconds=convert_to_seconds(time_to_simulate)) # timedelta + + telemetry_start_timestamp = allocation_df['begin_timestamp'].min() telemetry_start_time = 0 - telemetry_end_timestamp = allocation_df['end_time'].max() + telemetry_end_timestamp = allocation_df['end_timestamp'].max() diff = telemetry_end_timestamp - telemetry_start_timestamp telemetry_end_time = int(math.ceil(diff.total_seconds())) + simulation_start_timestamp = telemetry_start_timestamp + fastforward_timedelta + simulation_end_timestamp = simulation_start_timestamp + time_to_simulate_timedelta + + # As these are >1.4M jobs, filtered to the simulated timestamps before creating the job structs. 
allocation_df = allocation_df[allocation_df['end_timestamp'] >= simulation_start_timestamp] # Job should not have ended before the simulation time
+    allocation_df = allocation_df[allocation_df['job_submit_timestamp'] < simulation_end_timestamp] # Job has to have been submitted before or during the simulation time
+
     job_list = []
     for _, row in tqdm(allocation_df.iterrows(), total=len(allocation_df), desc="Processing Jobs"):

         account = row['hashed_user_id']
-        job_id = row['primary_job_id']
+        job_id = int(row['primary_job_id'])
         allocation_id = row['allocation_id']
         nodes_required = row['num_nodes']
         end_state = row['exit_status']
@@ -86,7 +108,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
         node_data = node_df[node_df['allocation_id'] == row['allocation_id']]

-        wall_time = compute_wall_time(row['begin_time'], row['end_time'])
+        wall_time = compute_wall_time(row['begin_timestamp'], row['end_timestamp'])
         samples = math.ceil(wall_time / config['TRACE_QUANTA'])

         if validate:
@@ -128,9 +150,9 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
             end_time = None # Scheduler will determine end time
         else: # Prescribed replay
             scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df)
-            submit_time = compute_time_offset(row['job_submit_time'], telemetry_start_timestamp)
-            start_time = compute_time_offset(row['begin_time'], telemetry_start_timestamp)
-            end_time = compute_time_offset(row['end_time'], telemetry_start_timestamp)
+            submit_time = compute_time_offset(row['job_submit_timestamp'], telemetry_start_timestamp)
+            start_time = compute_time_offset(row['begin_timestamp'], telemetry_start_timestamp)
+            end_time = compute_time_offset(row['end_timestamp'], telemetry_start_timestamp)
             time_limit = row['time_limit']

             trace_time = wall_time
@@ -208,7 +230,7 @@ def adjust_bursts(burst_intervals, total, intervals):
     if adjustment != 0:
         for i in range(len(bursts)):
             if bursts[i] > 0:
-                bursts[i] += adjustment
+                bursts[i] += adjustment % (2**64 - 1)
                 break # Apply adjustment only once where it won't cause a negative
     return bursts
--
GitLab

From 05c26b371f0a18f2951c0e2af1d477ecb17e Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 4 Mar 2025 14:58:29 -0500
Subject: [PATCH 038/388] Updated .npz loading.
Broken Multi-Part-sim.py removed from tests/smoke.py
---
 main.py        | 15 ++++++++++++---
 tests/smoke.py |  2 +-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/main.py b/main.py
index 674d68e..6dc789a 100644
--- a/main.py
+++ b/main.py
@@ -92,7 +92,16 @@ if args.replay:
     # Read telemetry data (either npz file or via custom data loader)
     if args.replay[0].endswith(".npz"): # Replay .npz file
         print(f"Loading {args.replay[0]}...")
-        jobs = td.load_snapshot(args.replay[0])
+        jobs, timestep_start_from_file, timestep_end_from_file, args_from_file = td.load_snapshot(args.replay[0])
+        print("Intended to run with:" +\
+              f"\n--system {args_from_file.system} " +\
+              f"-ff {args_from_file.fastforward} " +\
+              f"-t {args_from_file.time}\n" +\
+              f"All Args:\n{args_from_file}"
+              )
+        if args.time is None:
+            print("Set --time (necessary) and possibly --fastforward to run .npz replay successfully!")
+            exit()

         if args.scale:
             for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"):
@@ -108,8 +117,8 @@ if args.replay:
     else: # custom data loader
         print(*args.replay)
         jobs, timestep_start_from_data, timestep_end = td.load_data(args.replay)
-        td.save_snapshot(jobs, filename=DIR_NAME)
-        timestep_start += timestep_start_from_data
+        timestep_start += timestep_start_from_data
+        td.save_snapshot((jobs, timestep_start, timestep_end, args), filename=DIR_NAME)

     # Set number of timesteps based on the last job running which we assume
     # is the maximum value of submit_time + wall_time of all the jobs
diff --git a/tests/smoke.py b/tests/smoke.py
index 9174b3c..ac9f15e 100644
--- a/tests/smoke.py
+++ b/tests/smoke.py
@@ -64,7 +64,7 @@ def main():
     # If no arguments are given, run all tests
     if not args.tests:
         synthetic_workload_tests()
-        hetero_tests()
+        #hetero_tests()
         execute_system_tests(SYSTEMS.keys())
     else:
         # Validate each test name
--
GitLab

From 012af1fa41ba64ac4c4008d2f4b0e92e6f21d187 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 4 Mar 2025 16:53:54 -0500
Subject: [PATCH 039/388] Add future note about simplification

---
 raps/job.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/raps/job.py b/raps/job.py
index 6f141ad..d781a41 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -1,5 +1,15 @@
 from enum import Enum

+"""
+Note: want to simplify this in the future to use a minimal required set of job attributes,
+the standard workload format (swf) https://www.cs.huji.ac.il/labs/parallel/workload/swf.html
+
+Implementing such using something like:
+
+    from types import SimpleNamespace
+    job = SimpleNamespace(**job_dict(...))
+"""
+
 def job_dict(nodes_required, name, account, \
--
GitLab

From 4c84f9bd4c0f162189046fd4212318fa13ba84df Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 4 Mar 2025 18:31:41 -0500
Subject: [PATCH 040/388] Updated Multi-part-sim and updated minor nuances.

- Set default replay of npz to --fastforward 0 if the argument is not given
  (the snapshot round-trip is sketched below)
- Fixed multi-part-sim to use fastforward and time to get the correct
  timestep_start and timestep_end as needed in run_stepwise
- Updated default policy to replay in the default scheduler
- Added a system name indicator in the System panel (e.g. for replay with other systems)
- Re-enabled the heterogeneous system test in the smoke tests.
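For orientation, a hedged sketch of the snapshot round-trip the .npz replay
path relies on (the function names and the archive key are illustrative, not
the exact Telemetry API; the payload tuple matches the one saved in main.py):

    import numpy as np

    def save_snapshot(payload, filename):
        # payload is (jobs, timestep_start, timestep_end, args)
        arr = np.empty(len(payload), dtype=object)
        for i, item in enumerate(payload):
            arr[i] = item
        np.savez_compressed(filename, snapshot=arr)

    def load_snapshot(path):
        with np.load(path, allow_pickle=True) as data:
            jobs, t_start, t_end, args = data['snapshot']
        return jobs, t_start, t_end, args

Pickling the argparse Namespace alongside the jobs is what lets replay print
the flags the snapshot was generated with.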
---
 args.py                    | 13 -------------
 main.py                    | 10 +++++-----
 multi-part-sim.py          |  9 ++++++++-
 raps/engine.py             |  8 +++++---
 raps/schedulers/default.py |  2 ++
 raps/ui.py                 |  2 +-
 tests/smoke.py             |  2 +-
 7 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/args.py b/args.py
index 4f0ab38..074de3d 100644
--- a/args.py
+++ b/args.py
@@ -58,16 +58,3 @@ parser.add_argument('--accounts-json', type=str, help='Json of account stats gen
 args = parser.parse_args()
 args_dict = vars(args)
 print(args_dict)
-
-# Determine the default policy based on --replay
-policy_specified = args.policy is not None  # was policy set explicitly
-
-if not policy_specified:
-    if args.replay:  # if --replay is provided, default to "replay"
-        args.policy = "replay"
-        print(f"No policy specified, using default for replay: {args.policy}")
-    else:  # otherwise, default to "fcfs"
-        args.policy = policies[0]
-        print(f"No policy specified, using default: {args.policy}")
-
-print("Final policy:", args.policy)
diff --git a/main.py b/main.py
index 6dc789a..da7bfb0 100644
--- a/main.py
+++ b/main.py
@@ -93,15 +93,15 @@ if args.replay:
     if args.replay[0].endswith(".npz"):  # Replay .npz file
         print(f"Loading {args.replay[0]}...")
         jobs, timestep_start_from_file, timestep_end_from_file, args_from_file = td.load_snapshot(args.replay[0])
-        print("Intended to run with:" +\
+        if args_from_file.fastforward is None:
+            args_from_file.fastforward = 0
+        print("File was generated with:" +\
               f"\n--system {args_from_file.system} " +\
               f"-ff {args_from_file.fastforward} " +\
               f"-t {args_from_file.time}\n" +\
              f"All Args:\n{args_from_file}"
              )
-        if args.time is None:
-            print("Set --time (necessary) and possibly --fastforward, to run .npz replay successfully!")
-            exit()
+        timestep_end = timestep_end_from_file

         if args.scale:
             for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"):
@@ -153,7 +153,7 @@ if args.accounts:
     job_accounts = Accounts(jobs)
     if args.accounts_json:
         loaded_accounts = Accounts.from_json_filename(args.accounts_json)
-        accounts = Accounts.merge(loaded_accounts,job_accounts)
+        accounts = Accounts.merge(loaded_accounts, job_accounts)
     else:
         accounts = job_accounts
     sc.accounts = accounts
diff --git a/multi-part-sim.py b/multi-part-sim.py
index 37a032a..b9d3856 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -79,13 +79,20 @@ for i, config in enumerate(configs):
     layout_managers[config['system_name']] = LayoutManager(args.layout, engine=sc, debug=args.debug, **config)

 # Set simulation timesteps
+if args.fastforward:
+    fastforward = convert_to_seconds(args.fastforward)
+else:
+    fastforward = 0
 if args.time:
     timesteps = convert_to_seconds(args.time)
 else:
     timesteps = 88200  # Default to 24.5 hours

+timestep_start = fastforward
+timestep_end = timestep_start + timesteps
+
 # Create generators for each layout manager
-generators = {name: lm.run_stepwise(jobs_by_partition[name], timesteps=timesteps)
+generators = {name: lm.run_stepwise(jobs_by_partition[name], timestep_start=timestep_start, timestep_end=timestep_end)
               for name, lm in layout_managers.items()}

 # Step through all generators in lockstep
diff --git a/raps/engine.py b/raps/engine.py
index cf4e6fa..89bab2b 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -59,8 +59,8 @@ class Engine:

         # Get scheduler type from command-line args or default
         scheduler_type = kwargs.get('scheduler', 'default')
-        policy_type = kwargs.get('policy', None)
-        backfill_type = kwargs.get('backfill', None)
+        policy_type = kwargs.get('policy')
+        backfill_type = kwargs.get('backfill')

         self.scheduler =
load_scheduler(scheduler_type)(
             config=self.config,
@@ -68,7 +68,9 @@
             bfpolicy=kwargs.get('backfill'),
             resource_manager=self.resource_manager
         )
-        print(f"Using scheduler: {scheduler_type}, with policy {policy_type} and backfill {backfill_type}")
+        print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}"\
+              f", with policy {self.scheduler.policy.value} "\
+              f"and backfill {self.scheduler.bfpolicy.value}")

     def add_running_jobs_to_queue(self, jobs_to_submit: List):
diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py
index f29d67f..67883e5 100644
--- a/raps/schedulers/default.py
+++ b/raps/schedulers/default.py
@@ -12,6 +12,8 @@ class Scheduler:

     def __init__(self, config, policy, bfpolicy=None, resource_manager=None):
         self.config = config
+        if policy is None:  # policy was passed in as None, so no default was chosen
+            policy = "replay"
         self.policy = PolicyType(policy)
         self.bfpolicy = BackfillType(bfpolicy)
         if resource_manager is None:
diff --git a/raps/ui.py b/raps/ui.py
index 73fbdf7..e2dcd83 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -359,7 +359,7 @@ class LayoutManager:
             percent_loss_str = f"{total_loss_mw / total_power_mw * 100:.2f}%"

         if not self.hascooling:
-            self.layout["upper"].update(Panel(Align(table, align="center")))
+            self.layout["upper"].update(Panel(Align(table, align="center"), title=self.engine.config["system_name"].capitalize()))

             # Create Total Power table with green headers and white data
             total_table = Table(show_header=True, header_style="bold green")
diff --git a/tests/smoke.py b/tests/smoke.py
index ac9f15e..9174b3c 100644
--- a/tests/smoke.py
+++ b/tests/smoke.py
@@ -64,7 +64,7 @@ def main():
     # If no arguments are given, run all tests
     if not args.tests:
         synthetic_workload_tests()
-        #hetero_tests()
+        hetero_tests()
         execute_system_tests(SYSTEMS.keys())
     else:
         # Validate each test name
--
GitLab


From 3a61ec393be12b801aad8d432379b8884ad44c91 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 25 Feb 2025 19:16:39 -0500
Subject: [PATCH 041/388] Add initial skeleton for integrating ScheduleFlow

---
 .gitmodules                     |   3 +
 args.py                         |  26 +++++----
 raps/schedulers/scheduleflow.py | 100 ++++++++++++++++++++++++++++++++
 third_party/ScheduleFlow        |   1 +
 4 files changed, 120 insertions(+), 10 deletions(-)
 create mode 100644 .gitmodules
 create mode 100644 raps/schedulers/scheduleflow.py
 create mode 160000 third_party/ScheduleFlow

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..7818279
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "third_party/ScheduleFlow"]
+    path = third_party/ScheduleFlow
+    url = https://github.com/whbrewer/ScheduleFlow
diff --git a/args.py b/args.py
index 074de3d..d8f5900 100644
--- a/args.py
+++ b/args.py
@@ -1,6 +1,5 @@
 import argparse
-import sys
-from raps.schedulers.default import PolicyType
+from raps.schedulers.default import PolicyType, BackfillType

 parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)')

@@ -10,18 +9,22 @@ parser.add_argument('-x', '--partitions', nargs='+', default=None, help='List of
 parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model')

 # Simulation runtime options
+parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
 parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
 parser.add_argument('-d', '--debug', action='store_true',
                    help='Enable debug mode and disable rich layout')
 parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule')
 parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
-choices = ['layout1', 'layout2']
-parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI')
 parser.add_argument('--start', type=str, help='ISO8601 string for start of simulation')
 parser.add_argument('--end', type=str, help='ISO8601 string for end of simulation')
 parser.add_argument('--seed', action='store_true', help='Set random number seed for deterministic simulation')
 parser.add_argument('-u', '--uncertainties', action='store_true',
                     help='Change from floating point units to floating point units with uncertainties.' + \
-                         ' Very expensive w.r.t. simulation time!')
+                         ' Very expensive w.r.t. simulation time!')
+
+# User Interface options
+choices = ['layout1', 'layout2']
+parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI')
+

 # Output options
 parser.add_argument('-o', '--output', action='store_true', help='Output power, cooling, and loss models for later analysis')
@@ -33,7 +36,6 @@ parser.add_argument('--imtype', type=str, choices=choices, default=choices[0], h
 # Telemetry data
 parser.add_argument('-f', '--replay', nargs='+', type=str, help='Either: path/to/joblive path/to/jobprofile' + \
                     ' -or- filename.npz (overrides --workload option)')
-parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
 parser.add_argument('-e', '--encrypt', action='store_true', help='Encrypt any sensitive data in telemetry')
 parser.add_argument('--validate', action='store_true', help='Use node power instead of CPU/GPU utilizations')
 parser.add_argument('--jid', type=str, default='*', help='Replay job id')
@@ -44,14 +46,18 @@ choices = ['random', 'benchmark', 'peak', 'idle']
 parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')

 # Scheduling options
-choices = ['default', 'replay', 'nrel', 'anl', 'flux']
+choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux']
 parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler')
-choices = [None, 'firstfit', 'bestfit', 'greedy', 'easy', 'conservative']
+choices = [policy.value for policy in PolicyType]
+parser.add_argument('--policy', type=str, choices=choices, default=choices[0], help='Schedule policy to use')
+choices = [policy.value for policy in BackfillType]
 parser.add_argument('--backfill', type=str, choices=choices, default=None, help='Backfill Policy')
-policies = [policy.value for policy in PolicyType]
+
+# Redistribution of job arrival
 choices = ['prescribed', 'poisson']
 parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})')
-parser.add_argument('--policy', type=str, choices=policies, default=None, help='Schedule policy to use')
+
+# Account options
 parser.add_argument('--accounts', action='store_true', help='Flag indicating if accounts should be tracked')
 parser.add_argument('--accounts-json', type=str, help='Json of account stats generated in previous run.
see raps/accounts.py') diff --git a/raps/schedulers/scheduleflow.py b/raps/schedulers/scheduleflow.py new file mode 100644 index 0000000..bfa9306 --- /dev/null +++ b/raps/schedulers/scheduleflow.py @@ -0,0 +1,100 @@ +from raps.job import Job, JobState +from raps.utils import summarize_ranges +# Import ScheduleFlow’s modules – since ScheduleFlow isn’t pip installable, you +# may have vendored it or added it as a submodule (e.g. under third_party/scheduleflow) +from third_party.ScheduleFlow import ScheduleFlow # adjust this import if needed + +class Scheduler: + """ + Adapter for integrating ScheduleFlow into RAPS. + + This scheduler implements the same interface as the default RAPS scheduler. + It converts RAPS jobs into ScheduleFlow’s format, calls ScheduleFlow’s scheduling + routines, then updates the RAPS job objects accordingly. + """ + + def __init__(self, config, policy, resource_manager): + self.config = config + # You might or might not use the policy parameter; for now we store it. + self.policy = policy + self.resource_manager = resource_manager + # Here we instantiate a ScheduleFlow scheduler. + # For example, if ScheduleFlow provides a Scheduler or OnlineScheduler, + # choose one based on your needs. (See ScheduleFlow documentation for details.) + self.sf_scheduler = ScheduleFlow.Scheduler( + ScheduleFlow.System(config['TOTAL_NODES']), + # You might pass additional parameters here if needed. + ) + + def sort_jobs(self, queue, accounts=None): + """ + Optionally, pre-sort jobs. + + For now, we can sort by submit_time (FCFS) as a default. + """ + return sorted(queue, key=lambda job: job.submit_time) + + def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): + """ + Convert the list of RAPS jobs into the format ScheduleFlow expects, + call ScheduleFlow’s scheduling function, and then update each job. + + This method is expected to remove the scheduled jobs from `queue` and append them to `running`. + """ + # Convert RAPS jobs into ScheduleFlow job representations. + sf_jobs = [self._convert_job(job) for job in queue] + + # Call ScheduleFlow’s scheduling algorithm. + # This is a placeholder – you must adapt it to ScheduleFlow’s actual API. + scheduled_sf_jobs = self.sf_scheduler.compute_schedule(sf_jobs) + + # Map ScheduleFlow’s output back to the corresponding RAPS jobs. + # Here we assume each ScheduleFlow job has an 'id' and a field 'assigned_nodes'. + for sf_job in scheduled_sf_jobs: + job = self._find_job_by_id(queue, sf_job['id']) + if job is not None: + job.scheduled_nodes = sf_job.get('assigned_nodes', []) + # You could also update start_time, end_time, etc., if ScheduleFlow provides these. + job.start_time = current_time # Or use sf_job['start_time'] if available + job.end_time = current_time + job.wall_time + job.state = JobState.RUNNING + running.append(job) + queue.remove(job) + if debug: + print(f"t={current_time}: Scheduled job {job.id} on nodes {summarize_ranges(job.scheduled_nodes)}") + # Optionally, if ScheduleFlow supports backfill, you can implement find_backfill_job() similarly. + + def _convert_job(self, job): + """ + Convert a RAPS Job object into a dictionary (or other format) that ScheduleFlow expects. + + Adjust the fields as necessary – here’s an example conversion. + """ + return { + 'id': job.id, + 'nodes_required': job.nodes_required, + 'wall_time': job.wall_time, + 'submit_time': job.submit_time, + # Add any additional fields required by ScheduleFlow here. 
+        }

+    def _find_job_by_id(self, queue, job_id):
+        """
+        Given a list of RAPS jobs, return the one with the matching id.
+        """
+        for job in queue:
+            if job.id == job_id:
+                return job
+        return None
+
+    def find_backfill_job(self, queue, num_free_nodes, current_time):
+        """
+        Optionally, implement backfill logic by delegating to ScheduleFlow's
+        mechanisms or by applying custom logic.
+        """
+        # This is left as an exercise. You might use ScheduleFlow’s API to determine if a job can backfill.
+        return None
+
+if __name__ == '__main__':
+    import unittest
+    unittest.main()
diff --git a/third_party/ScheduleFlow b/third_party/ScheduleFlow
new file mode 160000
index 0000000..3fdfd36
--- /dev/null
+++ b/third_party/ScheduleFlow
@@ -0,0 +1 @@
+Subproject commit 3fdfd3675e68f0c2a0e68c1d7ce7205940d28216
--
GitLab


From b7bbbc905a0e95fb8bc88f3e83172c2c14987aab Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 25 Feb 2025 19:20:34 -0500
Subject: [PATCH 042/388] Add update in README.md on how to pull third party submodules

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 5b953b9..552bca9 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,12 @@ There are three ways to modify replaying of telemetry data:

     make docker_build && make docker_run

+## Third party schedulers
+
+To install third-party schedulers, such as ScheduleFlow, run:
+
+    git submodule update --init --recursive
+
 ### Setup Simulation Server

 See instructions in [server/README.md](https://code.ornl.gov/exadigit/simulationserver)
--
GitLab


From 59768b6a813358a75d9f8b4e9dcc2333c903dc2f Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 25 Feb 2025 23:06:17 -0500
Subject: [PATCH 043/388] A little more progress - a long way to go

---
 raps/schedulers/scheduleflow.py | 109 +++++++++++++++++++------------
 1 file changed, 64 insertions(+), 45 deletions(-)

diff --git a/raps/schedulers/scheduleflow.py b/raps/schedulers/scheduleflow.py
index bfa9306..87bbf23 100644
--- a/raps/schedulers/scheduleflow.py
+++ b/raps/schedulers/scheduleflow.py
@@ -1,8 +1,28 @@
 from raps.job import Job, JobState
 from raps.utils import summarize_ranges
-# Import ScheduleFlow’s modules – since ScheduleFlow isn’t pip installable, you
-# may have vendored it or added it as a submodule (e.g. under third_party/scheduleflow)
-from third_party.ScheduleFlow import ScheduleFlow  # adjust this import if needed
+from third_party.ScheduleFlow import ScheduleFlow
+from ..job import job_dict
+
+class SFJob:
+    def __init__(self, job_info):
+        """Map RAPS job object to ScheduleFlow"""
+        self.job_id = job_info['id']
+        self.nodes = job_info['nodes_required']
+        self.walltime = job_info['wall_time']
+        self.requested_walltimes = None
+        self.submission_time = job_info['submit_time']
+        self.name = job_info['name']
+        self.priority = job_info['priority']
+        self.resubmit_factor = -1
+
+    def __hash__(self):
+        return hash(self.job_id)
+
+    def __eq__(self, other):
+        return isinstance(other, SFJob) and self.job_id == other.job_id
+
+    def __repr__(self):
+        return f"SFJob(id={self.job_id}, nodes={self.nodes}, wall_time={self.walltime})"

 class Scheduler:
     """
@@ -15,15 +35,11 @@ class Scheduler:

     def __init__(self, config, policy, resource_manager):
         self.config = config
-        # You might or might not use the policy parameter; for now we store it.
         self.policy = policy
         self.resource_manager = resource_manager
-        # Here we instantiate a ScheduleFlow scheduler.
-        # For example, if ScheduleFlow provides a Scheduler or OnlineScheduler,
-        # choose one based on your needs.
(See ScheduleFlow documentation for details.) self.sf_scheduler = ScheduleFlow.Scheduler( ScheduleFlow.System(config['TOTAL_NODES']), - # You might pass additional parameters here if needed. + priorityLevels=3, ) def sort_jobs(self, queue, accounts=None): @@ -35,52 +51,55 @@ class Scheduler: return sorted(queue, key=lambda job: job.submit_time) def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): - """ - Convert the list of RAPS jobs into the format ScheduleFlow expects, - call ScheduleFlow’s scheduling function, and then update each job. - - This method is expected to remove the scheduled jobs from `queue` and append them to `running`. - """ - # Convert RAPS jobs into ScheduleFlow job representations. - sf_jobs = [self._convert_job(job) for job in queue] - - # Call ScheduleFlow’s scheduling algorithm. - # This is a placeholder – you must adapt it to ScheduleFlow’s actual API. - scheduled_sf_jobs = self.sf_scheduler.compute_schedule(sf_jobs) - - # Map ScheduleFlow’s output back to the corresponding RAPS jobs. - # Here we assume each ScheduleFlow job has an 'id' and a field 'assigned_nodes'. - for sf_job in scheduled_sf_jobs: - job = self._find_job_by_id(queue, sf_job['id']) - if job is not None: + # Convert RAPS jobs to ScheduleFlow format + sf_jobs = [self._convert_to_sf(job) for job in queue] + + # Submit each job to the ScheduleFlow scheduler + for sf_job in sf_jobs: + self.sf_scheduler.submit_job(current_time, [sf_job]) + + # Trigger the schedule calculation + actions = self.sf_scheduler.trigger_schedule(current_time) + + # Process the actions (each action is assumed to be (start_time, job_info)) + for act in actions: + start_time, sf_job = act + # Find the corresponding RAPS job using its ID + job = self._find_job(queue, sf_job['id']) + if job: job.scheduled_nodes = sf_job.get('assigned_nodes', []) - # You could also update start_time, end_time, etc., if ScheduleFlow provides these. - job.start_time = current_time # Or use sf_job['start_time'] if available - job.end_time = current_time + job.wall_time + job.start_time = start_time + job.end_time = start_time + job.wall_time job.state = JobState.RUNNING running.append(job) queue.remove(job) if debug: print(f"t={current_time}: Scheduled job {job.id} on nodes {summarize_ranges(job.scheduled_nodes)}") - # Optionally, if ScheduleFlow supports backfill, you can implement find_backfill_job() similarly. - def _convert_job(self, job): - """ - Convert a RAPS Job object into a dictionary (or other format) that ScheduleFlow expects. - - Adjust the fields as necessary – here’s an example conversion. - """ - return { - 'id': job.id, - 'nodes_required': job.nodes_required, - 'wall_time': job.wall_time, - 'submit_time': job.submit_time, - # Add any additional fields required by ScheduleFlow here. - } + def _convert_to_sf(self, job): + # Use job_dict to create a dictionary from the RAPS job. + d = job_dict( + job.nodes_required, + job.name, + job.account, + job.cpu_trace, + job.gpu_trace, + job.ntx_trace, + job.nrx_trace, + job.wall_time, + getattr(job, 'end_state', None), # Provide a default if not set + job.requested_nodes, + job.submit_time, + job.id, + priority=job.priority, + partition=getattr(job, 'partition', 0) + ) + # Now create an SFJob from the dictionary. + return SFJob(d) - def _find_job_by_id(self, queue, job_id): + def _find_job(self, queue, job_id): """ - Given a list of RAPS jobs, return the one with the matching id. + Find the RAPS job in the queue that matches the given job_id. 
""" for job in queue: if job.id == job_id: -- GitLab From 06124697cf2b6bc8fa7a211bc32d8e9877fb78c8 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 5 Mar 2025 18:48:53 -0500 Subject: [PATCH 044/388] Added a plotting script --- scripts/plot_p-util_t.py | 63 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 scripts/plot_p-util_t.py diff --git a/scripts/plot_p-util_t.py b/scripts/plot_p-util_t.py new file mode 100644 index 0000000..0ac4eb3 --- /dev/null +++ b/scripts/plot_p-util_t.py @@ -0,0 +1,63 @@ +#!/bin/env python3 +import pandas as pd +import pyarrow.parquet as pq +import matplotlib.pyplot as plt + +import sys + +if len(sys.argv) > 1: + path = sys.argv[1] +else: + print(f"Usage: python {sys.argv[0]} ") + exit() + +# e.g. path = "$HOME/Repositories/exadigit/raps/simulation_results/b803010" + +files = ['cooling_model.parquet', 'loss_history.parquet', 'power_history.parquet', 'util.parquet'] + +full_files = [f"{path}/{file}" for file in files] + + +def iter_to_seconds(i): + return i * 15 + + +for i in [1]: + fig, ax1 = plt.subplots(figsize=(10, 6)) + + power = path + "/" + files[2] + loss = path + "/" + files[1] + util = path + "/" + files[3] + + df_power = pd.read_parquet(power) + df_power = df_power.rename(columns={0:'time',1:'power [kw]'}) + ax1.plot(df_power['time'],df_power['power [kw]'], color='black', label='Power kW]') + + #df_loss = pd.read_parquet(loss) + #df_loss = df_loss.rename(columns={0:'time',1:'loss [kw]'}) + #ax1.plot(df_loss['time'],df_loss['loss [kw]'], color='red', label='Loss [kW]') + + ax2 = ax1.twinx() + + #df_cooling = pd.read_parquet(cooling) + #df_cooling['index'] = df_cooling.index + #df_cooling['time'] = df_cooling['index'].apply(iter_to_seconds) + #ymax = max(df_cooling['pue']) + #ax2.plot(df_cooling['time'],df_cooling['pue'], color='blue', label='PUE') + + df_util = pd.read_parquet(util) + df_util = df_util.rename(columns={0:'time', 1:'utilization [%]'}) + df_util['utilization'] = df_util['utilization [%]'] / 100 + ax2.plot(df_util['time'],df_util['utilization'], color='orange', label='Utilization') + + #ymax = max(max(df_cooling['pue']),max(df_util['utilization'])) + ymax = max(0,max(df_util['utilization'])) + ax2.set_ylim([0, ymax * 1.05]) + + ax1.set_xlabel('time [s]') + ax1.set_ylabel('[kW]') + ax2.set_ylabel('[%]') + plt.title(path) + ax1.legend(loc='upper left') + ax2.legend(loc='upper right') + plt.show() -- GitLab From 62ac03708c9c16c2d049ded8337bcc2d032061ab Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 7 Mar 2025 00:20:48 -0500 Subject: [PATCH 045/388] Third try for scheduleflow. Using the Events directly. Simulation slows down drastically after over 90 jobs are quued and simulations time is 3+h. 
--- main.py | 14 +- raps/engine.py | 13 +- raps/job.py | 10 +- raps/schedulers/default.py | 2 +- raps/schedulers/scheduleflow.py | 234 ++++++++++++++++++++++---------- raps/workload.py | 16 ++- 6 files changed, 199 insertions(+), 90 deletions(-) diff --git a/main.py b/main.py index da7bfb0..f5fc99d 100644 --- a/main.py +++ b/main.py @@ -63,12 +63,6 @@ else: args_dict['config'] = config flops_manager = FLOPSManager(**args_dict) -sc = Engine( - power_manager=power_manager, - flops_manager=flops_manager, - cooling_model=cooling_model, - **args_dict, -) timestep_start = 0 if args.fastforward: @@ -145,6 +139,14 @@ else: # Synthetic jobs DIR_NAME = create_casename() +sc = Engine( + power_manager=power_manager, + flops_manager=flops_manager, + cooling_model=cooling_model, + jobs=jobs, + **args_dict, +) + OPATH = OUTPUT_PATH / DIR_NAME print("Output directory is: ", OPATH) sc.opath = OPATH diff --git a/raps/engine.py b/raps/engine.py index 89bab2b..5b2830c 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -34,7 +34,7 @@ class TickData: class Engine: """Job scheduling simulation engine.""" - def __init__(self, *, power_manager, flops_manager, cooling_model=None, config, **kwargs): + def __init__(self, *, power_manager, flops_manager, cooling_model=None, config, jobs=None, **kwargs): self.config = config self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) self.resource_manager = ResourceManager( @@ -66,11 +66,12 @@ class Engine: config=self.config, policy=kwargs.get('policy'), bfpolicy=kwargs.get('backfill'), - resource_manager=self.resource_manager + resource_manager=self.resource_manager, + jobs=jobs ) print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}"\ - f", with policy {self.scheduler.policy.value} "\ - f"and backfill {self.scheduler.bfpolicy.value}") + f", with policy {self.scheduler.policy} "\ + f"and backfill {self.scheduler.bfpolicy}") def add_running_jobs_to_queue(self, jobs_to_submit: List): @@ -315,11 +316,11 @@ class Engine: # Process jobs in batches for better performance of timestep loop all_jobs = jobs.copy() jobs = [] + # Batch Jobs into 6h windows based on submit_time + batch_window = 60 * 60 * 6 # 6h for timestep in range(timestep_start,timestep_end): - # Batch Jobs into 6h windows based on submit_time - batch_window = 60 * 60 * 6 # 6h if (timestep % batch_window == 0) or (timestep == timestep_start): # Add jobs that are within the batching window and remove them from all jobs jobs += [job for job in all_jobs if job['submit_time'] <= timestep + batch_window] diff --git a/raps/job.py b/raps/job.py index d781a41..25eebbd 100644 --- a/raps/job.py +++ b/raps/job.py @@ -1,7 +1,7 @@ from enum import Enum """ -Note: want to simplify this in the future to use a minimal required set of job attributes, +Note: want to simplify this in the future to use a minimal required set of job attributes, the standard workload format (swf) https://www.cs.huji.ac.il/labs/parallel/workload/swf.html Implementing such using something like: @@ -10,9 +10,9 @@ Implementing such using something like: job = SimpleNamespace(**job_dict(...)) """ -def job_dict(nodes_required, name, account, \ +def job_dict(*,nodes_required, name, account, \ cpu_trace, gpu_trace, ntx_trace, nrx_trace, \ - end_state, scheduled_nodes, job_id, priority=0, partition=0, + end_state, scheduled_nodes=None, id, priority=0, partition=0, submit_time=0, time_limit=0, start_time=0, end_time=0, wall_time=0, trace_time=0, trace_start_time=0,trace_end_time=0, trace_missing_values=False): """ Return job info 
dictionary """ @@ -26,7 +26,7 @@ def job_dict(nodes_required, name, account, \ 'nrx_trace': nrx_trace, 'end_state': end_state, 'requested_nodes': scheduled_nodes, - 'id': job_id, + 'id': id, 'priority': priority, 'partition': partition, # Times: @@ -87,7 +87,7 @@ class Job: for key, value in job_dict.items(): setattr(self, key, value) # In any case: provide a job_id! - if not self.id: + if self.id is None: # This is wrong self.id = Job._get_next_id() if self.scheduled_nodes and self.nodes_required == 0: diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index 67883e5..8f89818 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -10,7 +10,7 @@ from ..policy import PolicyType, BackfillType class Scheduler: """ Default job scheduler with various scheduling policies. """ - def __init__(self, config, policy, bfpolicy=None, resource_manager=None): + def __init__(self, config, policy, bfpolicy=None, jobs=None, resource_manager=None): self.config = config if policy is None: # policy is passed as policy=None, therefore default is not choosen policy = "replay" diff --git a/raps/schedulers/scheduleflow.py b/raps/schedulers/scheduleflow.py index 87bbf23..88725fe 100644 --- a/raps/schedulers/scheduleflow.py +++ b/raps/schedulers/scheduleflow.py @@ -1,108 +1,189 @@ -from raps.job import Job, JobState +from raps.job import JobState from raps.utils import summarize_ranges from third_party.ScheduleFlow import ScheduleFlow +from third_party.ScheduleFlow import _intScheduleFlow +from third_party.ScheduleFlow._intScheduleFlow import EventType from ..job import job_dict -class SFJob: - def __init__(self, job_info): - """Map RAPS job object to ScheduleFlow""" - self.job_id = job_info['id'] - self.nodes = job_info['nodes_required'] - self.walltime = job_info['wall_time'] - self.requested_walltimes = None - self.submission_time = job_info['submit_time'] - self.name = job_info['name'] - self.priority = job_info['priority'] - self.resubmit_factor = -1 - - def __hash__(self): - return hash(self.job_id) - - def __eq__(self, other): - return isinstance(other, SFJob) and self.id == other.id - - def __repr__(self): - return f"SFJob(id={self.job_id}, nodes={self.nodes}, wall_time={self.walltime})" class Scheduler: """ Adapter for integrating ScheduleFlow into RAPS. - + This scheduler implements the same interface as the default RAPS scheduler. It converts RAPS jobs into ScheduleFlow’s format, calls ScheduleFlow’s scheduling routines, then updates the RAPS job objects accordingly. 
""" - def __init__(self, config, policy, resource_manager): + def __init__(self, config, policy, bfpolicy, resource_manager, jobs): + self.sorted_priorities = sorted([x['priority'] for x in jobs]) + num_prios = len(self.sorted_priorities) + # self.sf_queue = [] + self.queue = [] # track submitted jobs self.config = config - self.policy = policy + self.policy = policy + self.bfpolicy = bfpolicy self.resource_manager = resource_manager self.sf_scheduler = ScheduleFlow.Scheduler( ScheduleFlow.System(config['TOTAL_NODES']), - priorityLevels=3, + priorityLevels=num_prios, ) + self._sf_runtime = _intScheduleFlow.Runtime([]) + self._sf_runtime.scheduler = self.sf_scheduler + # self.sf_time = -1 + self.sf_submitted_list = [] # list of sf_apps + # self.sf_start_list = [] # list as returned from sf_scheduler.submit_job + # self.sf_end_list = [] # list as returned from sf_scheduler.start_job + # self.sf_action_list = [] # list as returned from sf_scheduler.stop_job + def sort_jobs(self, queue, accounts=None): """ Optionally, pre-sort jobs. - + For now, we can sort by submit_time (FCFS) as a default. """ return sorted(queue, key=lambda job: job.submit_time) + def start_job_event(): + pass + + def end_job_event(): + pass + def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): - # Convert RAPS jobs to ScheduleFlow format - sf_jobs = [self._convert_to_sf(job) for job in queue] - - # Submit each job to the ScheduleFlow scheduler - for sf_job in sf_jobs: - self.sf_scheduler.submit_job(current_time, [sf_job]) - - # Trigger the schedule calculation - actions = self.sf_scheduler.trigger_schedule(current_time) - + + #self._sf_runtim + pass + #### SECOND TRY + new_queue_items = list(filter(lambda x: x not in self.queue, queue)) + if new_queue_items: + self.queue += new_queue_items + # # Convert RAPS jobs to ScheduleFlow format + new_sf_jobs = [self._convert_to_sf(job) for job in new_queue_items] + self.sf_submitted_list += new_sf_jobs # This one only holds sf_jobs no timestamps + # Submit each job to the ScheduleFlow scheduler # This trigger schedule! + if new_sf_jobs: + ret = self.sf_scheduler.submit_job(current_time, new_sf_jobs) + self._sf_runtime._Runtime__handle_scheduler_actions(ret) + self._sf_runtime._Runtime__trigger_schedule_event() + + if not self._sf_runtime._Runtime__events.empty(): + top = self._sf_runtime._Runtime__events.top() + if top[0] == current_time: + start_jobs = [] + end_jobs = [] + for event in self._sf_runtime._Runtime__events.pop_list(): + if event[1] == EventType.Submit: + raise ValueError(f"Didnt we already Submit above? {event}") + if event[1] == EventType.JobStart: + start_jobs.append(event[2]) + if event[1] == EventType.JobEnd: + end_jobs.append(event[2]) + if len(end_jobs) > 0: + self._sf_runtime._Runtime__job_end_event(end_jobs) + # End of jobs is handled by RAPS via prepare_timestep + pass + if len(start_jobs) > 0: + self._sf_runtime._Runtime__job_start_event(start_jobs) + for sf_app in start_jobs: + job = _match_sf_app_and_job(sf_app,queue,start_jobs) + queue.remove(job) + self.resource_manager.assign_nodes_to_job(job, current_time) + running.append(job) + + + # Keep track of: All jobs have been submitted empty the queue! 
+
+
+        # remove_list = []
+        # job_list = []
+        # for x in self.sf_start_list:
+        #     sf_job_start_time,sf_app = x
+        #     if sf_job_start_time <= current_time:
+        #         job_list.append(sf_app)
+        #         remove_list.append(x)
+        #         job = _match_sf_app_and_job(sf_app,queue,self.sf_submitted_list)
+        #         if current_time != sf_job_start_time:
+        #             print("current_time != sf_job_start_time")
+        #             print(f"{current_time} != {sf_job_start_time}")
+        #         queue.remove(job)
+        #         self.sf_submitted_list.remove(sf_app)

+        #         self.resource_manager.assign_nodes_to_job(job, current_time)
+        #         running.append(job)
+        # if job_list:
+        #     self.sf_end_list += self.sf_scheduler.start_job(current_time,job_list)
+        # for x in remove_list:
+        #     self.sf_start_list.remove(x)

+        #### First TRY
+        #if self.sf_end_list:
+        #    remove_list = []
+        #    job_list = []
+        #    for x in self.sf_end_list:
+        #        if x[0] <= current_time:
+        #            job_list.append(x[1])
+        #            remove_list.append(x)
+        #    if job_list:
+        #        self.sf_action_list += self.sf_scheduler.stop_job(current_time,job_list)
+        #    for x in remove_list:
+        #        self.sf_end_list.remove(x)

+        # submit_jobs triggered the schedule calculation, sf_jobs returned the placed jobs.
+        # We need to reflect this on the raps side.

+        # March the sf_scheduler forward based on the jobs
+        #end_jobs = self.sf_scheduler.start_job(current_time,sf_schedule[1])
+        #self.sf_scheduler.end_job(current_time,end_jobs)

+        # Add to running

         # Process the actions (each action is assumed to be (start_time, job_info))
-        for act in actions:
-            start_time, sf_job = act
-            # Find the corresponding RAPS job using its ID
-            job = self._find_job(queue, sf_job['id'])
-            if job:
-                job.scheduled_nodes = sf_job.get('assigned_nodes', [])
-                job.start_time = start_time
-                job.end_time = start_time + job.wall_time
-                job.state = JobState.RUNNING
-                running.append(job)
-                queue.remove(job)
-                if debug:
-                    print(f"t={current_time}: Scheduled job {job.id} on nodes {summarize_ranges(job.scheduled_nodes)}")
+        #for act in actions:
+        #    start_time, sf_job = act
+        #    # Find the corresponding RAPS job using its ID
+        #    job = self._find_job(queue, sf_job['job_id'])
+        #    if job:
+        #        job.scheduled_nodes = sf_job.get('assigned_nodes', [])
+        #        job.start_time = start_time
+        #        job.end_time = start_time + job.wall_time
+        #        job.state = JobState.RUNNING
+        #        running.append(job)
+        #        queue.remove(job)
+        #        if debug:
+        #            print(f"t={current_time}: Scheduled job {job.id} on nodes {summarize_ranges(job.scheduled_nodes)}")
+
+
+    def _find_sf_in_queue(self, queue, sf_app):
+        # Remember we added four digits and an underscore in _convert_to_sf:
+        match = [x for x in queue if x.id == sf_app.name]
+        if len(match) != 1:
+            raise ValueError(sf_app)
+        return match[0]

     def _convert_to_sf(self, job):
+        # Create a ScheduleFlow.Application from the job information:
+        sf_prio = self.sorted_priorities.index(job.priority)
         # Use job_dict to create a dictionary from the RAPS job.
- return SFJob(d) + nodes = job.nodes_required + submission_time = job.submit_time + if submission_time < 0: + submission_time = 0 + walltime = job.wall_time + requested_walltimes = [job.wall_time] + priority = sf_prio + resubmit_factor = -1 + name = job.id # We use the ID as name to be able to match when unpacking! + return ScheduleFlow.Application(nodes,submission_time,walltime,requested_walltimes,priority,resubmit_factor,name) def _find_job(self, queue, job_id): """ Find the RAPS job in the queue that matches the given job_id. """ for job in queue: - if job.id == job_id: + if job.job_id == job_id: return job return None @@ -114,6 +195,23 @@ class Scheduler: # This is left as an exercise. You might use ScheduleFlow’s API to determine if a job can backfill. return None + +def _match_sf_app_and_job(sf_app,queue,sf_queue): + match = [x for x in sf_queue if x.name == sf_app.name] + if len(match) != 1: + print("Multiple Matches") + raise ValueError(sf_app) + else: + match = match[0] + job = [x for x in queue if x.id == match.name] + if len(job) != 1: + print("Multiple submitted Jobs ") + raise ValueError(job) + else: + job = job[0] + return job + + if __name__ == '__main__': import unittest unittest.main() diff --git a/raps/workload.py b/raps/workload.py index 93b091b..0134945 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -83,10 +83,18 @@ class Workload: # Jobs arrive according to Poisson process time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) - jobs.append(job_dict(nodes_required, name, account, cpu_trace, gpu_trace, net_tx, net_rx, \ - end_state, None, job_index, priority, partition, - time_to_next_job, time_limit, time_to_next_job, time_to_next_job + wall_time, wall_time, - wall_time, 0, wall_time)) + jobs.append(job_dict(nodes_required=nodes_required, name=name, + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=time_to_next_job - 100, + time_limit=time_limit, + start_time=time_to_next_job, + end_time=time_to_next_job + wall_time, + wall_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time)) return jobs -- GitLab From acc765b3878a8ff3e28d5407f9db2eb32edbfa95 Mon Sep 17 00:00:00 2001 From: Rashad-CSU Date: Fri, 7 Mar 2025 09:24:38 -0700 Subject: [PATCH 046/388] Inclusion of metrics --- raps/stats.py | 18 +++++++++++++++--- scripts/plot_p-util_t.py | 3 ++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/raps/stats.py b/raps/stats.py index b067429..7ee1fb9 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -62,6 +62,8 @@ def get_job_stats(engine: Engine): min_wait_time, max_wait_time, sum_wait_time = sys.maxsize, -sys.maxsize - 1, 0 min_turnaround_time, max_turnaround_time, sum_turnaround_time = sys.maxsize, -sys.maxsize - 1, 0 + min_psf_partial_num, max_psf_partial_num, sum_psf_partial_num = sys.maxsize, -sys.maxsize - 1, 0 + min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = sys.maxsize, -sys.maxsize - 1, 0 min_awrt, max_awrt, sum_awrt = sys.maxsize, -sys.maxsize - 1, 0 @@ -90,14 +92,23 @@ def get_job_stats(engine: Engine): awrt = agg_node_hours * turnaround_time # Area Weighted Response Time min_awrt, max_awrt, sum_awrt = min_max_sum(awrt, min_awrt, max_awrt, sum_awrt) + + psf_partial_num = job_size * (turnaround_time**4 - wait_time**4) + psf_partial_den = job_size * (turnaround_time**3 - wait_time**3) + + min_psf_partial_num, max_psf_partial_num, 
sum_psf_partial_num = \ + min_max_sum(psf_partial_num, min_psf_partial_num, max_psf_partial_num, sum_psf_partial_num) + min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = \ + min_max_sum(psf_partial_den, min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den) if len(engine.job_history_dict) != 0: avg_job_size = sum_job_size / len(engine.job_history_dict) avg_runtime = sum_runtime / len(engine.job_history_dict) - avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict) + avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict) avg_wait_time = sum_wait_time / len(engine.job_history_dict) avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict) - avg_awrt = sum_awrt / len(engine.job_history_dict) + avg_awrt = sum_awrt / sum_agg_node_hours + psf = (3*sum_psf_partial_num)/(4*sum_psf_partial_den) else: # Set these to -1 to indicate nothing ran min_job_size, max_job_size, avg_job_size = -1,-1,-1 @@ -131,6 +142,7 @@ def get_job_stats(engine: Engine): 'average_turnaround_time': avg_turnaround_time, 'min_area_weighted_response_time': min_awrt, 'max_area_weighted_response_time': max_awrt, - 'avg_area_weighted_response_time': avg_awrt + 'area_weighted_avg_response_time': avg_awrt, + 'priority_weighted_specific_response_time': psf } return job_stats diff --git a/scripts/plot_p-util_t.py b/scripts/plot_p-util_t.py index 0ac4eb3..55dc4c8 100644 --- a/scripts/plot_p-util_t.py +++ b/scripts/plot_p-util_t.py @@ -60,4 +60,5 @@ for i in [1]: plt.title(path) ax1.legend(loc='upper left') ax2.legend(loc='upper right') - plt.show() + # plt.show() + plt.savefig("test.png") -- GitLab From 2d60432c9da6e950a98085226178b7f1da4288d2 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 7 Mar 2025 15:26:37 -0500 Subject: [PATCH 047/388] Update of Marconi for explict parameters of job_dict only --- raps/dataloaders/marconi100.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index d067d57..ca3051c 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -207,9 +207,12 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: - job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], - end_state, scheduled_nodes, - job_id, priority, partition, + job_info = job_dict(nodes_required=nodes_required, name=name, + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, nrx_trace=[],ntx_trace=[], + end_state=end_state, + scheduled_nodes=scheduled_nodes, + id=job_id, priority=priority,partition=partition, submit_time=submit_time, time_limit=time_limit, start_time=start_time, end_time=end_time, wall_time=wall_time, trace_time=trace_time, -- GitLab From e99208000eb6b132b6d5a8f3f78a73939700c721 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 7 Mar 2025 15:34:42 -0500 Subject: [PATCH 048/388] Fixed error in the case that no job completed. 
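(For reference, the PSF quantities accumulated in raps/stats.py above reduce to the closed form below, with s_j the job size, T_j the turnaround time, and W_j the wait time of job j:)

    PSF = \frac{3 \sum_j s_j \left( T_j^4 - W_j^4 \right)}{4 \sum_j s_j \left( T_j^3 - W_j^3 \right)}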
--- raps/stats.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/raps/stats.py b/raps/stats.py index 7ee1fb9..e8827d9 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -64,7 +64,6 @@ def get_job_stats(engine: Engine): min_turnaround_time, max_turnaround_time, sum_turnaround_time = sys.maxsize, -sys.maxsize - 1, 0 min_psf_partial_num, max_psf_partial_num, sum_psf_partial_num = sys.maxsize, -sys.maxsize - 1, 0 min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = sys.maxsize, -sys.maxsize - 1, 0 - min_awrt, max_awrt, sum_awrt = sys.maxsize, -sys.maxsize - 1, 0 # Information on Job-Mix @@ -90,9 +89,11 @@ def get_job_stats(engine: Engine): min_turnaround_time, max_turnaround_time, sum_turnaround_time = \ min_max_sum(turnaround_time, min_turnaround_time, max_turnaround_time, sum_turnaround_time) + # Area Weighted Average Response Time awrt = agg_node_hours * turnaround_time # Area Weighted Response Time min_awrt, max_awrt, sum_awrt = min_max_sum(awrt, min_awrt, max_awrt, sum_awrt) - + + # Priority Weighted Specific Response Time psf_partial_num = job_size * (turnaround_time**4 - wait_time**4) psf_partial_den = job_size * (turnaround_time**3 - wait_time**3) @@ -104,11 +105,11 @@ def get_job_stats(engine: Engine): if len(engine.job_history_dict) != 0: avg_job_size = sum_job_size / len(engine.job_history_dict) avg_runtime = sum_runtime / len(engine.job_history_dict) - avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict) + avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict) avg_wait_time = sum_wait_time / len(engine.job_history_dict) avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict) avg_awrt = sum_awrt / sum_agg_node_hours - psf = (3*sum_psf_partial_num)/(4*sum_psf_partial_den) + psf = (3 * sum_psf_partial_num) / (4 * sum_psf_partial_den) else: # Set these to -1 to indicate nothing ran min_job_size, max_job_size, avg_job_size = -1,-1,-1 @@ -117,6 +118,7 @@ def get_job_stats(engine: Engine): min_wait_time, max_wait_time, avg_wait_time = -1,-1,-1 min_turnaround_time, max_turnaround_time, avg_turnaround_time = -1,-1,-1 min_awrt, max_awrt, avg_awrt = -1,-1,-1 + psf = -1 job_stats = { 'jobs completed': engine.jobs_completed, -- GitLab From bd5ed077035dd95729e3f8b6097c5a8f5a99b18b Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 7 Mar 2025 16:18:30 -0500 Subject: [PATCH 049/388] Added energy / EDP / EDP^2 and small medium large huge Jobs as stats. 
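(On the EDP metrics added below: edp weights a job's energy by its runtime and edp2 by runtime squared, so slow jobs are penalized superlinearly. A quick sanity check of the arithmetic, with illustrative numbers rather than values from any real trace:)

    energy = 5.0e6                # J, hypothetical job energy
    runtime = 3600.0              # s
    edp = energy * runtime        # 1.8e10 J*s
    edp2 = energy * runtime ** 2  # 6.48e13 J*s^2
    assert edp == 1.8e10 and edp2 == 6.48e13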
---
 raps/stats.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/raps/stats.py b/raps/stats.py
index e8827d9..6bc6409 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -50,12 +50,16 @@ def min_max_sum(value,min,max,sum):
     sum += value
     return min,max,sum

-
 def get_job_stats(engine: Engine):
     """ Return job statistics processed over the engine execution"""
     # Information on Job-Mix
     min_job_size, max_job_size, sum_job_size = sys.maxsize, -sys.maxsize - 1, 0
     min_runtime, max_runtime, sum_runtime = sys.maxsize, -sys.maxsize - 1, 0
+
+    min_energy, max_energy, sum_energy = sys.maxsize, -sys.maxsize - 1, 0
+    min_edp, max_edp, sum_edp = sys.maxsize, -sys.maxsize - 1, 0
+    min_edp2, max_edp2, sum_edp2 = sys.maxsize, -sys.maxsize - 1, 0
+
     min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = sys.maxsize, -sys.maxsize - 1, 0
     # Completion statistics
     throughput = engine.jobs_completed / engine.timesteps * 3600 if engine.timesteps else 0  # Jobs per hour
@@ -66,6 +70,11 @@ def get_job_stats(engine: Engine):
     min_awrt, max_awrt, sum_awrt = sys.maxsize, -sys.maxsize - 1, 0

+    jobsSmall = 0
+    jobsMedium = 0
+    jobsLarge = 0
+    jobsHuge = 0
+
     # Information on Job-Mix
     for job in engine.job_history_dict:
         job_size = job['num_nodes']
@@ -76,6 +85,17 @@ def get_job_stats(engine: Engine):
         min_runtime, max_runtime, sum_runtime = \
             min_max_sum(runtime, min_runtime, max_runtime, sum_runtime)

+        energy = job['energy']
+        min_energy, max_energy, sum_energy = \
+            min_max_sum(energy, min_energy, max_energy, sum_energy)

+        edp = energy * runtime
+        min_edp, max_edp, sum_edp = \
+            min_max_sum(edp, min_edp, max_edp, sum_edp)
+
+        edp2 = energy * runtime**2
+        min_edp2, max_edp2, sum_edp2 = \
+            min_max_sum(edp2, min_edp2, max_edp2, sum_edp2)
+
         agg_node_hours = runtime * job_size  # Aggregate node hours
         min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = \
             min_max_sum(agg_node_hours, min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours)
@@ -102,9 +122,21 @@
             min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = \
                 min_max_sum(psf_partial_den, min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den)

+        if job['num_nodes'] <= 5:
+            jobsSmall += 1
+        elif job['num_nodes'] <= 50:
+            jobsMedium += 1
+        elif job['num_nodes'] <= 250:
+            jobsLarge += 1
+        else:  # job['num_nodes'] > 250:
+            jobsHuge += 1
+
     if len(engine.job_history_dict) != 0:
         avg_job_size = sum_job_size / len(engine.job_history_dict)
         avg_runtime = sum_runtime / len(engine.job_history_dict)
+        avg_energy = sum_energy / len(engine.job_history_dict)
+        avg_edp = sum_edp / len(engine.job_history_dict)
+        avg_edp2 = sum_edp2 / len(engine.job_history_dict)
         avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict)
         avg_wait_time = sum_wait_time / len(engine.job_history_dict)
         avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict)
@@ -114,6 +146,9 @@
         # Set these to -1 to indicate nothing ran
         min_job_size, max_job_size, avg_job_size = -1,-1,-1
         min_runtime, max_runtime, avg_runtime = -1,-1,-1
+        min_energy, max_energy, avg_energy = -1,-1,-1
+        min_edp, max_edp, avg_edp = -1,-1,-1
+        min_edp2, max_edp2, avg_edp2 = -1,-1,-1
         min_agg_node_hours, max_agg_node_hours, avg_agg_node_hours = -1,-1,-1
         min_wait_time, max_wait_time, avg_wait_time = -1,-1,-1
         min_turnaround_time, max_turnaround_time, avg_turnaround_time =
-1,-1,-1 @@ -125,6 +160,10 @@ def get_job_stats(engine: Engine): 'throughput': f'{throughput:.2f} jobs/hour', 'jobs still running': [job.id for job in engine.running], 'jobs still in queue': [job.id for job in engine.queue], + 'Jobs <= 5 nodes': jobsSmall, + 'Jobs <= 50 nodes': jobsMedium, + 'Jobs <= 250 nodes': jobsLarge, + 'Jobs > 250 nodes': jobsHuge, # Information on job-mix executed 'min job size': min_job_size, 'max job size': max_job_size, @@ -132,6 +171,15 @@ def get_job_stats(engine: Engine): 'min runtime': min_runtime, 'max runtime': max_runtime, 'average runtime': avg_runtime, + 'min energy': min_energy, + 'max energy': max_energy, + 'avg energy': avg_energy, + 'min edp': min_edp, + 'max edp': max_edp, + 'avg edp': avg_edp, + 'min edp^2': min_edp2, + 'max edp^2': max_edp2, + 'avg edp^2': avg_edp2, 'min_aggregate_node_hours': min_agg_node_hours, 'max_aggregate_node_hours': max_agg_node_hours, 'avg_aggregate_node_hours': avg_agg_node_hours, -- GitLab From 36fd4ef8686675353e7d989060e957103deb8071 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 24 Mar 2025 16:11:17 -0400 Subject: [PATCH 050/388] adjusted dataloaders and smoke test to new job_dict. (job dict has named arguments only) --- raps/dataloaders/adastraMI250.py | 13 +- raps/dataloaders/frontier.py | 23 +++- raps/dataloaders/fugaku.py | 4 +- raps/dataloaders/lassen.py | 25 ++-- raps/schedulers/scheduleflow.py | 6 + raps/workload.py | 212 +++++++++++++++++++------------ tests/smoke.py | 3 +- 7 files changed, 181 insertions(+), 105 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 894717e..6a546f0 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -177,8 +177,17 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): trace_end_time = start_time if wall_time > 0: - job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [],[], - end_state, scheduled_nodes, job_id, priority, + job_info = job_dict(nodes_required=nodes_required, + name=name, + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], + nrx_trace=[], + end_state=end_state, + scheduled_nodes=scheduled_nodes, + id=job_id, + priority=priority, submit_time=submit_time, time_limit=time_limit, start_time=start_time, diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 44c8838..08dc5fb 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -280,13 +280,22 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar continue # SKIP! 
if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: - job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], - end_state, scheduled_nodes, - job_id, priority, # partition missing - submit_time=submit_time, time_limit=time_limit, - start_time=start_time, end_time=end_time, - wall_time=wall_time, trace_time=trace_time, - trace_start_time=trace_start_time, trace_end_time=trace_end_time) + job_info = job_dict( + nodes_required=nodes_required, + name=name, + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + nrx_trace=[], + ntx_trace=[], + end_state=end_state, + scheduled_nodes=scheduled_nodes, + id=job_id, + priority=priority, # partition missing + submit_time=submit_time, time_limit=time_limit, + start_time=start_time, end_time=end_time, + wall_time=wall_time, trace_time=trace_time, + trace_start_time=trace_start_time, trace_end_time=trace_end_time) jobs.append(job_info) return jobs, telemetry_start, telemetry_end diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 880ed0e..d9d0095 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -153,9 +153,7 @@ def load_data_from_df(df, **kwargs): trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, - trace_missing_values=trace_missing_values - ) - + trace_missing_values=trace_missing_values) job_list.append(job_info) return job_list, telemetry_start, telemetry_end diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 995284f..27d72e1 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -167,18 +167,18 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): print('scheduled_nodes:', nodes_required, scheduled_nodes) if wall_time >= 0: - job_info = job_dict(nodes_required, - name, - account, - cpu_trace, - gpu_trace, - net_tx, - net_rx, - end_state, - scheduled_nodes, - job_id, - priority, - partition, + job_info = job_dict(nodes_required=nodes_required, + name=name, + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state=end_state, + scheduled_nodes=scheduled_nodes, + id=job_id, + priority=priority, + partition=partition, submit_time=submit_time, time_limit=time_limit, start_time=start_time, @@ -188,7 +188,6 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): trace_start_time=trace_start_time, trace_end_time=trace_end_time, trace_missing_values=trace_missing_values) - job_list.append(job_info) return job_list, telemetry_start_time, telemetry_end_time diff --git a/raps/schedulers/scheduleflow.py b/raps/schedulers/scheduleflow.py index 88725fe..a586d28 100644 --- a/raps/schedulers/scheduleflow.py +++ b/raps/schedulers/scheduleflow.py @@ -37,6 +37,12 @@ class Scheduler: # self.sf_action_list = [] # list as returned from sf_scheduler.stop_job + def gif(self): + logs = self._sf_runtime.get_stats() + #vis_hanlder = _intScheduleFlow.VizualizationEngine(self.sf_scheduler. + self._sf_runtime._Runtime__generate_gif() + + def sort_jobs(self, queue, accounts=None): """ Optionally, pre-sort jobs. 
diff --git a/raps/workload.py b/raps/workload.py index 0134945..50e996a 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -118,73 +118,66 @@ class Workload: cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) net_tx, net_rx = [], [] + job_time = len(gpu_trace) * config['TRACE_QUANTA'] # Create job info for this partition job_info = job_dict( - config['AVAILABLE_NODES'], # Nodes required - f"Max Test {partition}", # Name with partition label - ACCT_NAMES[0], # User account - cpu_trace, # CPU trace - gpu_trace, # GPU trace - net_tx, # Network transmit trace - net_rx, # Network receive trace - 'COMPLETED', # End state - list(range(config['AVAILABLE_NODES'])), # Explicitly all nodes to test replay - None, # Job ID - 100, # Priority - partition, # Partition name - 0, # Submit time - len(gpu_trace) * config['TRACE_QUANTA'] + 1, # Time limit - 0, # Start time / or None - len(gpu_trace) * config['TRACE_QUANTA'], # End time / or None - len(gpu_trace) * config['TRACE_QUANTA'], # Wall time - len(gpu_trace) * config['TRACE_QUANTA'], # Trace time - 0, # Trace start time - len(gpu_trace) * config['TRACE_QUANTA'] # Trace end time - ) - print(job_info) + nodes_required=config['AVAILABLE_NODES'], + name=f"Max Test {partition}", + account=ACCT_NAMES[0], + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + scheduled_nodes=list(range(config['AVAILABLE_NODES'])), + id=None, + priority=100, + partition=partition, + time_limit=job_time + 1, + start_time=0, + end_time=job_time, + wall_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time) jobs.append(job_info) # Add job to the list return jobs def idle(self, **kwargs): - """Idle power test for multiple partitions""" - - # List to hold jobs for all partitions jobs = [] - # Iterate through each partition and get its configuration for partition in self.partitions: - # Fetch partition-specific configuration + # Fetch the config for the current partition config = self.config_map[partition] # Generate traces based on partition-specific configuration - cpu_util, gpu_util = 0, 0 # Idle test has zero utilization - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 43200, config['TRACE_QUANTA']) # 12 hours + cpu_util, gpu_util = 0, 0 + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) net_tx, net_rx = [], [] + job_time = len(gpu_trace) * config['TRACE_QUANTA'] # Create job info for this partition job_info = job_dict( - config['AVAILABLE_NODES'], # Nodes required - f"Idle Test {partition}", # Name with partition label - ACCT_NAMES[0], # User account - cpu_trace, # CPU trace - gpu_trace, # GPU trace - net_tx, # Network transmit trace - net_rx, # Network receive trace - 'COMPLETED', # End state - list(range(config['AVAILABLE_NODES'])), # Explicitly all nodes to test replay - None, # Job ID - 100, # Priority - partition, # Partition name - 0, # Submit time - len(gpu_trace) * config['TRACE_QUANTA'] + 1, # Time limit - 0, # Start time / or None - len(gpu_trace) * config['TRACE_QUANTA'], # End time / or None - len(gpu_trace) * config['TRACE_QUANTA'], # Wall time - len(gpu_trace) * config['TRACE_QUANTA'], # Trace time - 0, # Trace start time - len(gpu_trace) * config['TRACE_QUANTA'] # Trace end time - ) + nodes_required=config['AVAILABLE_NODES'], + name=f"Idle Test {partition}", + account=ACCT_NAMES[0], + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, 
+ end_state='COMPLETED', + scheduled_nodes=list(range(config['AVAILABLE_NODES'])), + id=None, + priority=100, + partition=partition, + time_limit=job_time + 1, + start_time=0, + end_time=job_time, + wall_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time) jobs.append(job_info) # Add job to the list return jobs @@ -207,53 +200,114 @@ class Workload: cpu_util, gpu_util = 1, 4 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + job_info = job_dict( - config['AVAILABLE_NODES'], - f"Max Test {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, - 'COMPLETED', None, None, 100, partition, - 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 0, 10800, len(gpu_trace) * config['TRACE_QUANTA'], - len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] - ) + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=list_of_all_nodes, + name=f"Max Test {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=0, + end_time=job_time, + wall_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_missing_values=False) jobs.append(job_info) # OpenMxP run cpu_util, gpu_util = 0, 4 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + job_info = job_dict( - config['AVAILABLE_NODES'], - f"OpenMxP {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, - 'COMPLETED', None, None, 100, partition, - 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 10800, 14200, len(gpu_trace) * config['TRACE_QUANTA'], - len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] - ) + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=list_of_all_nodes, + name=f"OpenMxP {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=10800, + end_time=14200, + wall_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_missing_values=False) jobs.append(job_info) # HPL run cpu_util, gpu_util = 0.33, 0.79 * 4 # based on 24-01-18 run cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) + job_time = len(gpu_trace) * config['TRACE_QUANTA'] job_info = job_dict( - config['AVAILABLE_NODES'], - f"HPL {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, - 'COMPLETED', None, None, 100, partition, - 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 14200, 17800, len(gpu_trace) * config['TRACE_QUANTA'], - len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] - ) + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=list_of_all_nodes, + name=f"HPL {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=14200, + end_time=17800, + wall_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + 
trace_missing_values=False) jobs.append(job_info) # Idle test - cpu_util, gpu_util = 0, 0 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) + job_time = len(gpu_trace) * config['TRACE_QUANTA'] job_info = job_dict( - config['AVAILABLE_NODES'], - f"Idle Test {partition}", account, cpu_trace, gpu_trace, net_tx, net_rx, - 'COMPLETED', None, None, 100, partition, - 0, len(gpu_trace) * config['TRACE_QUANTA'] + 1, - 17800, 21400, len(gpu_trace) * config['TRACE_QUANTA'], - len(gpu_trace) * config['TRACE_QUANTA'], 0, len(gpu_trace) * config['TRACE_QUANTA'] - ) + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=list_of_all_nodes, + name=f"Idle Test {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=17800, + end_time=21400, + wall_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_missing_values=False) jobs.append(job_info) return jobs diff --git a/tests/smoke.py b/tests/smoke.py index 9174b3c..609f46e 100644 --- a/tests/smoke.py +++ b/tests/smoke.py @@ -7,6 +7,7 @@ DATAPATH = os.path.expanduser("~/data") # Standardize the time setting DEFAULT_TIME = "1h" +BENCH_TIME = "4h" # Define systems and their corresponding filenames SYSTEMS = { @@ -41,7 +42,7 @@ def synthetic_workload_tests(): """Run synthetic workload tests.""" print("Starting synthetic workload tests...") run_command(f"python main.py -t {DEFAULT_TIME}") - run_command(f"python main.py -w benchmark -t {DEFAULT_TIME}") + run_command(f"python main.py -w benchmark -t {BENCH_TIME}") run_command(f"python main.py -w peak -t {DEFAULT_TIME}") run_command(f"python main.py -w idle -t {DEFAULT_TIME}") -- GitLab From 554d8ea185dd3bd46fceb21d5a2eb63de3f8e406 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 25 Mar 2025 09:07:03 -0400 Subject: [PATCH 051/388] Added Contributors to CONTRIBTUORS.txt (part of previous changes) --- CONTRIBUTORS.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 77b218c..046df5b 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -7,3 +7,7 @@ Matthias Maiterth (maiterthm@ornl.gov), Oak Ridge National Laboratory Sedrick Bouknight (bouknightsl@ornl.gov), Oak Ridge National Laboratory Jesse Hines (hinesjr@ornl.gov), Oak Ridge National Laboratory Jake Webb (webbtj@ornl.gov), Oak Ridge National Laboratory +Rashadul Kabir (rashadul.kabir@colostate.edu), Colorado State University +Bertrand Cirou (cirou@cines.fr), Centre Informatique National de l’Enseignement Supérieur +Kevin Menear (kmenear@nrel.gov), National Renewable Energy Laboratory + -- GitLab From 80e2c7c5d233729e3bf0e25c465d7497cedd96df Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 27 Mar 2025 23:32:13 -0400 Subject: [PATCH 052/388] Add Gantt plot - not yet working --- raps/plotting.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++ raps/telemetry.py | 10 ++++++--- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/raps/plotting.py b/raps/plotting.py index 1c3f550..cf85750 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -231,6 +231,58 @@ def plot_submit_times(submit_times, nr_list): plt.savefig('submit_times.png', dpi=300, bbox_inches='tight') +def convert_time_scale(times): + max_time = max(times) + if max_time >= 3600 * 24 * 7: # more than a week + return [t / (3600 * 24) for t in times], 
'days' + elif max_time >= 3600 * 24: # more than a day + return [t / 3600 for t in times], 'hours' + else: + return times, 'seconds' + + +def plot_job_gantt(start_times, end_times, node_counts): + # Convert times + start_times, time_label = convert_time_scale(start_times) + end_times, _ = convert_time_scale(end_times) + + plt.figure(figsize=(10, 4)) + + # We'll plot each job in a different row on the Y-axis + y_positions = range(len(start_times)) # 0, 1, 2, ... + + for s, e, n in zip(start_times, end_times, node_counts): + # Bar placed at y = n + plt.barh( + y=n, # node count is the vertical coordinate + width=e - s, # job duration on the x-axis + left=s, # start time + height=0.8, # thickness of the bar + color='yellow', + edgecolor='black', + alpha=0.8 + ) + + #for y, (s, e, n) in enumerate(zip(start_times, end_times, node_counts)): + # plt.barh(y, width=e - s, left=s, height=0.8, + # color='yellow', edgecolor='black', alpha=0.8) + # # Optionally place the node count label in the middle of the bar + # plt.text((s + e)/2, y, str(n), + # ha='center', va='center', color='black') + + plt.xlabel(f'Time ({time_label})') + plt.ylabel('Job Index') + plt.title('Job Timeline (Gantt Style)') + plt.yticks(y_positions) # label each job if desired + + # Time axis from earliest start to latest end + plt.xlim(min(start_times), max(end_times)) + + plt.tight_layout() + plt.savefig('job_gantt.png', dpi=300) + plt.show() + + if __name__ == "__main__": plotter = Plotter() #plotter.plot_history([1, 2, 3, 4]) diff --git a/raps/telemetry.py b/raps/telemetry.py index 2616bc7..f18e378 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -16,6 +16,7 @@ if __name__ == "__main__": help='Either: path/to/joblive path/to/jobprofile' + \ ' -or- filename.npz (overrides --workload option)') parser.add_argument('-p', '--plot', action='store_true', help='Output plots') + parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') parser.add_argument('--system', type=str, default='frontier', help='System config to use') parser.add_argument('--reschedule', action='store_true', help='Reschedule the telemetry workload') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') @@ -28,7 +29,7 @@ from tqdm import tqdm from .config import ConfigManager from .job import Job from .account import Accounts -from .plotting import plot_submit_times, plot_nodes_histogram +from .plotting import plot_submit_times, plot_nodes_histogram, plot_job_gantt from .utils import next_arrival @@ -97,12 +98,14 @@ if __name__ == "__main__": wt_list = [] nr_list = [] submit_times = [] + end_times = [] last = 0 for job_vector in jobs: job = Job(job_vector, 0) wt_list.append(job.wall_time) nr_list.append(job.nodes_required) submit_times.append(job.submit_time) + end_times.append(job.submit_time + job.wall_time) if job.submit_time > 0: dt = job.submit_time - last dt_list.append(dt) @@ -118,5 +121,6 @@ if __name__ == "__main__": print(f'Nodes required (std): {np.std(nr_list):.2f}') if args.plot: - plot_nodes_histogram(nr_list) - plot_submit_times(submit_times, nr_list) + #plot_nodes_histogram(nr_list) + #plot_submit_times(submit_times, nr_list) + plot_job_gantt(submit_times, end_times, nr_list) -- GitLab From e6302750681790f55ec43db6fb2c408a24dbe6b4 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 28 Mar 2025 11:23:54 -0400 Subject: [PATCH 053/388] Added queue and running stats to more easily identify submission regions of interest. 
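
For illustration, here is a minimal standalone sketch of the bookkeeping idea
(the class and method names are hypothetical, not the RAPS API; the
authoritative change is the diff below). Unlike the get_scheduler_stats()
added below, the sketch also guards the empty-history case before dividing:

    # Hypothetical sketch: record per-tick queue/running lengths, then average.
    class SchedulerStats:
        def __init__(self):
            self.queue_history = []    # number of queued jobs per tick
            self.running_history = []  # number of running jobs per tick

        def record(self, queue, running):
            self.queue_history.append(len(queue))
            self.running_history.append(len(running))

        def averages(self):
            ticks = len(self.queue_history)
            if ticks == 0:  # avoid ZeroDivisionError on an empty run
                return {'average_queue': 0.0, 'average_running': 0.0}
            return {'average_queue': sum(self.queue_history) / ticks,
                    'average_running': sum(self.running_history) / ticks}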
---
 main.py        | 10 +++++++++-
 raps/engine.py | 11 +++++++++++
 raps/stats.py  | 10 ++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/main.py b/main.py
index f5fc99d..4f2920c 100644
--- a/main.py
+++ b/main.py
@@ -32,7 +32,7 @@ from raps.workload import Workload
 from raps.account import Accounts
 from raps.weather import Weather
 from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, next_arrival
-from raps.stats import get_engine_stats, get_job_stats
+from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats

 config = ConfigManager(system_name=args.system).get_config()

@@ -176,14 +176,17 @@ layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_en

 engine_stats = get_engine_stats(sc)
 job_stats = get_job_stats(sc)
+scheduler_stats = get_scheduler_stats(sc)

 # Following b/c we get the following error when we use PM100 telemetry dataset
 # TypeError: Object of type int64 is not JSON serializable
 try:
     print(json.dumps(engine_stats, indent=4))
     print(json.dumps(job_stats, indent=4))
+    print(json.dumps(scheduler_stats, indent=4))
 except:
     print(engine_stats)
     print(job_stats)
+    print(scheduler_stats)

 if args.plot:
@@ -259,6 +262,11 @@ if args.output:
     job_history = pd.DataFrame(sc.get_job_history_dict())
     job_history.to_csv(OPATH / "job_history.csv", index=False)

+    scheduler_running_history = pd.DataFrame(sc.get_scheduler_running_history())
+    scheduler_running_history.to_csv(OPATH / "running_history.csv", index=False)
+    scheduler_queue_history = pd.DataFrame(sc.get_scheduler_queue_history())
+    scheduler_queue_history.to_csv(OPATH / "queue_history.csv", index=False)
+
     try:
         with open(OPATH / 'stats.out', 'w') as f:
             json.dump(engine_stats, f, indent=4)

diff --git a/raps/engine.py b/raps/engine.py
index 5b2830c..ca27c9d 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -56,6 +56,8 @@ class Engine:
         self.output = kwargs.get('output')
         self.replay = kwargs.get('replay')
         self.sys_util_history = []
+        self.scheduler_queue_history = []
+        self.scheduler_running_history = []

         # Get scheduler type from command-line args or default
         scheduler_type = kwargs.get('scheduler', 'default')
@@ -230,6 +232,9 @@ class Engine:
         system_util = self.num_active_nodes / self.config['AVAILABLE_NODES'] * 100
         self.sys_util_history.append((self.current_time, system_util))

+        self.scheduler_queue_history.append(len(self.queue))
+        self.scheduler_running_history.append(len(self.running))
+
         # Render the updated layout
         power_df = None
         cooling_inputs, cooling_outputs = None, None
@@ -349,3 +354,9 @@ class Engine:

     def get_job_history_dict(self):
         return self.job_history_dict
+
+    def get_scheduler_queue_history(self):
+        return self.scheduler_queue_history
+
+    def get_scheduler_running_history(self):
+        return self.scheduler_running_history

diff --git a/raps/stats.py b/raps/stats.py
index 6bc6409..789e0a2 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -50,6 +50,16 @@ def min_max_sum(value,min,max,sum):
         sum += value
     return min,max,sum

+
+def get_scheduler_stats(engine: Engine):
+    average_queue = sum(engine.scheduler_queue_history) / len(engine.scheduler_queue_history)
+    average_running = sum(engine.scheduler_running_history) / len(engine.scheduler_running_history)
+    stats = {
+        'average_queue': average_queue,
+        'average_running': average_running,
+    }
+    return stats
+
 def get_job_stats(engine: Engine):
     """ Return job statistics processed over the engine execution"""
     # Information on Job-Mix
-- GitLab

From 0e5e2fbd50ddff026d2719d8c3f1becb8e659c95 Mon Sep 17 00:00:00 2001
From: Matthias
Maiterth Date: Mon, 31 Mar 2025 09:37:53 -0400 Subject: [PATCH 054/388] Plotting script and ui NaN bug --- raps/dataloaders/plot3.py | 152 ++++++++++++++++++++++++++++++++++++++ raps/ui.py | 8 ++ scripts/plot_p-util_t.py | 43 ++++++++--- 3 files changed, 191 insertions(+), 12 deletions(-) create mode 100644 raps/dataloaders/plot3.py diff --git a/raps/dataloaders/plot3.py b/raps/dataloaders/plot3.py new file mode 100644 index 0000000..6470ff4 --- /dev/null +++ b/raps/dataloaders/plot3.py @@ -0,0 +1,152 @@ +#!/bin/env python3 +import pandas as pd +import pyarrow.parquet as pq +import matplotlib.pyplot as plt + +style=['seaborn-v0_8', 'tableau-colorblind10'] + +for j in range(-1,len(style)): + if j in range(0,len(style)): + plt.style.use(style[j]) + + colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] + # Revert to the default style + plt.style.use('default') + # Apply ggplot colors to default style + plt.rcParams['axes.prop_cycle'] = plt.cycler(color=colors) + + + import sys + + if len(sys.argv) > 1: + path = sys.argv[1] + else: + print(f"Usage: python {sys.argv[0]} ") + exit() + + # e.g. path = "$HOME/Repositories/exadigit/raps/simulation_results/b803010" + + policies = ['fcfs-nobf','fcfs-easy','priority-nobf','priority-easy','priority-ffbf','replay'] + files = ['cooling_model.parquet', 'loss_history.parquet', 'power_history.parquet', 'util.parquet'] + files = ['cooling_model.parquet', 'power_history.parquet', 'util.parquet'] + files = ['util.parquet', 'power_history.parquet', 'cooling_model.parquet'] + #files = ['loss_history.parquet', 'power_history.parquet', 'util.parquet'] + #files = ['power_history.parquet', 'util.parquet', 'cooling_model.parquet'] + + policy_path = {f"{policy}":f"{path}/{policy}" for policy in policies} + full_files = {f"{policy}":f"{path}/{policy}/{file}" for policy in policies for file in files} + + + def iter_to_seconds(i): + return i * 15 + + + fig, axs = plt.subplots(len(files),figsize=(12, 12)) + for i,file in enumerate(files): + policy_files = [f"{path}/{policy}/{file}" for policy in policies] + for policy_file in policy_files: + # df = pd.read_parquet(policy_file) + x = 'time' + policy = policy_file.split('/')[-2] + if file == "power_history.parquet": + y = 'power [kw]' + ylab = 'Power [kW]' + ylim = 29000 + axs[i].set_ylim(0,ylim) + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'power [kw]'}) + + elif file == "cooling_model.parquet": + y = 'pue' + ylab = 'PUE' + + df = pd.read_parquet(policy_file) + df['index'] = df.index + df[x] = df['index'].apply(iter_to_seconds) + ymax = max(df['pue']) + #axs[i].plot(df[x],df[y], label=ylab) + + elif file == "loss_history.parquet": + y = 'loss [kw]' + ylab = 'Loss [kW]' + ylim = 29000 + axs[i].set_ylim(0,ylim) + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'loss [kw]'}) + #axs[i].plot(df[x],df[y], label=ylab) + + elif file == "util.parquet": + y = 'utilization' + ylab = 'Utilization' + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time', 1:'utilization [%]'}) + df[y] = df['utilization [%]'] / 100 + #axs[i].plot(df[x], df[y], label=ylab) + + else: + raise KeyError + + axs[i].plot(df[x],df[y], label=policy) + axs[i].set_ylabel(ylab) + #$axs[i].plot(df[0],df[1],label=policy) + if file == "power_history.parquet": + axs[i].legend(loc='upper right') + axs[i].set_title('Power') + elif file == "util.parquet": + axs[i].set_title('Utilization') + axs[i].legend(loc='lower right') + elif file == "cooling_model.parquet": + axs[i].set_title('PUE') + 
axs[i].legend(loc='upper right') + elif file == "loss_history.parquet": + axs[i].set_title('Loss') + axs[i].legend(loc='upper right') + else: + raise KeyError() + #plt.show() + plt.savefig(f"Type{[j]}.png") + + + #for i in [1]: + # fig, ax1 = plt.subplots(figsize=(10, 6)) + # + # power = path + "/" + files[2] + # loss = path + "/" + files[1] + # util = path + "/" + files[3] + # + # df_power = pd.read_parquet(power) + # df_power = df_power.rename(columns={0:'time',1:'power [kw]'}) + # ax1.plot(df_power['time'],df_power['power [kw]'], color='black', label='Power kW]') + # + # #df_loss = pd.read_parquet(loss) + # #df_loss = df_loss.rename(columns={0:'time',1:'loss [kw]'}) + # #ax1.plot(df_loss['time'],df_loss['loss [kw]'], color='red', label='Loss [kW]') + # + # ax2 = ax1.twinx() + # + # #df_cooling = pd.read_parquet(cooling) + # #df_cooling['index'] = df_cooling.index + # #df_cooling['time'] = df_cooling['index'].apply(iter_to_seconds) + # #ymax = max(df_cooling['pue']) + # #ax2.plot(df_cooling['time'],df_cooling['pue'], color='blue', label='PUE') + # + # df_util = pd.read_parquet(util) + # df_util = df_util.rename(columns={0:'time', 1:'utilization [%]'}) + # df_util['utilization'] = df_util['utilization [%]'] / 100 + # ax2.plot(df_util['time'],df_util['utilization'], color='orange', label='Utilization') + # + # #ymax = max(max(df_cooling['pue']),max(df_util['utilization'])) + # ymax = max(0,max(df_util['utilization'])) + # ax2.set_ylim([0, ymax * 1.05]) + # + # ax1.set_xlabel('time [s]') + # ax1.set_ylabel('[kW]') + # ax2.set_ylabel('[%]') + # plt.title(path) + # ax1.legend(loc='upper left') + # ax2.legend(loc='upper right') + # plt.show() + # #plt.savefig("test.png") diff --git a/raps/ui.py b/raps/ui.py index e2dcd83..a1cd0f1 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -1,4 +1,6 @@ +import sys import pandas as pd +import numpy as np from rich.align import Align from rich.console import Console from rich.layout import Layout @@ -268,6 +270,9 @@ class LayoutManager: if uncertainties: pass else: + power_df = power_df.replace([np.nan],0.0) + power_df = power_df.replace([np.inf],sys.maxsize) + power_df = power_df.replace([-np.inf], -sys.maxsize - 1) power_df = power_df[power_columns].astype(int) # Populate the table with data from the DataFrame, applying the data styles @@ -332,6 +337,9 @@ class LayoutManager: if uncertainties: pass else: + power_df = power_df.replace([np.nan],0.0) + power_df = power_df.replace([np.inf],sys.maxsize) + power_df = power_df.replace([-np.inf], -sys.maxsize - 1) power_df = power_df[display_columns].round().astype(int) # Create table for displaying rack power and loss with styling diff --git a/scripts/plot_p-util_t.py b/scripts/plot_p-util_t.py index 55dc4c8..f91fbd7 100644 --- a/scripts/plot_p-util_t.py +++ b/scripts/plot_p-util_t.py @@ -5,6 +5,8 @@ import matplotlib.pyplot as plt import sys + + if len(sys.argv) > 1: path = sys.argv[1] else: @@ -22,43 +24,60 @@ def iter_to_seconds(i): return i * 15 +SMALL_SIZE = 16 +MEDIUM_SIZE = 18 +BIGGER_SIZE = 22 + +plt.rc('font', size=SMALL_SIZE) # controls default text sizes +plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title +plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels +plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels +plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels +plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize +plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title + + + for i in [1]: fig, ax1 = 
plt.subplots(figsize=(10, 6))

     power = path + "/" + files[2]
     loss = path + "/" + files[1]
     util = path + "/" + files[3]
+    cooling = path + "/" + files[0]

     df_power = pd.read_parquet(power)
     df_power = df_power.rename(columns={0:'time',1:'power [kw]'})
     ax1.plot(df_power['time'],df_power['power [kw]'], color='black', label='Power kW]')

-    #df_loss = pd.read_parquet(loss)
-    #df_loss = df_loss.rename(columns={0:'time',1:'loss [kw]'})
-    #ax1.plot(df_loss['time'],df_loss['loss [kw]'], color='red', label='Loss [kW]')
+    df_loss = pd.read_parquet(loss)
+    df_loss = df_loss.rename(columns={0:'time',1:'loss [kw]'})
+    ax1.plot(df_loss['time'],df_loss['loss [kw]'], color='red', label='Loss [kW]')

     ax2 = ax1.twinx()

-    #df_cooling = pd.read_parquet(cooling)
-    #df_cooling['index'] = df_cooling.index
-    #df_cooling['time'] = df_cooling['index'].apply(iter_to_seconds)
-    #ymax = max(df_cooling['pue'])
-    #ax2.plot(df_cooling['time'],df_cooling['pue'], color='blue', label='PUE')
+    df_cooling = pd.read_parquet(cooling)
+    df_cooling['index'] = df_cooling.index
+    df_cooling['time'] = df_cooling['index'].apply(iter_to_seconds)
+    ymax = max(df_cooling['pue'])
+    ax2.plot(df_cooling['time'],df_cooling['pue'], color='blue', label='PUE')

     df_util = pd.read_parquet(util)
     df_util = df_util.rename(columns={0:'time', 1:'utilization [%]'})
     df_util['utilization'] = df_util['utilization [%]'] / 100
     ax2.plot(df_util['time'],df_util['utilization'], color='orange', label='Utilization')

-    #ymax = max(max(df_cooling['pue']),max(df_util['utilization']))
-    ymax = max(0,max(df_util['utilization']))
+    ymax = max(max(df_cooling['pue']),max(df_util['utilization']))
+    #ymax = max(0,max(df_util['utilization']))
     ax2.set_ylim([0, ymax * 1.05])

     ax1.set_xlabel('time [s]')
     ax1.set_ylabel('[kW]')
     ax2.set_ylabel('[%]')
-    plt.title(path)
+    #path
+    #plt.title(path)
     ax1.legend(loc='upper left')
     ax2.legend(loc='upper right')
-    # plt.show()
+    #plt.rcParams.update({'font.size': 30})
+    #plt.show()
     plt.savefig("test.png")
-- GitLab

From 24f2507c0d68af64a27ec0844021c1904ea073c7 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 2 Apr 2025 13:05:18 -0400
Subject: [PATCH 055/388] Added fixes to make sure the incentive structure work
 can proceed. Fixes: missing data in datasets is mitigated in 1. the
 dataloader and 2. the engine; also removed the burst factor in lassen, as it
 throws errors.
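
To make the engine-side mitigation concrete, here is a hedged sketch of the
clamping idea as a standalone helper (a hypothetical function, not the Engine
code in the diff below): when a job runs past its recorded trace, the nearest
recorded sample is reused instead of raising.

    # Hypothetical helper; the real logic lives inside the Engine tick loop.
    def trace_value(trace, running_time, trace_start_time, quanta):
        """Sample a utilization trace, clamping the index into the recorded range."""
        idx = int((running_time - trace_start_time) // quanta)
        idx = min(max(idx, 0), len(trace) - 1)
        return trace[idx]

    # trace_value([0.2, 0.5, 0.9], 17, 0, 15) -> 0.5 (index 1); at t=120 the
    # index clamps to the last sample and 0.9 is returned instead of raising.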
--- raps/dataloaders/frontier.py | 8 +++--- raps/dataloaders/lassen.py | 12 ++++----- raps/engine.py | 50 ++++++++++++++++++------------------ scripts/marconi100-day51.sh | 4 +++ 4 files changed, 40 insertions(+), 34 deletions(-) create mode 100644 scripts/marconi100-day51.sh diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 08dc5fb..1929175 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -232,10 +232,13 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar wall_time = 0 trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds + + + trace_start_time = 0 trace_end_time = trace_time if wall_time > trace_time: - missing_trace_time = wall_time - trace_time + missing_trace_time = int(wall_time - trace_time) if start_time < 0: trace_start_time = missing_trace_time trace_end_time = wall_time @@ -243,8 +246,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar trace_start_time = 0 trace_end_time = trace_time else: - print(f"Job: {job_id} {start_time} - {end_time}!") - raise ValueError("Missing values not at start nor end.") + print(f"Job: {job_id} {end_state} {start_time} - {end_time},Trace: {trace_start_time} - {trace_end_time} Missing: {missing_trace_time}!") xnames = jobs_df.loc[jidx, 'xnames'] # Don't replay any job with an empty set of xnames diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 27d72e1..7017b6e 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -225,12 +225,12 @@ def adjust_bursts(burst_intervals, total, intervals): bursts = np.round(bursts).astype(int) adjustment = total - np.sum(bursts) - # Distribute adjustment across non-zero elements to avoid negative values - if adjustment != 0: - for i in range(len(bursts)): - if bursts[i] > 0: - bursts[i] += adjustment % (2^64-1) - break # Apply adjustment only once where it won't cause a negative + ## Distribute adjustment across non-zero elements to avoid negative values + #if adjustment != 0: + # for i in range(len(bursts)): + # if bursts[i] > 0: + # bursts[i] += adjustment % (2^64-1) # This can overflow! + # break # Apply adjustment only once where it won't cause a negative return bursts diff --git a/raps/engine.py b/raps/engine.py index ca27c9d..08bb8cb 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -115,6 +115,10 @@ class Engine: job_instance = Job(job_data) eligible_jobs_list.append(job_instance) self.queue += eligible_jobs_list + if eligible_jobs_list != []: + return True + else: + return False def prepare_timestep(self, replay:bool = True): completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time] @@ -171,31 +175,27 @@ class Engine: {job.running_time} > {job.wall_time}\n\ {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ ") - # job.running_time < job.trace_start_time or - if job.running_time >= job.trace_end_time: - cpu_util = 0 # No values available therefore we assume IDLE == 0 - gpu_util = 0 - net_util = 0 - if self.debug: - print("No Values in trace, using IDLE.") - if self.scheduler.policy == PolicyType.REPLAY and not job.trace_missing_values: - print(f"{job.running_time} < {job.trace_start_time} or {job.running_time} > {job.trace_end_time}") - raise Exception("Replay is using IDLE values! 
Something is wrong!") - else: - time_quanta_index = int((job.running_time - job.trace_start_time) // self.config['TRACE_QUANTA']) - if isinstance(job.cpu_trace, List) and time_quanta_index == len(job.cpu_trace): - # If the running time is past the last time step in the - # trace, use the last value in the trace. This can - # happen if the last valid timesteps is e.g. 17%15, - # the last trace value is 15%15 and the next possible - # trace value 30%15 but was not recorded because the - # job ended before. - # For every other error condition trace_start_ and - # _end_time are used! - time_quanta_index -= 1 + + time_quanta_index = int((job.running_time - job.trace_start_time) // self.config['TRACE_QUANTA']) + # If the running time is past the last time step in the + # trace, use the last value in the trace. This can + # happen if the last valid timesteps is e.g. 17%15, + # the last trace value is 15%15 and the next possible + # trace value 30%15 but was not recorded because the + # job ended before. + # For every other error condition trace_start_ and + # _end_time are used! + + if time_quanta_index < len(job.cpu_trace): cpu_util = get_utilization(job.cpu_trace, time_quanta_index) + else: + cpu_util = get_utilization(job.cpu_trace, len(job.cpu_trace) - 1) + + if time_quanta_index < len(job.gpu_trace): gpu_util = get_utilization(job.gpu_trace, time_quanta_index) - net_util = 0 + else: + gpu_util = get_utilization(job.gpu_trace, len(job.gpu_trace) - 1) + net_util = 0 if isinstance(job.ntx_trace,List) and len(job.ntx_trace) and isinstance(job.nrx_trace,List) and len(job.nrx_trace): net_tx = get_utilization(job.ntx_trace, time_quanta_index) @@ -336,9 +336,9 @@ class Engine: completed_jobs, newly_downed_nodes = self.prepare_timestep(replay) # 2. Identify eligible jobs and add them to the queue. - self.add_eligible_jobs_to_queue(jobs) + has_new_additions = self.add_eligible_jobs_to_queue(jobs) # 3. Schedule jobs that are now in the queue. - self.scheduler.schedule(self.queue, self.running, self.current_time, sorted=False) + self.scheduler.schedule(self.queue, self.running, self.current_time, sorted=(not has_new_additions)) # Stop the simulation if no more jobs are running or in the queue or in the job list. if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs: diff --git a/scripts/marconi100-day51.sh b/scripts/marconi100-day51.sh new file mode 100644 index 0000000..ae801a5 --- /dev/null +++ b/scripts/marconi100-day51.sh @@ -0,0 +1,4 @@ +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy replay +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy fcfs +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy fcfs --backfill easy +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy priority --backfill firstfit -- GitLab From f0bf0600a4082b1cd283793a0aa5a543cd6d02c7 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 2 Apr 2025 22:46:06 -0400 Subject: [PATCH 056/388] added accounts to schedule, such that sort can sort according to the fugaku points associated with an account. Also added additional stats for cpu/gpu utilization. 
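
As a rough sketch of what account-aware sorting can look like (Account and Job
here are hypothetical stand-ins for the raps classes; the real wiring simply
passes the accounts mapping into schedule(), as in the diff below):

    from dataclasses import dataclass

    @dataclass
    class Account:               # stand-in for raps.account.Account
        fugaku_points: float = 0.0

    @dataclass
    class Job:                   # stand-in for raps.job.Job
        id: int
        account: str

    def sort_by_account_points(queue, accounts):
        # Higher account points schedule first; Python's sort is stable,
        # so ties keep their original queue order.
        return sorted(queue, key=lambda j: accounts[j.account].fugaku_points,
                      reverse=True)

    accounts = {'acctA': Account(3.0), 'acctB': Account(7.5)}
    queue = [Job(1, 'acctA'), Job(2, 'acctB')]
    assert [j.id for j in sort_by_account_points(queue, accounts)] == [2, 1]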
--- raps/engine.py | 4 ++-- raps/job.py | 16 ++++++++++++++++ raps/schedulers/default.py | 1 + raps/stats.py | 34 +++++++++++++++++++++++++++++++--- 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 08bb8cb..21fde0f 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -301,7 +301,7 @@ class Engine: self.add_running_jobs_to_queue(all_jobs) # Now process job queue one by one (needed to get the start_time right!) for job in self.queue[:]: # operate over a slice copy to be able to remove jobs from queue if placed. - self.scheduler.schedule([job], self.running, job.start_time, sorted=True) + self.scheduler.schedule([job], self.running, job.start_time, accounts=self.accounts, sorted=True) self.queue.remove(job) if replay and len(self.queue) != 0: raise ValueError(f"Something went wrong! Not all jobs could be placed!\nPotential confligt in queue:\n{self.queue}") @@ -338,7 +338,7 @@ class Engine: # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) # 3. Schedule jobs that are now in the queue. - self.scheduler.schedule(self.queue, self.running, self.current_time, sorted=(not has_new_additions)) + self.scheduler.schedule(self.queue, self.running, self.current_time,accounts=self.accounts, sorted=(not has_new_additions)) # Stop the simulation if no more jobs are running or in the queue or in the job list. if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs: diff --git a/raps/job.py b/raps/job.py index 25eebbd..7166884 100644 --- a/raps/job.py +++ b/raps/job.py @@ -159,6 +159,22 @@ class JobStatistics: self.start_time = job.start_time self.end_time = job.end_time self.state = job._state + if len(job.cpu_trace) == 0: + self.avg_cpu_usage = 0 + else: + self.avg_cpu_usage = sum(job.cpu_trace) / len(job.cpu_trace) + if len(job.gpu_trace) == 0: + self.avg_gpu_usage = 0 + else: + self.avg_gpu_usage = sum(job.gpu_trace) / len(job.gpu_trace) + if len(job.ntx_trace) == 0: + self.avg_ntx_usage = 0 + else: + self.avg_ntx_usage = sum(job.ntx_trace) / len(job.ntx_trace) + if len(job.nrx_trace) == 0: + self.avg_nrx_usage = 0 + else: + self.avg_nrx_usage = sum(job.nrx_trace) / len(job.nrx_trace) if len(job.power_history) == 0: self.avg_node_power = 0 self.max_node_power = 0 diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index 8f89818..c8c691e 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -63,6 +63,7 @@ class Scheduler: # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. if self.policy in [PolicyType.REPLAY]: + # print(f"Nodes available {nodes_available} - Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}") continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. 
elif self.policy in [PolicyType.FCFS, PolicyType.PRIORITY, PolicyType.FUGAKU_PTS, PolicyType.LJF]: diff --git a/raps/stats.py b/raps/stats.py index 789e0a2..370028b 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -60,6 +60,7 @@ def get_scheduler_stats(engine: Engine): } return stats + def get_job_stats(engine: Engine): """ Return job statistics processed over the engine execution""" # Information on Job-Mix @@ -80,9 +81,15 @@ def get_job_stats(engine: Engine): min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = sys.maxsize, -sys.maxsize - 1, 0 min_awrt, max_awrt, sum_awrt = sys.maxsize, -sys.maxsize - 1, 0 + min_cpu_u, max_cpu_u, sum_cpu_u = sys.maxsize, -sys.maxsize - 1, 0 + min_gpu_u, max_gpu_u, sum_gpu_u = sys.maxsize, -sys.maxsize - 1, 0 + min_ntx_u, max_ntx_u, sum_ntx_u = sys.maxsize, -sys.maxsize - 1, 0 + min_nrx_u, max_nrx_u, sum_nrx_u = sys.maxsize, -sys.maxsize - 1, 0 + jobsSmall = 0 jobsMedium = 0 jobsLarge = 0 + jobsVLarge = 0 jobsHuge = 0 # Information on Job-Mix @@ -132,12 +139,19 @@ def get_job_stats(engine: Engine): min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = \ min_max_sum(psf_partial_den, min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den) + min_cpu_u, max_cpu_u, sum_cpu_u = min_max_sum(min_cpu_u, max_cpu_u, sum_cpu_u) + min_gpu_u, max_gpu_u, sum_gpu_u = min_max_sum(min_gpu_u, max_gpu_u, sum_gpu_u) + min_ntx_u, max_ntx_u, sum_ntx_u = min_max_sum(min_ntx_u, max_ntx_u, sum_ntx_u) + min_nrx_u, max_nrx_u, sum_nrx_u = min_max_sum(min_nrx_u, max_nrx_u, sum_nrx_u) + if job['num_nodes'] <= 5: jobsSmall += 1 elif job['num_nodes'] <= 50: jobsMedium += 1 elif job['num_nodes'] <= 250: jobsLarge += 1 + elif job['num_nodes'] <= 4500: + jobsVLarge += 1 else: # job['nodes_required'] > 250: jobsHuge += 1 @@ -145,8 +159,8 @@ def get_job_stats(engine: Engine): avg_job_size = sum_job_size / len(engine.job_history_dict) avg_runtime = sum_runtime / len(engine.job_history_dict) avg_energy = sum_energy / len(engine.job_history_dict) - avg_edp= sum_edp / len(engine.job_history_dict) - avg_edp2= sum_edp2 / len(engine.job_history_dict) + avg_edp = sum_edp / len(engine.job_history_dict) + avg_edp2 = sum_edp2 / len(engine.job_history_dict) avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict) avg_wait_time = sum_wait_time / len(engine.job_history_dict) avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict) @@ -173,7 +187,8 @@ def get_job_stats(engine: Engine): 'Jobs <= 5 nodes': jobsSmall, 'Jobs <= 50 nodes': jobsMedium, 'Jobs <= 250 nodes': jobsLarge, - 'Jobs > 250 nodes': jobsHuge, + 'Jobs <= 4500 nodes': jobsVLarge, + 'Jobs > 4500 nodes': jobsHuge, # Information on job-mix executed 'min job size': min_job_size, 'max job size': max_job_size, @@ -193,6 +208,19 @@ def get_job_stats(engine: Engine): 'min_aggregate_node_hours': min_agg_node_hours, 'max_aggregate_node_hours': max_agg_node_hours, 'avg_aggregate_node_hours': avg_agg_node_hours, + # Utilization: + 'min_cpu_util': min_cpu_u, + 'max_cpu_util': max_cpu_u, + 'sum_cpu_util': sum_cpu_u, + 'min_gpu_util': min_gpu_u, + 'max_gpu_util': max_gpu_u, + 'sum_gpu_util': sum_gpu_u, + 'min_ntx_util': min_ntx_u, + 'max_ntx_util': max_ntx_u, + 'sum_ntx_util': sum_ntx_u, + 'min_nrx_util': min_nrx_u, + 'max_nrx_util': max_nrx_u, + 'sum_nrx_util': sum_nrx_u, # Completion statistics 'min_wait_time': min_wait_time, 'max_wait_time': max_wait_time, -- GitLab From d2de68623e09cf5023f559175ec990ca90731dc2 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 3 Apr 2025 11:25:21 -0400 
Subject: [PATCH 057/388] Stats update, next f-data

---
 raps/stats.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/raps/stats.py b/raps/stats.py
index 370028b..ee9508c 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -43,6 +43,8 @@
 def min_max_sum(value,min,max,sum):
+    if value < 0:
+        value = 0
     if value < min:
         min = value
     if value > max:
@@ -139,10 +141,10 @@
             min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = \
                 min_max_sum(psf_partial_den, min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den)

-            min_cpu_u, max_cpu_u, sum_cpu_u = min_max_sum(min_cpu_u, max_cpu_u, sum_cpu_u)
-            min_gpu_u, max_gpu_u, sum_gpu_u = min_max_sum(min_gpu_u, max_gpu_u, sum_gpu_u)
-            min_ntx_u, max_ntx_u, sum_ntx_u = min_max_sum(min_ntx_u, max_ntx_u, sum_ntx_u)
-            min_nrx_u, max_nrx_u, sum_nrx_u = min_max_sum(min_nrx_u, max_nrx_u, sum_nrx_u)
+            min_cpu_u, max_cpu_u, sum_cpu_u = min_max_sum(job['avg_cpu_usage'],min_cpu_u, max_cpu_u, sum_cpu_u)
+            min_gpu_u, max_gpu_u, sum_gpu_u = min_max_sum(job['avg_gpu_usage'],min_gpu_u, max_gpu_u, sum_gpu_u)
+            min_ntx_u, max_ntx_u, sum_ntx_u = min_max_sum(job['avg_ntx_usage'],min_ntx_u, max_ntx_u, sum_ntx_u)
+            min_nrx_u, max_nrx_u, sum_nrx_u = min_max_sum(job['avg_nrx_usage'],min_nrx_u, max_nrx_u, sum_nrx_u)

             if job['num_nodes'] <= 5:
                 jobsSmall += 1
@@ -164,6 +166,13 @@
         avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict)
         avg_wait_time = sum_wait_time / len(engine.job_history_dict)
         avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict)
+
+        avg_cpu_u = sum_cpu_u / len(engine.job_history_dict)
+        avg_gpu_u = sum_gpu_u / len(engine.job_history_dict)
+        avg_ntx_u = sum_ntx_u / len(engine.job_history_dict)
+        avg_nrx_u = sum_nrx_u / len(engine.job_history_dict)
+
         avg_awrt = sum_awrt / sum_agg_node_hours
         psf = (3 * sum_psf_partial_num) / (4 * sum_psf_partial_den)
     else:
-- GitLab

From 766eef736212d023ab1f8d15be68711401ceb596 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 4 Apr 2025 17:52:26 -0400
Subject: [PATCH 058/388] Updated for F-Data and disabled node-failure sim
 (random number generator takes time and functionality is not implemented
 properly)

---
 raps/dataloaders/fugaku.py |  3 ++-
 raps/engine.py             | 32 ++++++++++++++++---------
 raps/job.py                | 49 +++++++++++++++++++++++++++-----------
 raps/resmgr.py             |  1 +
 4 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py
index d9d0095..ff277a9 100644
--- a/raps/dataloaders/fugaku.py
+++ b/raps/dataloaders/fugaku.py
@@ -95,6 +95,7 @@ def load_data_from_df(df, **kwargs):
         scheduled_nodes = None  # Only nodes_required is in the trace

         job_id = row['jid'] if 'jid' in df.columns else 'unknown'
+        priority = row['pri'] if 'pri' in df.columns else 0

         submit_timestamp = pd.to_datetime(row['adt']) if 'adt' in df.columns else -1  # Else job was submitted in the past
@@ -143,7 +144,7 @@
             nrx_trace=[],
             end_state=end_state,
             scheduled_nodes=scheduled_nodes,
-            job_id=job_id,
+            id=job_id,
             priority=priority,
             submit_time=submit_time,
             time_limit=time_limit,

diff --git a/raps/engine.py b/raps/engine.py
index 21fde0f..dd4ef69 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -1,8 +1,7 @@
 from typing import Optional, List
 import dataclasses
 import pandas as pd
-
-import sys
+import numpy as np

 from .job import Job, JobState
 from .policy import PolicyType
@@ -185,19 +184,30 @@ class Engine:
                     # job ended before.
                     # For every other error condition trace_start_ and
                     # _end_time are used!
-
-                    if time_quanta_index < len(job.cpu_trace):
-                        cpu_util = get_utilization(job.cpu_trace, time_quanta_index)
+                    #print(type(job.cpu_trace))
+                    if isinstance(job.cpu_trace,list) or isinstance(job.cpu_trace,np.ndarray):
+                        if time_quanta_index < len(job.cpu_trace):
+                            cpu_util = get_utilization(job.cpu_trace, time_quanta_index)
+                        else:
+                            cpu_util = get_utilization(job.cpu_trace, len(job.cpu_trace) - 1)
+                    elif isinstance(job.cpu_trace,float) or isinstance(job.cpu_trace,int):
+                        cpu_util = job.cpu_trace
                     else:
-                        cpu_util = get_utilization(job.cpu_trace, len(job.cpu_trace) - 1)
-
-                    if time_quanta_index < len(job.gpu_trace):
-                        gpu_util = get_utilization(job.gpu_trace, time_quanta_index)
+                        raise NotImplementedError()
+
+                    if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace,np.ndarray):
+                        if time_quanta_index < len(job.gpu_trace):
+                            gpu_util = get_utilization(job.gpu_trace, time_quanta_index)
+                        else:
+                            gpu_util = get_utilization(job.gpu_trace, len(job.gpu_trace) - 1)
+                    elif isinstance(job.gpu_trace,float) or isinstance(job.gpu_trace,int):
+                        gpu_util = job.gpu_trace
                     else:
-                        gpu_util = get_utilization(job.gpu_trace, len(job.gpu_trace) - 1)
+                        raise NotImplementedError()
+
                     net_util = 0
-                    if isinstance(job.ntx_trace,List) and len(job.ntx_trace) and isinstance(job.nrx_trace,List) and len(job.nrx_trace):
+                    if (isinstance(job.ntx_trace,list) or isinstance(job.ntx_trace,np.ndarray)) and len(job.ntx_trace) and (isinstance(job.nrx_trace,list) or isinstance(job.nrx_trace,np.ndarray)) and len(job.nrx_trace):
                         net_tx = get_utilization(job.ntx_trace, time_quanta_index)
                         net_rx = get_utilization(job.nrx_trace, time_quanta_index)
                         net_util = network_utilization(net_tx, net_rx)

diff --git a/raps/job.py b/raps/job.py
index 7166884..09eaed4 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -1,4 +1,5 @@
 from enum import Enum
+import numpy as np

 """
 Note: want to simplify this in the future to use a minimal required set of job attributes,
@@ -159,6 +160,42 @@ class JobStatistics:
         self.start_time = job.start_time
         self.end_time = job.end_time
         self.state = job._state
-        if len(job.cpu_trace) == 0:
-            self.avg_cpu_usage = 0
+        if isinstance(job.cpu_trace,list) or isinstance(job.cpu_trace,np.ndarray):
+            if len(job.cpu_trace) == 0:
+                self.avg_cpu_usage = 0
+            else:
+                self.avg_cpu_usage = sum(job.cpu_trace) / len(job.cpu_trace)
+        elif isinstance(job.cpu_trace,int) or isinstance(job.cpu_trace,float):
+            self.avg_cpu_usage = job.cpu_trace
         else:
-            self.avg_cpu_usage = sum(job.cpu_trace) / len(job.cpu_trace)
-        if len(job.gpu_trace) == 0:
-            self.avg_gpu_usage = 0
+            raise NotImplementedError()
+
+        if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace,np.ndarray):
+            if len(job.gpu_trace) == 0:
+                self.avg_gpu_usage = 0
+            else:
+                self.avg_gpu_usage = sum(job.gpu_trace) / len(job.gpu_trace)
+        elif 
isinstance(job.gpu_trace,int) or isinstance(job.gpu_trace,float): + self.avg_gpu_usage = job.gpu_trace else: - self.avg_gpu_usage = sum(job.gpu_trace) / len(job.gpu_trace) - if len(job.ntx_trace) == 0: - self.avg_ntx_usage = 0 - else: - self.avg_ntx_usage = sum(job.ntx_trace) / len(job.ntx_trace) - if len(job.nrx_trace) == 0: - self.avg_nrx_usage = 0 - else: - self.avg_nrx_usage = sum(job.nrx_trace) / len(job.nrx_trace) + raise NotImplementedError() + + if isinstance(job.ntx_trace,list) or isinstance(job.ntx_trace,np.ndarray): + if len(job.ntx_trace) == 0: + self.avg_ntx_usage = 0 + else: + self.avg_ntx_usage = sum(job.ntx_trace) / len(job.ntx_trace) + elif isinstance(job.ntx_trace,int) or isinstance(job.ntx_trace,float): + self.avg_ntx_usage = job.ntx_trace + + if isinstance(job.nrx_trace,list) or isinstance(job.nrx_trace,np.ndarray): + if len(job.nrx_trace) == 0: + self.avg_nrx_usage = 0 + else: + self.avg_nrx_usage = sum(job.nrx_trace) / len(job.nrx_trace) + elif isinstance(job.nrx_trace,int) or isinstance(job.nrx_trace,float): + self.avg_nrx_usage = job.nrx_trace + if len(job.power_history) == 0: self.avg_node_power = 0 self.max_node_power = 0 diff --git a/raps/resmgr.py b/raps/resmgr.py index ee3caef..6a3ffda 100644 --- a/raps/resmgr.py +++ b/raps/resmgr.py @@ -54,6 +54,7 @@ class ResourceManager: return utilization def node_failure(self, mtbf): + return [] """Simulate node failure using Weibull distribution.""" shape_parameter = 1.5 scale_parameter = mtbf * 3600 # Convert to seconds -- GitLab From 49a1a104d1e33afde52dc2c1a54fec010e68e93c Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Sat, 5 Apr 2025 11:40:34 -0400 Subject: [PATCH 059/388] Fix with default values in cpu/gpu util and policy. Default values for avg cpu /gpu util in stats were not set when no jobs completed. Default policy was picked up as the first entry which was fcfs no backfill. This should be replay, and in fact by choice of the scheduler not by the args implementation. --- args.py | 2 +- raps/policy.py | 2 +- raps/stats.py | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/args.py b/args.py index d8f5900..6ba3c42 100644 --- a/args.py +++ b/args.py @@ -49,7 +49,7 @@ parser.add_argument('-w', '--workload', type=str, choices=choices, default=choic choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux'] parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler') choices = [policy.value for policy in PolicyType] -parser.add_argument('--policy', type=str, choices=choices, default=choices[0], help='Schedule policy to use') +parser.add_argument('--policy', type=str, choices=choices, default=None, help='Schedule policy to use') choices = [policy.value for policy in BackfillType] parser.add_argument('--backfill', type=str, choices=choices, default=None, help='Backfill Policy') diff --git a/raps/policy.py b/raps/policy.py index 45e9770..82b42a1 100644 --- a/raps/policy.py +++ b/raps/policy.py @@ -3,10 +3,10 @@ from enum import Enum class PolicyType(Enum): """Supported scheduling policies.""" + REPLAY = 'replay' # Default is specified in each scheduler! 
FCFS = 'fcfs'
     PRIORITY = 'priority'
     FUGAKU_PTS = 'fugaku_pts'
-    REPLAY = 'replay'
     SJF = 'sjf'
     LJF = 'ljf'

diff --git a/raps/stats.py b/raps/stats.py
index ee9508c..98b97bb 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -172,7 +172,6 @@ def get_job_stats(engine: Engine):
         avg_ntx_u = sum_ntx_u / len(engine.job_history_dict)
         avg_nrx_u = sum_nrx_u / len(engine.job_history_dict)

-
         avg_awrt = sum_awrt / sum_agg_node_hours
         psf = (3 * sum_psf_partial_num) / (4 * sum_psf_partial_den)
     else:
@@ -188,6 +187,11 @@
         min_awrt, max_awrt, avg_awrt = -1,-1,-1
         psf = -1

+        min_cpu_u, max_cpu_u, avg_cpu_u = -1,-1,-1
+        min_gpu_u, max_gpu_u, avg_gpu_u = -1,-1,-1
+        min_ntx_u, max_ntx_u, avg_ntx_u = -1,-1,-1
+        min_nrx_u, max_nrx_u, avg_nrx_u = -1,-1,-1
+
     job_stats = {
         'jobs completed': engine.jobs_completed,
         'throughput': f'{throughput:.2f} jobs/hour',
-- GitLab

From f3631dd13da30c09a4e2e51d389974edd6c3ccbd Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Sat, 5 Apr 2025 15:17:46 -0400
Subject: [PATCH 060/388] Updated dataloader to derive cpu and gpu power more
 accurately per node. This is a fix, but only a patch-work fix. There are
 several assumptions that do not really hold for this dataset: power in RAPS
 is always between idle and max, while power here can be below idle, as other
 C-states are possible. Additionally, this dataset provides per-node values,
 while RAPS assumes one trace per job that applies to all nodes; this should
 be overhauled. (In this dataset there is one value per node, not one trace
 per node, though.)

---
 raps/dataloaders/lassen.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py
index 7017b6e..b93a0f1 100644
--- a/raps/dataloaders/lassen.py
+++ b/raps/dataloaders/lassen.py
@@ -119,15 +119,35 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
             gpu_trace = 0  # = cpu_trace # Is this correct?
         else:
             # Compute GPU power
-            gpu_power = (node_data['gpu_energy'].sum() / nodes_required) / wall_time
-            gpu_min_power = config['POWER_GPU_IDLE']
-            gpu_max_power = config['POWER_GPU_MAX']
+            gpu_node_idle_power = config['POWER_GPU_IDLE'] * config['GPUS_PER_NODE']
+            # Note: GPU_Power is on a per node basis.
+            # The current simulator uses the same time series for every node of the job
+            # Therefore we sum over all nodes and form the average node power.
+            # TODO: Jobs could have a time-series per node!
+            gpu_node_energy = node_data['gpu_energy'].copy()
+            gpu_power = (gpu_node_energy.sum() / nodes_required) / wall_time  # This is a single value
+            if gpu_power < gpu_node_idle_power:
+                # print(gpu_power, gpu_node_idle_power)  # Issue: RAPS assumes power is between idle and max, but C-states are not considered!
+                gpu_power = gpu_node_idle_power  # Setting to idle as other parts of the sim make this assumption
+            assert (gpu_power >= gpu_node_idle_power)
+            gpu_min_power = gpu_node_idle_power
+            gpu_max_power = config['POWER_GPU_MAX'] * config['GPUS_PER_NODE']
+            # power_to_utilization has issues! As it is unclear if gpu_power is for a single gpu or all gpus of a node.
+            # The multiplication by GPUS_PER_NODE fixes this but is patch-work! TODO Refactor and fix
             gpu_util = power_to_utilization(gpu_power,gpu_min_power,gpu_max_power)
-            gpu_trace = gpu_util
+            # gpu_util should be between 0 and 4 (4 GPUs), where 4 is all GPUs full utilization.
+ gpu_trace = gpu_util * config['GPUS_PER_NODE'] # Compute CPU power from CPU usage time # CPU usage is reported per core, while we need it in the range [0 to CPUS_PER_NODE] - cpu_util = node_data['cpu_usage'].sum() / nodes_required / wall_time / config['CPU_FREQUENCY'] / config['CORES_PER_CPU'] + # Same + cpu_node_usage = node_data['cpu_usage'].copy() + cpu_node_usage[cpu_node_usage < 0] = 0.0 + cpu_node_usage[cpu_node_usage == np.NaN] = 0.0 + cpu_util = cpu_node_usage.sum() / nodes_required / wall_time / config['CPU_FREQUENCY'] / config['CORES_PER_CPU'] + assert (cpu_util >= 0) + # cpu_util should be between 0 an 2 (2 CPUs) + cpu_trace = cpu_util # TODO use total energy for validation # Only Node Energy and GPU Energy is reported! -- GitLab From 3daee4577f3507f316492651ca0bd8472099392a Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Sat, 5 Apr 2025 15:47:45 -0400 Subject: [PATCH 061/388] Fix for Div/0: Considering wall_time == 0 setting utils to 0 etc. --- raps/dataloaders/lassen.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index b93a0f1..c251aaa 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -109,6 +109,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): node_data = node_df[node_df['allocation_id'] == row['allocation_id']] wall_time = compute_wall_time(row['begin_timestamp'], row['end_timestamp']) + samples = math.ceil(wall_time / config['TRACE_QUANTA']) if validate: @@ -125,11 +126,19 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # Therefore we sum over all nodes and form the average node power. # TODO: Jobs could have a time-series per node! gpu_node_energy = node_data['gpu_energy'].copy() - gpu_power = (gpu_node_energy.sum() / nodes_required) / wall_time # This is a single value + gpu_node_energy[gpu_node_energy < 0] = 0.0 + gpu_node_energy[gpu_node_energy == np.NaN] = 0.0 + if len(gpu_node_energy) < 1: + gpu_power = gpu_node_idle_power # Setting to idle as other parts of the sim make this assumption + else: + if wall_time > 0: + gpu_power = (gpu_node_energy.sum() / nodes_required) / wall_time # This is a single value + else: + gpu_power = gpu_node_idle_power if gpu_power < gpu_node_idle_power: # print(gpu_power, gpu_node_idle_power) # Issue: RAPS assumes power is between idle and max, but C-states are not considered! gpu_power = gpu_node_idle_power # Setting to idle as other parts of the sim make this assumption - assert (gpu_power >= gpu_node_idle_power) + assert gpu_power >= gpu_node_idle_power, f"{gpu_power} >= {gpu_node_idle_power}" + f" gpu_power = ({gpu_node_energy.sum()} / {nodes_required}) / {wall_time}" gpu_min_power = gpu_node_idle_power gpu_max_power = config['POWER_GPU_MAX'] * config['GPUS_PER_NODE'] # power_to_utilization has issues! As it is unclear if gpu_power is for a single gpu or all gpus of a node. 
@@ -144,8 +153,11 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): cpu_node_usage = node_data['cpu_usage'].copy() cpu_node_usage[cpu_node_usage < 0] = 0.0 cpu_node_usage[cpu_node_usage == np.NaN] = 0.0 - cpu_util = cpu_node_usage.sum() / nodes_required / wall_time / config['CPU_FREQUENCY'] / config['CORES_PER_CPU'] - assert (cpu_util >= 0) + if wall_time > 0: + cpu_util = cpu_node_usage.sum() / nodes_required / wall_time / config['CPU_FREQUENCY'] / config['CORES_PER_CPU'] + else: + cpu_util = 0.0 + assert cpu_util >= 0, f"{cpu_util} = {cpu_node_usage.sum()} / {nodes_required} / {wall_time} / {config['CPU_FREQUENCY']} / {config['CORES_PER_CPU']}" # cpu_util should be between 0 an 2 (2 CPUs) cpu_trace = cpu_util -- GitLab From 029ab34d321170967e0a01ef25038bfdc30ceb3b Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Sun, 6 Apr 2025 13:39:26 -0400 Subject: [PATCH 062/388] Moved a misplaced plotting script --- {raps/dataloaders => scripts}/plot3.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {raps/dataloaders => scripts}/plot3.py (100%) diff --git a/raps/dataloaders/plot3.py b/scripts/plot3.py similarity index 100% rename from raps/dataloaders/plot3.py rename to scripts/plot3.py -- GitLab From 0b289428884f2087dc9ca60b79b4a2a8970b6d14 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Sun, 6 Apr 2025 13:48:38 -0400 Subject: [PATCH 063/388] Update to License file wrong project. --- LICENSE-MIT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE-MIT b/LICENSE-MIT index b053dab..9ca4d0e 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023-2024 UT-Battelle, LLC and other exadigitUE5 Project Developers. +Copyright (c) 2023-2024 UT-Battelle, LLC and other exadigit/raps Project Developers. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal -- GitLab From 91600d0a3d5a795b513020f18a6cf21205b4ee0e Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Sun, 6 Apr 2025 17:16:40 -0400 Subject: [PATCH 064/388] Fix to prepare_system_state, initial schedule. Fixed setup of the simulation with settings as if it was replay, and switching to the desired scheduling approach only at the start of the simulation. --- raps/engine.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/raps/engine.py b/raps/engine.py index dd4ef69..cc2f796 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -309,12 +309,21 @@ class Engine: all_jobs.sort(key=lambda j: j['submit_time']) self.add_running_jobs_to_queue(all_jobs) + # Set policy to replay and no backfill to get the original prefilled placement. + target_policy = self.scheduler.policy + self.scheduler.policy = PolicyType.REPLAY + target_bfpolicy = self.scheduler.bfpolicy + self.scheduler.bfpolicy = None + # Now process job queue one by one (needed to get the start_time right!) for job in self.queue[:]: # operate over a slice copy to be able to remove jobs from queue if placed. self.scheduler.schedule([job], self.running, job.start_time, accounts=self.accounts, sorted=True) self.queue.remove(job) if replay and len(self.queue) != 0: raise ValueError(f"Something went wrong! Not all jobs could be placed!\nPotential confligt in queue:\n{self.queue}") + # Restore the target policy and backfill for the remainder of the simulation. 
+        self.scheduler.policy = target_policy
+        self.scheduler.bfpolicy = target_bfpolicy

     def run_simulation(self, jobs, timestep_start, timestep_end, autoshutdown=False):
         """Generator that yields after each simulation tick."""
-- GitLab

From 14112a0bf9b725d3ae7230d1fdf30dcb53fd446d Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 7 Apr 2025 21:21:09 -0400
Subject: [PATCH 065/388] Update to raps accounts to pass on empty power for
 fugaku pts. Added start time and empty node number to template workloads.

---
 raps/account.py  |  2 +-
 raps/workload.py | 15 +++++++--------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/raps/account.py b/raps/account.py
index 5ff80b9..379d3dd 100644
--- a/raps/account.py
+++ b/raps/account.py
@@ -40,7 +40,7 @@ class Account:

     def update_fugaku_points(self, average_energy, average_power):
         if average_power == 0:
-            raise ValueError(f"{average_power} is zero")
+            return
         self.fugaku_points = (average_energy - self.energy_allocated) / average_power

     def update_statistics(self, jobstats, average_user):

diff --git a/raps/workload.py b/raps/workload.py
index 50e996a..56d7cb3 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -122,6 +122,7 @@ class Workload:
             # Create job info for this partition
             job_info = job_dict(
                 nodes_required=config['AVAILABLE_NODES'],
+                scheduled_nodes=[],  # Down nodes, therefore list(range(config['AVAILABLE_NODES'])) doesn't work
                 name=f"Max Test {partition}",
                 account=ACCT_NAMES[0],
                 cpu_trace=cpu_trace,
@@ -129,7 +130,6 @@ class Workload:
                 ntx_trace=net_tx,
                 nrx_trace=net_rx,
                 end_state='COMPLETED',
-                scheduled_nodes=list(range(config['AVAILABLE_NODES'])),
                 id=None,
                 priority=100,
                 partition=partition,
@@ -167,11 +167,12 @@ class Workload:
                 ntx_trace=net_tx,
                 nrx_trace=net_rx,
                 end_state='COMPLETED',
-                scheduled_nodes=list(range(config['AVAILABLE_NODES'])),
+                scheduled_nodes=[],  # list(range(config['AVAILABLE_NODES'])),
                 id=None,
                 priority=100,
                 partition=partition,
                 time_limit=job_time + 1,
+                submit_time=0,
                 start_time=0,
                 end_time=job_time,
                 wall_time=job_time,
@@ -194,8 +195,6 @@ class Workload:
             config = self.config_map[partition]
             net_tx, net_rx = [], []

-            list_of_all_nodes = list(range(config['AVAILABLE_NODES']))
-
             # Max test
             cpu_util, gpu_util = 1, 4
             cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA'])
@@ -204,7 +203,7 @@
             job_info = job_dict(
                 nodes_required=config['AVAILABLE_NODES'],
-                scheduled_nodes=list_of_all_nodes,
+                scheduled_nodes=[],  # Explicit scheduled nodes will not work due to down nodes
                 name=f"Max Test {partition}",
                 account=account,
                 cpu_trace=cpu_trace,
@@ -233,7 +232,7 @@
             job_info = job_dict(
                 nodes_required=config['AVAILABLE_NODES'],
-                scheduled_nodes=list_of_all_nodes,
+                scheduled_nodes=[],  # Explicit scheduled nodes will not work due to down nodes
                 name=f"OpenMxP {partition}",
                 account=account,
                 cpu_trace=cpu_trace,
@@ -261,7 +260,7 @@
             job_time = len(gpu_trace) * config['TRACE_QUANTA']
             job_info = job_dict(
                 nodes_required=config['AVAILABLE_NODES'],
-                scheduled_nodes=list_of_all_nodes,
+                scheduled_nodes=[],  # Explicit scheduled nodes will not work due to down nodes
                 name=f"HPL {partition}",
                 account=account,
                 cpu_trace=cpu_trace,
@@ -288,7 +287,7 @@
             job_time = len(gpu_trace) * config['TRACE_QUANTA']
             job_info = job_dict(
                 nodes_required=config['AVAILABLE_NODES'],
-                scheduled_nodes=list_of_all_nodes,
+                scheduled_nodes=[],  # Explicit scheduled nodes will not work due to down nodes
                 name=f"Idle Test {partition}",
                 account=account,
cpu_trace=cpu_trace,
--
GitLab

From 853055aa44b23959c687aab1cc1826ee3194e2fb Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 8 Apr 2025 21:15:59 -0400
Subject: [PATCH 066/388] Moved Fugaku_pts and other experimental scheduling
 strategies to raps/schedulers/experimental.py

This serves as a blueprint for other experimental schedulers and shows
how to extend them without the need to extend an existing scheduler;
see ScheduleFlow or FastSim.

Updated lassen:
- Threads per core in config
- cpu_util function from LAST dataset corrected

Updated Accounts:
- calculation of Fugaku points now handles 0 average power.
- time_allocated for a user counts runtime * nodes allocated.

Updated engine to deal with traces that are missing data at the front.

Default Scheduler:
- does not contain FUGAKU_PTS anymore

Experimental Scheduler:
- Contains:
  ACCT_FUGAKU_PTS = 'acct_fugaku_pts'
  ACCT_AVG_P = 'acct_avg_power'
  ACCT_AVG_PW4LJ = 'acct_avg_power_w4lj'
  ACCT_EDP = 'acct_edp'
  ACCT_ED2P = 'acct_ed2p'
  ACCT_PDP = 'acct_pdp'
---
 args.py                         |   4 +-
 config/lassen/system.json       |   1 +
 raps/account.py                 |   7 +-
 raps/dataloaders/lassen.py      |   9 +-
 raps/engine.py                  |   7 +-
 raps/policy.py                  |   1 -
 raps/schedulers/default.py      |  47 +----
 raps/schedulers/experimental.py | 337 ++++++++++++++++++++++++++++++++
 8 files changed, 360 insertions(+), 53 deletions(-)
 create mode 100644 raps/schedulers/experimental.py

diff --git a/args.py b/args.py
index 6ba3c42..f077b85 100644
--- a/args.py
+++ b/args.py
@@ -46,10 +46,10 @@ choices = ['random', 'benchmark', 'peak', 'idle']
 parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')

 # Scheduling options
-choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux']
+choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux', 'experimental']
 parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler')
 choices = [policy.value for policy in PolicyType]
-parser.add_argument('--policy', type=str, choices=choices, default=None, help='Schedule policy to use')
+parser.add_argument('--policy', type=str, default=None, help='Schedule policy to use, e.g.:' + str(choices) + " or extended policies")
 choices = [policy.value for policy in BackfillType]
 parser.add_argument('--backfill', type=str, choices=choices, default=None, help='Backfill Policy')

diff --git a/config/lassen/system.json b/config/lassen/system.json
index bf739d4..44da66e 100644
--- a/config/lassen/system.json
+++ b/config/lassen/system.json
@@ -13,6 +13,7 @@
     "DOWN_NODES": [],
     "CPUS_PER_NODE": 2,
     "CORES_PER_CPU": 22,
+    "THREADS_PER_CORE": 4,
     "CPU_FREQUENCY": 2400000000,
     "GPUS_PER_NODE": 4,
     "CPU_PEAK_FLOPS": 396.8E9,
diff --git a/raps/account.py b/raps/account.py
index 379d3dd..15863b6 100644
--- a/raps/account.py
+++ b/raps/account.py
@@ -40,12 +40,13 @@ class Account:

     def update_fugaku_points(self, average_energy, average_power):
         if average_power == 0:
-            return
-        self.fugaku_points = (average_energy - self.energy_allocated) / average_power
+            self.fugaku_points = 0
+        else:
+            self.fugaku_points = (average_energy - self.energy_allocated) / average_power

     def update_statistics(self, jobstats, average_user):
         self.jobs_completed += 1
-        self.time_allocated += jobstats.run_time
+        self.time_allocated += jobstats.run_time * jobstats.num_nodes
         self.energy_allocated += jobstats.energy
         if self.time_allocated == 0:
             self.avg_power = 0
diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py
index c251aaa..a3be849 100644
---
a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -154,10 +154,12 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): cpu_node_usage[cpu_node_usage < 0] = 0.0 cpu_node_usage[cpu_node_usage == np.NaN] = 0.0 if wall_time > 0: - cpu_util = cpu_node_usage.sum() / nodes_required / wall_time / config['CPU_FREQUENCY'] / config['CORES_PER_CPU'] + threads_per_core = config['THREADS_PER_CORE'] + cpu_util = cpu_node_usage.sum() / 10e9 / nodes_required / wall_time / threads_per_core else: cpu_util = 0.0 - assert cpu_util >= 0, f"{cpu_util} = {cpu_node_usage.sum()} / {nodes_required} / {wall_time} / {config['CPU_FREQUENCY']} / {config['CORES_PER_CPU']}" + assert cpu_util >= 0, f"{cpu_util} = {cpu_node_usage.sum()} / 10e9 / {nodes_required} / {wall_time} / {threads_per_core}" + # cpu_util should be between 0 an 2 (2 CPUs) cpu_trace = cpu_util @@ -169,7 +171,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): ib_tx = 4 * node_data['ib_tx'].sum() if node_data['ib_tx'].values.size > 0 else [] ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else [] - net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) + #net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) + net_tx, net_rx = [],[] # generate_network_sequences generates errors (e.g. -ff 800d -t 1d ) # no priorities defined! priority = row.get('priority', 0) diff --git a/raps/engine.py b/raps/engine.py index cc2f796..05e386e 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -184,7 +184,12 @@ class Engine: # job ended before. # For every other error condition trace_start_ and # _end_time are used! - #print(type(job.cpu_trace)) + # #print(type(job.cpu_trace)) + if time_quanta_index < 0: + time_quanta_index = 0 + # Similar with the first time_quanta index: If the job started + # in the past and no trace if there, read index 0 until values + # are available. if isinstance(job.cpu_trace,list) or isinstance(job.cpu_trace,np.ndarray): if time_quanta_index < len(job.cpu_trace): cpu_util = get_utilization(job.cpu_trace, time_quanta_index) diff --git a/raps/policy.py b/raps/policy.py index 82b42a1..872b19a 100644 --- a/raps/policy.py +++ b/raps/policy.py @@ -6,7 +6,6 @@ class PolicyType(Enum): REPLAY = 'replay' # Default is specified in each scheduler! 
FCFS = 'fcfs' PRIORITY = 'priority' - FUGAKU_PTS = 'fugaku_pts' SJF = 'sjf' LJF = 'ljf' diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index c8c691e..9953087 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -1,9 +1,5 @@ from typing import List -from enum import Enum from ..utils import summarize_ranges - -from ..workload import MAX_PRIORITY - from ..policy import PolicyType, BackfillType @@ -27,12 +23,10 @@ class Scheduler: return sorted(queue, key=lambda job: job.submit_time) elif self.policy == PolicyType.PRIORITY: return sorted(queue, key=lambda job: job.priority, reverse=True) - elif self.policy == PolicyType.FUGAKU_PTS: - return self.sort_fugaku_redeeming(queue, accounts) - if self.policy == PolicyType.SJF: + elif self.policy == PolicyType.SJF: return sorted(queue, key=lambda job: job.time_limit) - if self.policy == PolicyType.LJF: - return sorted(queue, key=lambda job: job.nodes_required) + elif self.policy == PolicyType.LJF: + return sorted(queue, key=lambda job: job.nodes_required, reverse=True) elif self.policy == PolicyType.REPLAY: return sorted(queue, key=lambda job: job.start_time) else: @@ -66,7 +60,7 @@ class Scheduler: # print(f"Nodes available {nodes_available} - Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}") continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. elif self.policy in [PolicyType.FCFS, PolicyType.PRIORITY, - PolicyType.FUGAKU_PTS, PolicyType.LJF]: + PolicyType.LJF, PolicyType.SJF]: break # The job at the front of the queue doesnt fit stop processing the queue. else: raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!") @@ -199,36 +193,3 @@ class Scheduler: else: continue return None - - - def sort_fugaku_redeeming(self, queue, accounts=None): - if queue == []: - return queue - # Priority queues not yet implemented: - # Strategy: Sort by Fugaku Points Representing the Priority Queue - # Everything with negative Fugaku Points get sorted according to normal priority - priority_triple_list = [] - for job in queue: - fugaku_priority = accounts.account_dict[job.account].fugaku_points - # Create a tuple of the job and the priority - priority = job.priority - priority_triple_list.append((fugaku_priority,priority,job)) - # Sort everythin according to fugaku_points - priority_triple_list = sorted(priority_triple_list, key=lambda x:x[0], reverse=True) - # Find the first element with negative fugaku_points - for cutoff, triple in enumerate(priority_triple_list): - fugaku_priority, _, _ = triple - if fugaku_priority < 0: - break - first_part = priority_triple_list[:cutoff] - # Sort everything afterwards according to job priority - second_part = sorted(priority_triple_list[cutoff:], key=lambda x:x[1], reverse=True) - queue_a = [] - queue_b = [] - if first_part != []: - _, _, queue_a = zip(*first_part) - queue_a = list(queue_a) - if second_part != []: - _, _, queue_b = zip(*second_part) - queue_b = list(queue_b) - return queue_a + queue_b diff --git a/raps/schedulers/experimental.py b/raps/schedulers/experimental.py new file mode 100644 index 0000000..0705639 --- /dev/null +++ b/raps/schedulers/experimental.py @@ -0,0 +1,337 @@ +from typing import List +from enum import Enum +from ..utils import summarize_ranges + +from ..policy import BackfillType + +# Extending PolicyType: +from ..policy import PolicyType as BasePolicyType + + +class ExtendedPolicyType(Enum): + ACCT_FUGAKU_PTS = 
'acct_fugaku_pts' + ACCT_AVG_P = 'acct_avg_power' + ACCT_AVG_PW4LJ = 'acct_avg_power_w4lj' + ACCT_EDP = 'acct_edp' + ACCT_ED2P = 'acct_ed2p' + ACCT_PDP = 'acct_pdp' + + +# Boilerplate to combine the enums +combined_members = { + **{name: member.value for name, member in BasePolicyType.__members__.items()}, + **{name: member.value for name, member in ExtendedPolicyType.__members__.items()} +} +PolicyType = Enum('PolicyType', combined_members) +# The scheduler can now use both the BasePolicies and the Extended Policies + + +class Scheduler: + """ Default job scheduler with various scheduling policies. """ + + def __init__(self, config, policy, bfpolicy=None, jobs=None, resource_manager=None): + self.config = config + if policy is None: # policy is passed as policy=None, therefore default is not choosen + policy = "replay" + self.policy = PolicyType(policy) + self.bfpolicy = BackfillType(bfpolicy) + if resource_manager is None: + raise ValueError("Scheduler requires a ResourceManager instance") + self.resource_manager = resource_manager + self.debug = False + + def sort_jobs(self, queue, accounts=None): + """Sort jobs based on the selected scheduling policy.""" + if self.policy == PolicyType.ACCT_FUGAKU_PTS: + return self.sort_fugaku_redeeming(queue, accounts) + elif self.policy == PolicyType.ACCT_AVG_PW4LJ: + return self.sort_avg_Pw4LJ(queue, accounts) + elif self.policy == PolicyType.ACCT_AVG_P: + return self.sort_avg_P(queue, accounts) + elif self.policy == PolicyType.ACCT_EDP: + return self.sort_AEDP(queue, accounts) + elif self.policy == PolicyType.ACCT_ED2P: + return self.sort_AED2P(queue, accounts) + elif self.policy == PolicyType.ACCT_PDP: + return self.sort_APDP(queue, accounts) + else: + raise ValueError(f"Policy not implemented: {self.policy}") + + def schedule(self, queue, running, current_time, accounts=None, sorted=False): + # Sort the queue in place. + if not sorted: + queue[:] = self.sort_jobs(queue, accounts) + + # Iterate over a copy of the queue since we might remove items + for job in queue[:]: + if self.policy == PolicyType.REPLAY: + if job.start_time > current_time: + continue # Replay: Job didn't start yet. Next! + else: + pass + else: + pass + + nodes_available = self.check_available_nodes(job) + + if nodes_available: + self.place_job_and_manage_queues(job, queue, running, current_time) + else: # In case the job was not placed, see how we should continue: + if self.bfpolicy is not None: + self.backfill(queue, running, current_time) + + # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. + if False: # self.policy in [PolicyType.REPLAY]: + # print(f"Nodes available {nodes_available} - Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}") + continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. + elif self.policy in [PolicyType.ACCT_FUGAKU_PTS, + PolicyType.ACCT_AVG_PW4LJ, PolicyType.ACCT_AVG_P, + PolicyType.ACCT_EDP, PolicyType.ACCT_ED2P, PolicyType.ACCT_PDP, + ]: + break # The job at the front of the queue doesnt fit stop processing the queue. + else: + raise NotImplementedError("Depending on the Policy this choice should be explicit. 
Add the implementation above!") + + def place_job_and_manage_queues(self, job, queue,running, current_time): + self.resource_manager.assign_nodes_to_job(job, current_time) + running.append(job) + queue.remove(job) + if self.debug: + scheduled_nodes = summarize_ranges(job.scheduled_nodes) + print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}") + + def check_available_nodes(self,job): + nodes_available = False + if job.requested_nodes: # nodes specified, i.e., telemetry replay + if len(job.requested_nodes) <= len(self.resource_manager.available_nodes): + if self.policy == PolicyType.REPLAY: # Check if exact set is available: + nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes)) + else: + # Sufficiently large number of nodes available + # but no exact set is required! + nodes_available = True + # remove the request for specific nodes and ask for n nodes + job.nodes_required = len(job.requested_nodes) + job.requested_nodes = [] + else: + pass + else: # Exact nodes not specified (e.g. synthetic jobs dont have nodes assigned) + nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required + + return nodes_available + + def backfill(self,queue:List, running:List, current_time): + # Try to find a backfill candidate from the entire queue. + while queue: + backfill_job = self.find_backfill_job(queue, running, current_time) + if backfill_job: + self.place_job_and_manage_queues(backfill_job, queue, running, current_time) + else: + break + + def find_backfill_job(self, queue, running, current_time): + """Finds a backfill job based on available nodes and estimated completion times. + + Loosely based on pseudocode from Leonenkov and Zhumatiy, 'Introducing new backfill-based + scheduler for slurm resource manager.' Procedia computer science 66 (2015): 661-669. + """ + if not queue: + return None + + # Identify when the nex job in the queue could run as a time limit: + first_job = queue[0] + nodes_required = 0 + if first_job.requested_nodes: + nodes_required = len(first_job.requested_nodes) + else: + nodes_required = first_job.nodes_required + + sorted_running = sorted(running, key=lambda job: job.end_time) + + # Identify when we have enough nodes therefore the start time of the first_job in line + shadow_time_end = 0 + shadow_nodes_avail = len(self.resource_manager.available_nodes) + for job in sorted_running: + if shadow_nodes_avail >= nodes_required: + break + else: + shadow_nodes_avail += job.nodes_required + shadow_time_end = job.end_time + + time_limit = shadow_time_end - current_time + # We now have the time_limit after which no backfilled job should end + # as the next job in line has the necessary resrouces after this time limit. + + # Find and return the first job that fits + if self.bfpolicy == BackfillType.NONE: + pass + elif self.bfpolicy == BackfillType.EASY: + queue[:] = sorted(queue, key=lambda job: job.submit_time) + return self.return_first_fit(queue,time_limit) + elif self.bfpolicy == BackfillType.FIRSTFIT: + pass # Stay with the prioritization! + return self.return_first_fit(queue,time_limit) + elif self.bfpolicy in [BackfillType.BESTFIT, + BackfillType.GREEDY, + BackfillType.CONSERVATIVE, + ]: + raise NotImplementedError(f"{self.bfpolicy} not implemented! 
Please implement!") + else: + raise NotImplementedError(f"{self.bfpolicy} not implemented.") + + def return_first_fit(self, queue, time_limit): + for job in queue: + if job.time_limit <= time_limit: + nodes_available = self.check_available_nodes(job) + if nodes_available: + return job + else: + continue + else: + continue + return None + + def sort_fugaku_redeeming(self, queue, accounts=None): + if queue == []: + return queue + # Priority queues not yet implemented: + # Strategy: Sort by Fugaku Points Representing the Priority Queue + # Everything with negative Fugaku Points get sorted according to normal priority + priority_triple_list = [] + for job in queue: + fugaku_priority = accounts.account_dict[job.account].fugaku_points + if fugaku_priority is None: + fugaku_priority = 0 + # Create a tuple of the job and the priority + priority = job.priority + priority_triple_list.append((fugaku_priority,priority,job)) + # Sort everythin according to fugaku_points + priority_triple_list = sorted(priority_triple_list, key=lambda x:x[0], reverse=True) + # Find the first element with negative fugaku_points + for cutoff, triple in enumerate(priority_triple_list): + fugaku_priority, _, _ = triple + if fugaku_priority < 0: + break + first_part = priority_triple_list[:cutoff] + # Sort everything afterwards according to job priority + second_part = sorted(priority_triple_list[cutoff:], key=lambda x:x[1], reverse=True) + queue_a = [] + queue_b = [] + if first_part != []: + _, _, queue_a = zip(*first_part) + queue_a = list(queue_a) + if second_part != []: + _, _, queue_b = zip(*second_part) + queue_b = list(queue_b) + return queue_a + queue_b + + def sort_avg_Pw4LJ(self, queue, accounts=None): + if queue == []: + return queue + priority_tuple_list = [] + for job in queue: + power = accounts.account_dict[job.account].avg_power + if power is None: + power = 0 + # Create a tuple of the job and the priority + if job.nodes_required: + nnodes = job.nodes_required + elif job.scheduled_nodes: + nnodes = len(job.scheduled_nodes) + else: + raise KeyError("No nodes indicated") + + priority = 100 * nnodes * power + priority_tuple_list.append((priority,job)) + # Sort everythin according to new priority + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + queue = [] + if priority_tuple_list != []: + _, queue = zip(*priority_tuple_list) + queue = list(queue) + return queue + + def sort_avg_P(self, queue, accounts=None): + if queue == []: + return queue + priority_tuple_list = [] + for job in queue: + power = accounts.account_dict[job.account].avg_power + if power is None: + power = 0 + + priority = power + priority_tuple_list.append((priority,job)) + # Sort everythin according to power_acct_priority Disregarding size + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + queue = [] + if priority_tuple_list != []: + _, queue = zip(*priority_tuple_list) + queue = list(queue) + return queue + + def sort_AEDP(self, queue, accounts=None): + if queue == []: + return queue + priority_tuple_list = [] + for job in queue: + energy = accounts.account_dict[job.account].energy_allocated + time = accounts.account_dict[job.account].time_allocated + if energy is None: + energy = 0 + if time is None: + time = 0 + + priority = energy * time + priority_tuple_list.append((priority,job)) + # Sort everythin according to power_acct_priority Disregarding size + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + queue = [] + if priority_tuple_list 
!= []: + _, queue = zip(*priority_tuple_list) + queue = list(queue) + return queue + + def sort_AED2P(self, queue, accounts=None): + if queue == []: + return queue + priority_tuple_list = [] + for job in queue: + energy = accounts.account_dict[job.account].energy_allocated + time = accounts.account_dict[job.account].time_allocated + if energy is None: + energy = 0 + if time is None: + time = 0 + + priority = energy * time * time + priority_tuple_list.append((priority,job)) + # Sort everythin according to power_acct_priority Disregarding size + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + queue = [] + if priority_tuple_list != []: + _, queue = zip(*priority_tuple_list) + queue = list(queue) + return queue + + def sort_APDP(self, queue, accounts=None): + if queue == []: + return queue + priority_tuple_list = [] + for job in queue: + power = accounts.account_dict[job.account].avg_power + time = accounts.account_dict[job.account].time_allocated + if power is None: + power = 0 + if time is None: + time = 0 + + priority = power * time + priority_tuple_list.append((priority,job)) + # Sort everythin according to power_acct_priority Disregarding size + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + queue = [] + if priority_tuple_list != []: + _, queue = zip(*priority_tuple_list) + queue = list(queue) + return queue -- GitLab From a0e306b9865064e2a58855db620ecb3a1a94602a Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 9 Apr 2025 16:04:23 -0400 Subject: [PATCH 067/388] Seperation of experimental scheduler for enums - fixes --- raps/policy.py | 7 +++-- raps/schedulers/experimental.py | 52 ++++++++++++++++++++++++++------- raps/utils.py | 11 +++++++ 3 files changed, 56 insertions(+), 14 deletions(-) diff --git a/raps/policy.py b/raps/policy.py index 872b19a..10a9a31 100644 --- a/raps/policy.py +++ b/raps/policy.py @@ -1,7 +1,7 @@ -from enum import Enum +from .utils import ValueComparableEnum -class PolicyType(Enum): +class PolicyType(ValueComparableEnum): """Supported scheduling policies.""" REPLAY = 'replay' # Default is specified in each scheduler! 
FCFS = 'fcfs' @@ -9,7 +9,8 @@ class PolicyType(Enum): SJF = 'sjf' LJF = 'ljf' -class BackfillType(Enum): + +class BackfillType(ValueComparableEnum): """Supported backfilling policies.""" NONE = None FIRSTFIT = 'firstfit' diff --git a/raps/schedulers/experimental.py b/raps/schedulers/experimental.py index 0705639..95a7d10 100644 --- a/raps/schedulers/experimental.py +++ b/raps/schedulers/experimental.py @@ -6,12 +6,14 @@ from ..policy import BackfillType # Extending PolicyType: from ..policy import PolicyType as BasePolicyType +from ..utils import ValueComparableEnum -class ExtendedPolicyType(Enum): +class ExtendedPolicyType(ValueComparableEnum): ACCT_FUGAKU_PTS = 'acct_fugaku_pts' ACCT_AVG_P = 'acct_avg_power' - ACCT_AVG_PW4LJ = 'acct_avg_power_w4lj' + ACCT_LOW_AVG_P = 'acct_low_avg_power' + ACCT_AVG_PW4LJ = 'acct_avg_power_w4jl' ACCT_EDP = 'acct_edp' ACCT_ED2P = 'acct_ed2p' ACCT_PDP = 'acct_pdp' @@ -22,7 +24,7 @@ combined_members = { **{name: member.value for name, member in BasePolicyType.__members__.items()}, **{name: member.value for name, member in ExtendedPolicyType.__members__.items()} } -PolicyType = Enum('PolicyType', combined_members) +PolicyType = Enum('PolicyType', combined_members, type=ValueComparableEnum) # The scheduler can now use both the BasePolicies and the Extended Policies @@ -42,12 +44,16 @@ class Scheduler: def sort_jobs(self, queue, accounts=None): """Sort jobs based on the selected scheduling policy.""" - if self.policy == PolicyType.ACCT_FUGAKU_PTS: + if self.policy == PolicyType.REPLAY: # REPLAY NEEDS TO BE THERE + return sorted(queue, key=lambda job: job.start_time) + elif self.policy == PolicyType.ACCT_FUGAKU_PTS: return self.sort_fugaku_redeeming(queue, accounts) elif self.policy == PolicyType.ACCT_AVG_PW4LJ: return self.sort_avg_Pw4LJ(queue, accounts) elif self.policy == PolicyType.ACCT_AVG_P: return self.sort_avg_P(queue, accounts) + elif self.policy == PolicyType.ACCT_LOW_AVG_P: + return self.sort_low_avg_P(queue, accounts) elif self.policy == PolicyType.ACCT_EDP: return self.sort_AEDP(queue, accounts) elif self.policy == PolicyType.ACCT_ED2P: @@ -81,12 +87,16 @@ class Scheduler: self.backfill(queue, running, current_time) # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. - if False: # self.policy in [PolicyType.REPLAY]: - # print(f"Nodes available {nodes_available} - Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}") + print(f"if {self.policy} in [PolicyType.REPLAY]") # REPLAY NEEDS TO BE THERE + print(f"== {self.policy in [PolicyType.REPLAY]}") # REPLAY NEEDS TO BE THERE + print(f"==value {self.policy.value in [PolicyType.REPLAY.value]}") # REPLAY NEEDS TO BE THERE + print(f"type {type(self.policy)} in [{type(PolicyType.REPLAY)}]") # REPLAY NEEDS TO BE THERE + print(f"id {id(self.policy)} in [{id(PolicyType.REPLAY)}]") # REPLAY NEEDS TO BE THERE + if self.policy in [PolicyType.REPLAY]: # REPLAY NEEDS TO BE THERE continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. elif self.policy in [PolicyType.ACCT_FUGAKU_PTS, - PolicyType.ACCT_AVG_PW4LJ, PolicyType.ACCT_AVG_P, - PolicyType.ACCT_EDP, PolicyType.ACCT_ED2P, PolicyType.ACCT_PDP, + PolicyType.ACCT_AVG_PW4LJ, PolicyType.ACCT_LOW_AVG_P, PolicyType.ACCT_AVG_P, + PolicyType.ACCT_EDP, PolicyType.ACCT_ED2P, PolicyType.ACCT_PDP ]: break # The job at the front of the queue doesnt fit stop processing the queue. 
else: @@ -270,6 +280,26 @@ class Scheduler: queue = list(queue) return queue + def sort_low_avg_P(self, queue, accounts=None): + if queue == []: + return queue + priority_tuple_list = [] + for job in queue: + power = accounts.account_dict[job.account].avg_power + if power is None: + power = 0 + + priority = power + priority_tuple_list.append((priority,job)) + # Sort everythin according to power_acct_priority Disregarding size + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) + queue = [] + if priority_tuple_list != []: + _, queue = zip(*priority_tuple_list) + queue = list(queue) + return queue + + def sort_AEDP(self, queue, accounts=None): if queue == []: return queue @@ -285,7 +315,7 @@ class Scheduler: priority = energy * time priority_tuple_list.append((priority,job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) @@ -307,7 +337,7 @@ class Scheduler: priority = energy * time * time priority_tuple_list.append((priority,job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) @@ -329,7 +359,7 @@ class Scheduler: priority = power * time priority_tuple_list.append((priority,job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) diff --git a/raps/utils.py b/raps/utils.py index ce9dc57..62b98ef 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -7,6 +7,7 @@ generating random numbers, summarizing and expanding ranges, determining job sta """ from datetime import timedelta +from enum import Enum import hashlib import math @@ -382,3 +383,13 @@ def get_utilization(trace, time_quanta_index): return float(trace) else: raise TypeError(f"Invalid type for utilization: {type(trace)}.") + + +class ValueComparableEnum(Enum): + def __eq__(self, other): + if isinstance(other, Enum): + return self.value == other.value + return self.value == other + + def __hash__(self): # required if you override __eq__ + return hash(self.value) -- GitLab From cfa7182c4bf5ce30083e15b49471fa07552f3555 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 9 Apr 2025 16:06:15 -0400 Subject: [PATCH 068/388] Removed debug print statements in experimental scheduler. --- raps/schedulers/experimental.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/raps/schedulers/experimental.py b/raps/schedulers/experimental.py index 95a7d10..7461015 100644 --- a/raps/schedulers/experimental.py +++ b/raps/schedulers/experimental.py @@ -87,11 +87,6 @@ class Scheduler: self.backfill(queue, running, current_time) # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. 
- print(f"if {self.policy} in [PolicyType.REPLAY]") # REPLAY NEEDS TO BE THERE - print(f"== {self.policy in [PolicyType.REPLAY]}") # REPLAY NEEDS TO BE THERE - print(f"==value {self.policy.value in [PolicyType.REPLAY.value]}") # REPLAY NEEDS TO BE THERE - print(f"type {type(self.policy)} in [{type(PolicyType.REPLAY)}]") # REPLAY NEEDS TO BE THERE - print(f"id {id(self.policy)} in [{id(PolicyType.REPLAY)}]") # REPLAY NEEDS TO BE THERE if self.policy in [PolicyType.REPLAY]: # REPLAY NEEDS TO BE THERE continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. elif self.policy in [PolicyType.ACCT_FUGAKU_PTS, -- GitLab From cad3e922ed58234ac47529943b22544b1d8c2b06 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 9 Apr 2025 16:30:32 -0400 Subject: [PATCH 069/388] Type w4lj -> Weighted for large jobs --- raps/schedulers/experimental.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/schedulers/experimental.py b/raps/schedulers/experimental.py index 7461015..feade7c 100644 --- a/raps/schedulers/experimental.py +++ b/raps/schedulers/experimental.py @@ -13,7 +13,7 @@ class ExtendedPolicyType(ValueComparableEnum): ACCT_FUGAKU_PTS = 'acct_fugaku_pts' ACCT_AVG_P = 'acct_avg_power' ACCT_LOW_AVG_P = 'acct_low_avg_power' - ACCT_AVG_PW4LJ = 'acct_avg_power_w4jl' + ACCT_AVG_PW4LJ = 'acct_avg_power_w4lj' ACCT_EDP = 'acct_edp' ACCT_ED2P = 'acct_ed2p' ACCT_PDP = 'acct_pdp' -- GitLab From 4e7b79463e4f4c9fc595ecb8edf141d8fa788b2d Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 10 Apr 2025 22:43:13 -0400 Subject: [PATCH 070/388] Added plots for S-RAPS --- scripts/plots/2in1-adastra.py | 166 +++++++++++++++++++++++++++++ scripts/plots/2in1-pm100day50.py | 166 +++++++++++++++++++++++++++++ scripts/plots/4in1-frontier-wC.py | 167 +++++++++++++++++++++++++++++ scripts/plots/fgk_frontier.py | 169 ++++++++++++++++++++++++++++++ scripts/plots/paper.mplstyle | 86 +++++++++++++++ 5 files changed, 754 insertions(+) create mode 100644 scripts/plots/2in1-adastra.py create mode 100644 scripts/plots/2in1-pm100day50.py create mode 100644 scripts/plots/4in1-frontier-wC.py create mode 100644 scripts/plots/fgk_frontier.py create mode 100644 scripts/plots/paper.mplstyle diff --git a/scripts/plots/2in1-adastra.py b/scripts/plots/2in1-adastra.py new file mode 100644 index 0000000..47647c9 --- /dev/null +++ b/scripts/plots/2in1-adastra.py @@ -0,0 +1,166 @@ +#!/bin/env python3 +import pandas as pd +import pyarrow.parquet as pq +import matplotlib.pyplot as plt + +import sys + +import matplotlib +matplotlib.rcParams['text.usetex'] = True + +plt.style.use("paper.mplstyle") + + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "Libertine" +}) + +plt.rcParams['text.latex.preamble'] = r'\usepackage{libertine}' + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "serif", + "font.serif": ["Linux Libertine O"], # Specify the font family +}) + + +pt = 1. / 72.27 +width = 1.2 * 241.14749 * pt +golden = (1 + 5**0.5) / 2 +height = width / golden * 3. / 5. + + +carray = ['tab:cyan','tab:orange','tab:brown','tab:blue'] + +if len(sys.argv) > 1: + path = sys.argv[1] +else: + print(f"Usage: python {sys.argv[0]} ") + exit() + +# e.g. 
path = "$HOME/Repositories/exadigit/raps/simulation_results/adastra/full" + +policies = ['fcfs-nobf','fcfs-easy','priority-nobf','priority-easy','priority-ffbf','replay'] +policies = ['fcfs-nobf','fcfs-easy','priority-ffbf','replay'] +files = ['cooling_model.parquet', 'loss_history.parquet', 'power_history.parquet', 'util.parquet'] +files = ['power_history.parquet', 'util.parquet'] +#files = ['util.parquet', 'power_history.parquet'] +#files = ['loss_history.parquet', 'power_history.parquet', 'util.parquet'] +#files = ['power_history.parquet', 'util.parquet', 'cooling_model.parquet'] + +policy_path = {f"{policy}":f"{path}/{policy}" for policy in policies} +full_files = {f"{policy}":f"{path}/{policy}/{file}" for policy in policies for file in files} + + +def iter_to_seconds(i): + return i * 15 + +c_cnt=0 +fig, axs = plt.subplots(len(files),figsize=(width,2 * height)) +for i,file in enumerate(files): + policy_files = [f"{path}/{policy}/{file}" for policy in policies] + for c,policy_file in enumerate(policy_files): + # df = pd.read_parquet(policy_file) + x = 'time' + xlab = 'Time [hours/days]' + policy = policy_file.split('/')[-2] + if file == "power_history.parquet": + y = 'power [kw]' + ylab = 'Power [kW]' + #ymax = 26000 + #ymin = 6500 + #axs[i].set_ylim(ymin,ymax) + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'power [kw]'}) + + elif file == "cooling_model.parquet": + if c_cnt == 0: + y = 'pue' + ylab = 'PUE' + if c_cnt == 1: + y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_r_C' + ylab = 'Temperature [°C]' + if c_cnt == 2: + y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_s_C' + ylab = 'Temperature [°C]' + + df = pd.read_parquet(policy_file) + df['index'] = df.index + df[x] = df['index'].apply(iter_to_seconds) + ymax = max(df[y]) + #axs[i].plot(df[x],df[y], label=ylab) + #y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_r_C' + + elif file == "loss_history.parquet": + y = 'loss [kw]' + ylab = 'Loss [kW]' + #ylim = 29000 + #axs[i].set_ylim(0,ylim) + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'loss [kw]'}) + #axs[i].plot(df[x],df[y], label=ylab) + + elif file == "util.parquet": + y = 'utilization' + ylab = r'Utilization [\%]' + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time', 1:'utilization [%]'}) + df[y] = df['utilization [%]'] / 100 + #axs[i].plot(df[x], df[y], label=ylab) + + else: + raise KeyError + + timeline_s = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] + timeline_s = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] + timeline_s = [24 * 60 * 60 * x for x in timeline_s] + timeline_text = ['0:00','', '', 'day 3', '', '', 'day 6', '', '', 'day 9', '', '', 'day 12', '', '','day 15', ''] + axs[i].set_xticks(timeline_s,timeline_text) + if i == 1: + pass + else: + axs[i].set_xticklabels([]) # Remove x-axis labels + xlab = None + + + timeline_s = [0,21600,43200,64800,86400] + + #axs[i].set_xlim(timeline_s[0],timeline_s[-1]) + axs[i].set_xlabel(xlab) + axs[i].plot(df[x],df[y], label=policy, color=carray[c]) + axs[i].set_ylabel(ylab) + #$axs[i].plot(df[0],df[1],label=policy) + if file == "power_history.parquet": + axs[i].legend(loc='lower right',frameon=True) + axs[i].get_legend().get_frame().set_linewidth(0.0) + axs[i].set_title('Power',x=0.075, y=0.75,ha="left") + elif file == "util.parquet": + axs[i].set_title('Utilization',x=0.075, y=0.05,ha="left") + axs[i].legend(loc='lower right',frameon=True) + 
axs[i].get_legend().get_frame().set_linewidth(0.0) + elif file == "cooling_model.parquet": + if c_cnt == 0: + axs[i].set_title('PUE',x=0.05, y=0.8,ha="left") + axs[i].legend(loc='upper right') + elif c_cnt == 1: + axs[i].set_title('Cooling Tower\nReturn\nTemperature',x=0.05, y=0.5,ha="left") + axs[i].legend(loc='upper right') + else: + axs[i].set_title('Cooling Tower Supply Temperature',x=0.1, y=0.8) + axs[i].legend(loc='upper right') + c_cnt = c_cnt+1 + elif file == "loss_history.parquet": + axs[i].set_title('Loss') + axs[i].legend(loc='upper right') + else: + raise KeyError() +#plt.show() +#plt.savefig(f"3in1.png",bbox_inches='tight') +#plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0) +fig.subplots_adjust(hspace=0) +plt.tight_layout(pad=0,w_pad=0.0,h_pad=-0.08)#3) +plt.savefig(f"2in1-adastra.png",bbox_inches='tight',pad_inches = 0.02, dpi = 300) + diff --git a/scripts/plots/2in1-pm100day50.py b/scripts/plots/2in1-pm100day50.py new file mode 100644 index 0000000..1f3e0df --- /dev/null +++ b/scripts/plots/2in1-pm100day50.py @@ -0,0 +1,166 @@ +#!/bin/env python3 +import pandas as pd +import pyarrow.parquet as pq +import matplotlib.pyplot as plt + +import sys + +import matplotlib +matplotlib.rcParams['text.usetex'] = True + +plt.style.use("paper.mplstyle") + + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "Libertine" +}) + +plt.rcParams['text.latex.preamble'] = r'\usepackage{libertine}' + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "serif", + "font.serif": ["Linux Libertine O"], # Specify the font family +}) + + +pt = 1. / 72.27 +width = 1.2 * 241.14749 * pt +golden = (1 + 5**0.5) / 2 +height = width / golden * 3. / 5. + + +carray = ['tab:cyan','tab:orange','tab:brown','tab:blue'] + +if len(sys.argv) > 1: + path = sys.argv[1] +else: + print(f"Usage: python {sys.argv[0]} ") + exit() + +# e.g. 
path = "$HOME/Repositories/exadigit/raps/simulation_results/marconi100/day51" + +policies = ['fcfs-nobf','fcfs-easy','priority-nobf','priority-easy','priority-ffbf','replay'] +policies = ['fcfs-nobf','fcfs-easy','priority-ffbf','replay'] +files = ['cooling_model.parquet', 'loss_history.parquet', 'power_history.parquet', 'util.parquet'] +files = ['power_history.parquet', 'util.parquet'] +#files = ['util.parquet', 'power_history.parquet'] +#files = ['loss_history.parquet', 'power_history.parquet', 'util.parquet'] +#files = ['power_history.parquet', 'util.parquet', 'cooling_model.parquet'] + +policy_path = {f"{policy}":f"{path}/{policy}" for policy in policies} +full_files = {f"{policy}":f"{path}/{policy}/{file}" for policy in policies for file in files} + + +def iter_to_seconds(i): + return i * 15 + +c_cnt=0 +fig, axs = plt.subplots(len(files),figsize=(width,2 * height)) +for i,file in enumerate(files): + policy_files = [f"{path}/{policy}/{file}" for policy in policies] + for c,policy_file in enumerate(policy_files): + # df = pd.read_parquet(policy_file) + x = 'time' + xlab = 'Time [hours/days]' + policy = policy_file.split('/')[-2] + if file == "power_history.parquet": + y = 'power [kw]' + ylab = 'Power [kW]' + #ymax = 26000 + #ymin = 6500 + #axs[i].set_ylim(ymin,ymax) + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'power [kw]'}) + + elif file == "cooling_model.parquet": + if c_cnt == 0: + y = 'pue' + ylab = 'PUE' + if c_cnt == 1: + y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_r_C' + ylab = 'Temperature [°C]' + if c_cnt == 2: + y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_s_C' + ylab = 'Temperature [°C]' + + df = pd.read_parquet(policy_file) + df['index'] = df.index + df[x] = df['index'].apply(iter_to_seconds) + ymax = max(df[y]) + #axs[i].plot(df[x],df[y], label=ylab) + #y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_r_C' + + elif file == "loss_history.parquet": + y = 'loss [kw]' + ylab = 'Loss [kW]' + #ylim = 29000 + #axs[i].set_ylim(0,ylim) + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'loss [kw]'}) + #axs[i].plot(df[x],df[y], label=ylab) + + elif file == "util.parquet": + y = 'utilization' + ylab = r'Utilization [\%]' + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time', 1:'utilization [%]'}) + df[y] = df['utilization [%]'] / 100 + #axs[i].plot(df[x], df[y], label=ylab) + + else: + raise KeyError + + timeline_s = [] + timeline_text = [] + + timeline_s.extend([4320000,4330800,4341600,4352400,4363200,4374000,4384800,4395600]) + timeline_text.extend(['0:00\nDay 50','3:00','6:00','9:00','12:00','15:00','18:00','21:00']) + timeline_s.extend([4406400,4417200,4428000,4438800,4449600,4460400,4471200,4482000]) + timeline_text.extend(['0:00\nDay 51','3:00','6:00','9:00','12:00','15:00','18:00','21:00']) + + axs[i].set_xticks(timeline_s,timeline_text) + if i == 1: + pass + else: + axs[i].set_xticklabels([]) # Remove x-axis labels + xlab = None + + axs[i].set_xlabel(xlab) + axs[i].plot(df[x],df[y], label=policy, color=carray[c]) + axs[i].set_ylabel(ylab) + #$axs[i].plot(df[0],df[1],label=policy) + if file == "power_history.parquet": + axs[i].legend(loc='lower right',frameon=True) + axs[i].get_legend().get_frame().set_linewidth(0.0) + axs[i].set_title('Power',x=0.07, y=0.03,ha="left") + elif file == "util.parquet": + axs[i].set_title('Utilization',x=0.07, y=0.03,ha="left") + axs[i].legend(loc='lower right',frameon=True) + 
axs[i].get_legend().get_frame().set_linewidth(0.0) + elif file == "cooling_model.parquet": + if c_cnt == 0: + axs[i].set_title('PUE',x=0.05, y=0.8,ha="left") + axs[i].legend(loc='upper right') + elif c_cnt == 1: + axs[i].set_title('Cooling Tower\nReturn\nTemperature',x=0.05, y=0.5,ha="left") + axs[i].legend(loc='upper right') + else: + axs[i].set_title('Cooling Tower Supply Temperature',x=0.1, y=0.8) + axs[i].legend(loc='upper right') + c_cnt = c_cnt+1 + elif file == "loss_history.parquet": + axs[i].set_title('Loss') + axs[i].legend(loc='upper right') + else: + raise KeyError() +#plt.show() +#plt.savefig(f"3in1.png",bbox_inches='tight') +#plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0) +fig.subplots_adjust(hspace=0) +plt.tight_layout(pad=0,w_pad=0.0,h_pad=-0.08)#3) +plt.savefig(f"2in1-pm100day50.png",bbox_inches='tight',pad_inches = 0.02, dpi = 300) + diff --git a/scripts/plots/4in1-frontier-wC.py b/scripts/plots/4in1-frontier-wC.py new file mode 100644 index 0000000..79ce235 --- /dev/null +++ b/scripts/plots/4in1-frontier-wC.py @@ -0,0 +1,167 @@ +#!/bin/env python3 +import pandas as pd +import pyarrow.parquet as pq +import matplotlib.pyplot as plt + +import sys + +import matplotlib +matplotlib.rcParams['text.usetex'] = True + +plt.style.use("paper.mplstyle") + + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "Libertine" +}) + +plt.rcParams['text.latex.preamble'] = r'\usepackage{libertine}' + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "serif", + "font.serif": ["Linux Libertine O"], # Specify the font family +}) + + +pt = 1. / 72.27 +width = 1.2 * 241.14749 * pt +golden = (1 + 5**0.5) / 2 +height = width / golden * 3. / 5. + + +carray = ['tab:cyan','tab:orange','tab:brown','tab:blue'] + +if len(sys.argv) > 1: + path = sys.argv[1] +else: + print(f"Usage: python {sys.argv[0]} ") + exit() + +# e.g. 
path = "$HOME/Repositories/exadigit/raps/simulation_results/frontier" + +policies = ['fcfs-nobf','fcfs-easy','priority-nobf','priority-easy','priority-ffbf','replay'] +policies = ['fcfs-nobf','fcfs-easy','priority-ffbf','replay'] +files = ['cooling_model.parquet', 'loss_history.parquet', 'power_history.parquet', 'util.parquet'] +files = ['cooling_model.parquet', 'power_history.parquet', 'util.parquet'] +files = ['util.parquet', 'power_history.parquet', 'cooling_model.parquet', 'cooling_model.parquet'] #, 'cooling_model.parquet'] +#files = ['util.parquet', 'power_history.parquet'] +#files = ['loss_history.parquet', 'power_history.parquet', 'util.parquet'] +#files = ['power_history.parquet', 'util.parquet', 'cooling_model.parquet'] + +policy_path = {f"{policy}":f"{path}/{policy}" for policy in policies} +full_files = {f"{policy}":f"{path}/{policy}/{file}" for policy in policies for file in files} + + +def iter_to_seconds(i): + return i * 15 + +c_cnt=0 +fig, axs = plt.subplots(len(files),figsize=(width,4 * height)) +for i,file in enumerate(files): + policy_files = [f"{path}/{policy}/{file}" for policy in policies] + for c,policy_file in enumerate(policy_files): + # df = pd.read_parquet(policy_file) + x = 'time' + xlab = 'Time [hours]' + policy = policy_file.split('/')[-2] + if file == "power_history.parquet": + y = 'power [kw]' + ylab = 'Power [kW]' + #ymax = 26000 + #ymin = 6500 + #axs[i].set_ylim(ymin,ymax) + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'power [kw]'}) + + elif file == "cooling_model.parquet": + if c_cnt == 0: + y = 'pue' + ylab = 'PUE' + if c_cnt == 1: + y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_r_C' + ylab = 'Temperature [°C]' + if c_cnt == 2: + y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_s_C' + ylab = 'Temperature [°C]' + + df = pd.read_parquet(policy_file) + df['index'] = df.index + df[x] = df['index'].apply(iter_to_seconds) + ymax = max(df[y]) + #axs[i].plot(df[x],df[y], label=ylab) + #y = 'simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.T_fac_ctw_r_C' + + elif file == "loss_history.parquet": + y = 'loss [kw]' + ylab = 'Loss [kW]' + #ylim = 29000 + #axs[i].set_ylim(0,ylim) + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'loss [kw]'}) + #axs[i].plot(df[x],df[y], label=ylab) + + elif file == "util.parquet": + y = 'utilization' + ylab = r'Utilization [\%]' + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time', 1:'utilization [%]'}) + df[y] = df['utilization [%]'] / 100 + #axs[i].plot(df[x], df[y], label=ylab) + + else: + raise KeyError + + + if i == 3: + timeline_s = [0,21600,43200,64800,86400] + timeline_h = ['0:00','6:00','12:00','18:00','24:00'] + axs[i].set_xticks(timeline_s,timeline_h) + else: + timeline_s = [0,21600,43200,64800,86400] + timeline_h = ['0:00','6:00','12:00','18:00','24:00'] + axs[i].set_xticks(timeline_s,timeline_h) + + axs[i].set_xticklabels([]) # Remove x-axis labels + xlab = None + + + timeline_s = [0,21600,43200,64800,86400] + + #axs[i].set_xlim(timeline_s[0],timeline_s[-1]) + axs[i].set_xlabel(xlab) + axs[i].plot(df[x],df[y], label=policy, color=carray[c]) + axs[i].set_ylabel(ylab) + #$axs[i].plot(df[0],df[1],label=policy) + if file == "power_history.parquet": + axs[i].legend(loc='upper right') + axs[i].set_title('Power',x=0.05, y=0.8,ha="left") + elif file == "util.parquet": + axs[i].set_title('Utilization',x=0.05, y=0.1,ha="left") + axs[i].legend(loc='lower right') + elif file == 
"cooling_model.parquet": + if c_cnt == 0: + axs[i].set_title('PUE',x=0.05, y=0.8,ha="left") + axs[i].legend(loc='upper right') + elif c_cnt == 1: + axs[i].set_title('Cooling Tower\nReturn\nTemperature',x=0.05, y=0.5,ha="left") + axs[i].legend(loc='upper right') + else: + axs[i].set_title('Cooling Tower Supply Temperature',x=0.1, y=0.8) + axs[i].legend(loc='upper right') + c_cnt = c_cnt+1 + elif file == "loss_history.parquet": + axs[i].set_title('Loss') + axs[i].legend(loc='upper right') + else: + raise KeyError() +#plt.show() +#plt.savefig(f"3in1.png",bbox_inches='tight') +#plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0) +fig.subplots_adjust(hspace=0) +plt.tight_layout(pad=0,w_pad=0.0,h_pad=-0.08)#3) +plt.savefig(f"4in1-frontier-wC.png",bbox_inches='tight',pad_inches = 0.02, dpi = 300) + diff --git a/scripts/plots/fgk_frontier.py b/scripts/plots/fgk_frontier.py new file mode 100644 index 0000000..20c3dee --- /dev/null +++ b/scripts/plots/fgk_frontier.py @@ -0,0 +1,169 @@ +#!/bin/env python3 +import pandas as pd +import pyarrow.parquet as pq +import matplotlib.pyplot as plt +import matplotlib +import numpy + +import sys + +matplotlib.rcParams['text.usetex'] = True + +plt.style.use("paper.mplstyle") + + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "Libertine" +}) + +plt.rcParams['text.latex.preamble'] = r'\usepackage{libertine}' + +plt.rcParams.update({ + "text.usetex": True, + "font.family": "serif", + "font.serif": ["Linux Libertine O"], # Specify the font family +}) + + + +pt = 1. / 72.27 +width = 1.2*241.14749 * pt +golden = (1 + 5**0.5) / 2 +height = width / golden * 4./5. +# COLUMNWIDTH241.14749pt TEXTWIDTH506.295pt + + +carray = [] +t = plt.get_cmap('tab10').colors +for i in range(0,len(t)): + carray.append(t[i]) +g = carray[2] +carray[2] = carray[4] +carray[4] = g + +if len(sys.argv) > 1: + path = sys.argv[1] +else: + print(f"Usage: python {sys.argv[0]} ") + exit() +# e.g. 
path = "$HOME/Repositories/exadigit/raps/simulation_results/frontier/nnew_fkg_2024-01-18" + +policies = [ + 'replay', +# 'replay-ffbf', +# 'fcfs-ffbf', +# 'priority-ffbf', # on fcfs +# 'sjf-ffbf', +# 'ljf-ffbf', # on prio + 'acct_avg_power-ffbf', + 'acct_low_avg_power-ffbf', +# 'acct_avg_power_w4lj-ffbf', + 'acct_edp-ffbf', + 'acct_fugaku_pts-ffbf', + #'acct_ed2p-ffbf', #Sim to edp + #'acct_pdp-ffbf', + +] +#policies = ['fcfs-nobf','fcfs-easy','priority-nobf','priority-easy','priority-ffbf','replay'] +#policies = ['fcfs-nobf','fcfs-easy','priority-ffbf','replay'] +#policies = ['replay','prio-ffbf','fugaku_pts'] +#files = ['cooling_model.parquet', 'loss_history.parquet', 'power_history.parquet', 'util.parquet'] +#files = ['cooling_model.parquet', 'power_history.parquet', 'util.parquet'] +#files = ['util.parquet', 'power_history.parquet', 'cooling_model.parquet'] +files = ['util.parquet', 'power_history.parquet'] +files = ['power_history.parquet'] +#files = ['loss_history.parquet', 'power_history.parquet', 'util.parquet'] +#files = ['power_history.parquet', 'util.parquet', 'cooling_model.parquet'] + +prefix = "" + +policy_path = {f"{policy}":f"{path}/{prefix}{policy}" for policy in policies} +full_files = {f"{policy}":f"{path}/{prefix}{policy}/{file}" for policy in policies for file in files} + + +def iter_to_seconds(i): + return i * 15 + + +fig, axs = plt.subplots(len(files),figsize=(width, height*len(files)),sharex=True) +if isinstance(axs, matplotlib.axes._axes.Axes): + axs = [axs] +elif isinstance(axs, numpy.ndarray): + pass +else: + pass + +for i,file in enumerate(files): + policy_files = [f"{path}/{prefix}{policy}/{file}" for policy in policies] + for c,policy_file in enumerate(policy_files): + # df = pd.read_parquet(policy_file) + x = 'time' + xlab = 'Time [hours]' + policy = policy_file.split('/')[-2] + if file == "power_history.parquet": + y = 'power [kw]' + ylab = 'Power [kW]' + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'power [kw]'}) + + + elif file == "cooling_model.parquet": + y = 'pue' + ylab = 'PUE' + + df = pd.read_parquet(policy_file) + df['index'] = df.index + df[x] = df['index'].apply(iter_to_seconds) + ymax = max(df['pue']) + + + elif file == "loss_history.parquet": + y = 'loss [kw]' + ylab = 'Loss [kW]' + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time',1:'loss [kw]'}) + + elif file == "util.parquet": + y = 'utilization' + ylab = 'Utilization' + + df = pd.read_parquet(policy_file) + df = df.rename(columns={0:'time', 1:'utilization [%]'}) + df[y] = df['utilization [%]'] / 100 + + + + else: + raise KeyError + + timeline_s = [0,21600,43200,64800,86400] + timeline_h = ['0:00','6:00','12:00','18:00','24:00'] + axs[i].set_xticks(timeline_s,timeline_h) + axs[i].set_xlabel(xlab) + + axs[i].plot(df[x],df[y], label=policy, + #linewidth=0.5, + marker='', color=carray[c]) + axs[i].set_ylabel(ylab) + #$axs[i].plot(df[0],df[1],label=policy) + if file == "power_history.parquet": + axs[i].legend(loc='center left',bbox_to_anchor=(0.02, 0.6)) + axs[i].set_title('Power',x=0.1,y=0.80) + elif file == "util.parquet": + axs[i].set_title('Utilization') + axs[i].legend(loc='lower left') + elif file == "cooling_model.parquet": + axs[i].set_title('PUE') + axs[i].legend(loc='upper left') + elif file == "loss_history.parquet": + axs[i].set_title('Loss') + axs[i].legend(loc='upper left') + else: + raise KeyError() +#plt.show() +#plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0) +fig.subplots_adjust(hspace=0) 
+plt.tight_layout(pad=0,w_pad=0.0,h_pad=-0.08)#3) +plt.savefig(f"nnew_fkg_2024-01-18.png",bbox_inches='tight',pad_inches = 0.02, dpi = 300)#, bbox_inches='tight') diff --git a/scripts/plots/paper.mplstyle b/scripts/plots/paper.mplstyle new file mode 100644 index 0000000..5f77db6 --- /dev/null +++ b/scripts/plots/paper.mplstyle @@ -0,0 +1,86 @@ +## See https://matplotlib.org/stable/tutorials/introductory/customizing.html#a-sample-matplotlibrc-file + +text.usetex: True +text.latex.preamble: \usepackage{amsmath}\usepackage{amssymb} +font.family: serif +#font.serif: \T1/LinuxLibertineT-TLF/m/n/10 +savefig.bbox: tight +savefig.format: pdf + +lines.linewidth: .5 + +## *************************************************************************** +## * AXES * +## *************************************************************************** +axes.linewidth: 0.5 # edge line width +axes.labelsize: 10 # font size of the x and y labels +axes.labelpad: 3.0 # space between label and axis +#axes.labelweight: normal # weight of the x and y labels +axes.grid: True +axes.grid.axis: y + +grid.linewidth: 0.2 + +## *************************************************************************** +## * TICKS * +## *************************************************************************** +## See https://matplotlib.org/api/axis_api.html#matplotlib.axis.Tick +xtick.top: True # draw ticks on the top side +# xtick.bottom: True # draw ticks on the bottom side +# xtick.labeltop: False # draw label on the top +# xtick.labelbottom: True # draw label on the bottom +# xtick.major.size: 3 # major tick size in points +# xtick.minor.size: 1.5 # minor tick size in points +xtick.major.width: .3 # major tick width in points +# xtick.minor.width: .3 # minor tick width in points +# xtick.major.pad: 2 # distance to major tick label in points +# xtick.minor.pad: 2 # distance to the minor tick label in points +# xtick.color: black # color of the ticks +# xtick.labelcolor: inherit # color of the tick labels or inherit from xtick.color +xtick.labelsize: 8 # font size of the tick labels +xtick.direction: in # direction: {in, out, inout} +# xtick.minor.visible: True # visibility of minor ticks on x-axis +# xtick.major.top: True # draw x axis top major ticks +# xtick.major.bottom: True # draw x axis bottom major ticks +# xtick.minor.top: False # draw x axis top minor ticks +# xtick.minor.bottom: False # draw x axis bottom minor ticks +# xtick.alignment: center # alignment of xticks +# +# ytick.left: True # draw ticks on the left side +ytick.right: True # draw ticks on the right side +# ytick.labelleft: True # draw tick labels on the left side +# ytick.labelright: False # draw tick labels on the right side +# ytick.major.size: 3 # major tick size in points +# ytick.minor.size: 1.5 # minor tick size in points +# ytick.major.width: .3 # major tick width in points +# ytick.minor.width: .3 # minor tick width in points +# ytick.major.pad: 2 # distance to major tick label in points +# ytick.minor.pad: 2 # distance to the minor tick label in points +# ytick.color: black # color of the ticks +# ytick.labelcolor: inherit # color of the tick labels or inherit from ytick.color +ytick.labelsize: 8 # font size of the tick labels +ytick.direction: in # direction: {in, out, inout} +# ytick.minor.visible: True # visibility of minor ticks on y-axis +# ytick.major.left: True # draw y axis left major ticks +# ytick.major.right: True # draw y axis right major ticks +# ytick.minor.left: True # draw y axis left minor ticks +# ytick.minor.right: True # draw y axis right 
minor ticks +# ytick.alignment: center_baseline # alignment of yticks + +## *************************************************************************** +## * LEGEND * +## *************************************************************************** +legend.loc: upper right +legend.frameon: False # if True, draw the legend on a background patch +# legend.framealpha: 0.8 # legend patch transparency +# legend.fancybox: True # if True, use a rounded box for the + # legend background, else a rectangle +#legend.markerscale: 1.0 # the relative size of legend markers vs. original +legend.fontsize: 6 + +## *************************************************************************** +## * FIGURE * +## *************************************************************************** +# figure.figsize: 3.4, 2.55 # figure size in inches +figure.dpi: 300 # figure dots per inch +# figure.frameon: True # enable figure frame -- GitLab From 91995fa732220f12ae0d55b543b74fa38d418443 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 24 Apr 2025 15:15:25 -0400 Subject: [PATCH 071/388] Update to write_dict_to_file This update was needed as some dataloaders that use numpy failed to dump accounts.json files. This now works. However reading them in fails. To be fixed in the next commit. --- main.py | 3 ++- raps/utils.py | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 4f2920c..0ca0bbe 100644 --- a/main.py +++ b/main.py @@ -33,6 +33,7 @@ from raps.account import Accounts from raps.weather import Weather from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, next_arrival from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats +from raps.utils import convert_numpy_to_builtin config = ConfigManager(system_name=args.system).get_config() @@ -281,5 +282,5 @@ if args.output: json_string = json.dumps(sc.accounts.to_dict()) f.write(json_string) except TypeError: - raise TypeError(f"{sc.accounts} could not be parsed by json.dump") + write_dict_to_file(sc.accounts.to_dict(), OPATH / 'accounts.json') print("Output directory is: ", OPATH) # If output is enabled, the user wants this information as last output diff --git a/raps/utils.py b/raps/utils.py index 62b98ef..3646288 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -360,7 +360,8 @@ def write_dict_to_file(dictionary, file_path): if isinstance(value, dict): file.write(f"{key}: {{\n") for subkey, subvalue in value.items(): - file.write(f" {subkey}: {subvalue}\n") + base_subvalue = convert_numpy_to_builtin(subvalue) + file.write(f" {subkey}: {base_subvalue}\n") file.write("}\n") else: file.write(f"{key}: {value}\n") @@ -375,6 +376,27 @@ def toJSON(obj): indent=4) +def convert_numpy_to_builtin(obj): + if isinstance(obj, dict): + tmp_obj = dict() + for k,v in obj.items(): + tmp_obj[k] = convert_numpy_to_builtin(v) + return tmp_obj + elif isinstance(obj, list): + return [convert_numpy_to_builtin(i) for i in obj] + elif isinstance(obj, np.ndarray): + tmplist = obj.tolist() + return convert_numpy_to_builtin(tmplist) + elif isinstance(obj, (np.integer, np.int64, np.int32)): + return int(obj) + elif isinstance(obj, (np.floating, np.float64, np.float32)): + return float(obj) + elif isinstance(obj, (np.bool_)): + return bool(obj) + else: + return obj + + def get_utilization(trace, time_quanta_index): """Retrieve utilization value for a given trace at a specific time quanta index.""" if isinstance(trace, (list, np.ndarray)): -- GitLab From 
6d81e2337ef5b6986baec509ee04ee79f201de48 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 25 Apr 2025 09:59:11 -0400 Subject: [PATCH 072/388] Fix raps/utils.py write_dict_to_file to write parseable json files. Both reading and writing of --accounts / --accounts-json work now even if numpy data was emitted at some point. --- raps/utils.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index 3646288..26f8856 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -356,15 +356,23 @@ def encrypt(name): def write_dict_to_file(dictionary, file_path): """Function to write dictionary to a text file""" with open(file_path, 'w') as file: - for key, value in dictionary.items(): + file.write("{") + for j, (key, value) in enumerate(dictionary.items()): if isinstance(value, dict): - file.write(f"{key}: {{\n") - for subkey, subvalue in value.items(): + file.write(f"\"{str(key)}\": {{\n") + for i, (subkey, subvalue) in enumerate(value.items()): base_subvalue = convert_numpy_to_builtin(subvalue) - file.write(f" {subkey}: {base_subvalue}\n") - file.write("}\n") + json_string = toJSON(base_subvalue) + file.write(f" \"{str(subkey)}\": {json_string}") + if i < len(value.items()) - 1: + file.write(", ") + file.write("}") else: - file.write(f"{key}: {value}\n") + file.write(f"\"{str(key)}\": {value}") + if j < len(dictionary.items()) - 1: + file.write(", ") + file.write("\n") + file.write("}") def toJSON(obj): -- GitLab From 2292d62ce8eb10c113a5775ce2c022d11578896d Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 28 Apr 2025 17:39:35 -0400 Subject: [PATCH 073/388] Test, what would a 40*Frontier system look like --- config/40frontiers/cooling.json | 25 +++++++++++++++++++++ config/40frontiers/power.json | 18 ++++++++++++++++++ config/40frontiers/scheduler.json | 17 +++++++++++++++++ config/40frontiers/system.json | 20 ++++++++++++++++++++ config/40frontiers/uq.json | 11 +++++++++++ 5 files changed, 91 insertions(+) create mode 100644 config/40frontiers/cooling.json create mode 100644 config/40frontiers/power.json create mode 100644 config/40frontiers/scheduler.json create mode 100644 config/40frontiers/system.json create mode 100644 config/40frontiers/uq.json diff --git a/config/40frontiers/cooling.json b/config/40frontiers/cooling.json new file mode 100644 index 0000000..778a56d --- /dev/null +++ b/config/40frontiers/cooling.json @@ -0,0 +1,25 @@ +{ + "COOLING_EFFICIENCY": 0.945, + "WET_BULB_TEMP": 290.0, + "ZIP_CODE": 37831, + "COUNTRY_CODE": "US", + "FMU_PATH": "models/Simulator_olcf5_base.fmu", + "FMU_COLUMN_MAPPING": { + "T_sec_r_C": "Rack Return Temperature (\u00b0C)", + "T_sec_s_C": "Rack Supply Temperature (\u00b0C)", + "p_sec_r_psig": "Rack Supply Pressure (psig)", + "p_sec_s_psig": "Rack Return Pressure (psig)", + "V_flow_sec_GPM": "Rack Flowrate (gpm)", + "T_prim_r_C": "Facility Return Temperature (\u00b0C)", + "T_prim_s_C": "Facility Supply Temperature (\u00b0C)", + "p_prim_s_psig": "Facility Supply Pressure (psig)", + "p_prim_r_psig": "Facility Return Pressure (psig)", + "V_flow_prim_GPM": "Facility Flowrate (gpm)", + "W_flow_CDUP_kW": "Work Done By CDUP (kW)" + }, + "TEMPERATURE_KEY": "simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_Towb", + "W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW", + "W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW", + "W_CTs_KEY":
"simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" + +} diff --git a/config/40frontiers/power.json b/config/40frontiers/power.json new file mode 100644 index 0000000..d6ec29e --- /dev/null +++ b/config/40frontiers/power.json @@ -0,0 +1,18 @@ +{ + "POWER_GPU_IDLE": 88, + "POWER_GPU_MAX": 560, + "POWER_CPU_IDLE": 90, + "POWER_CPU_MAX": 280, + "POWER_MEM": 74.26, + "POWER_NIC": 20, + "POWER_NVME": 30, + "POWER_SWITCH": 250, + "POWER_CDU": 8473.47, + "POWER_UPDATE_FREQ": 15, + "RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 +} diff --git a/config/40frontiers/scheduler.json b/config/40frontiers/scheduler.json new file mode 100644 index 0000000..0a43f19 --- /dev/null +++ b/config/40frontiers/scheduler.json @@ -0,0 +1,17 @@ +{ + "SEED": 42, + "JOB_ARRIVAL_TIME": 1, + "MTBF": 11, + "TRACE_QUANTA": 15, + "MIN_WALL_TIME": 3600, + "MAX_WALL_TIME": 43200, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 9000, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/40frontiers/system.json b/config/40frontiers/system.json new file mode 100644 index 0000000..51add94 --- /dev/null +++ b/config/40frontiers/system.json @@ -0,0 +1,20 @@ +{ + "NUM_CDUS": 1000, + "RACKS_PER_CDU": 3, + "NODES_PER_RACK": 128, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 2, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [41], + "DOWN_NODES": [], + "CPUS_PER_NODE": 1, + "GPUS_PER_NODE": 4, + "CPU_PEAK_FLOPS": 2048E9, + "GPU_PEAK_FLOPS": 52E12, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0.667 +} diff --git a/config/40frontiers/uq.json b/config/40frontiers/uq.json new file mode 100644 index 0000000..7359bc2 --- /dev/null +++ b/config/40frontiers/uq.json @@ -0,0 +1,11 @@ +{ + "POWER_GPU_UNCERTAINTY": 0.05 , + "POWER_CPU_UNCERTAINTY": 0.05 , + "POWER_MEM_UNCERTAINTY": 0.05 , + "POWER_NIC_UNCERTAINTY": 0.05 , + "POWER_NVME_UNCERTAINTY": 0.05 , + "POWER_CDUS_UNCERTAINTY": 0.05 , + "POWER_NODE_UNCERTAINTY": 0.002, + "POWER_SWITCH_UNCERTAINTY": 0.05 , + "RECTIFIER_POWER_UNCERTAINTY": 0.05 +} -- GitLab From a3cb124e49ab82ac0b01f7e048672a53eee67f81 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 28 Apr 2025 21:08:54 -0400 Subject: [PATCH 074/388] Fix: Set nodes of completed jobs to idle. --- raps/engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/raps/engine.py b/raps/engine.py index 05e386e..b836271 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -123,6 +123,7 @@ class Engine: completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time] for job in completed_jobs: + self.power_manager.set_idle(job.scheduled_nodes) job.state = JobState.COMPLETED self.running.remove(job) -- GitLab From f1dab007b42c50a7ffe39a478f9b58cbb900e98f Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 28 Apr 2025 22:38:33 -0400 Subject: [PATCH 075/388] Added alternative distribution, old distribution has jobs truncated by hour. 
--- raps/utils.py | 7 +++++++ raps/workload.py | 54 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index 26f8856..6c6dda0 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -64,6 +64,13 @@ def truncated_normalvariate(mu, sigma, lower, upper): return number +def truncated_weibull(scale, shape, min, max): + while True: + number = random.weibullvariate(scale, shape) + if min < number <= max: + return int(number) + + def linear_to_3d_index(linear_index, shape): """ Convert linear index to 3D index. diff --git a/raps/workload.py b/raps/workload.py index 56d7cb3..d6f50d1 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -42,7 +42,7 @@ ACCT_NAMES = ["ACT01", "ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07",\ MAX_PRIORITY = 500000 -from .utils import truncated_normalvariate, determine_state, next_arrival +from .utils import truncated_normalvariate, determine_state, next_arrival, truncated_weibull class Workload: @@ -65,20 +65,44 @@ class Workload: partition = random.choice(self.partitions) # Get the corresponding config for the selected partition config = self.config_map[partition] - - nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) - name = random.choice(JOB_NAMES) - account = random.choice(ACCT_NAMES) - cpu_util = random.random() * config['CPUS_PER_NODE'] - gpu_util = random.random() * config['GPUS_PER_NODE'] - mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 - sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 - wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 - time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 - end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = [], [] + wes_random = False + if wes_random: + nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = random.random() * config['CPUS_PER_NODE'] + gpu_util = random.random() * config['GPUS_PER_NODE'] + mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 + sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 + wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 + time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + priority = random.randint(0, MAX_PRIORITY) + net_tx, net_rx = [], [] + else: + max_nodes = config['MAX_NODES_PER_JOB'] + min_nodes = 1 + nodes_required = truncated_weibull(max_nodes, 0.1, min_nodes, max_nodes) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = random.random() * config['CPUS_PER_NODE'] + gpu_util = random.random() * config['GPUS_PER_NODE'] + mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 + sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 + wall_time = truncated_weibull(3 * config['MIN_WALL_TIME'],0.75,config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute + time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 
300 * 300 # to 5 minutes + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + if nodes_required < max_nodes * .10: + priority = 0 + elif nodes_required < max_nodes * .20: + priority = 1 + elif nodes_required < max_nodes * .50: + priority = 2 + else: + priority = 3 + net_tx, net_rx = [], [] # Jobs arrive according to Poisson process time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) -- GitLab From 79c6601ef71080a60028440151b7723a391f30d1 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 6 May 2025 11:16:48 -0400 Subject: [PATCH 076/388] Changed pyproject.toml to lower bounds --- pyproject.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1b3f2e0..2ec982d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,14 +11,14 @@ readme = "README.md" # license = {file = "LICENSE.txt"} dependencies = [ - "matplotlib==3.7.2", - "numpy==1.23.5", - "rich==13.6.0", - "fmpy==0.3.19", - "pandas==2.0.3", - "scipy==1.10.1", - "pyarrow==15.0.1", - "tqdm==4.66.5", - "uncertainties==3.2.1", - "requests==2.32.3" + "matplotlib>=3.7.2", + "numpy>=1.23.5", + "rich>=13.6.0", + "fmpy>=0.3.19", + "pandas>=2.0.3", + "scipy>=1.10.1", + "pyarrow>=15.0.1", + "tqdm>=4.66.5", + "uncertainties>=3.2.1", + "requests>=2.32.3" ] -- GitLab From 68f7c6008cbbec0f3a49386f4cb97740b3791c71 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 7 May 2025 16:50:33 -0400 Subject: [PATCH 077/388] Initial restructure to allow for additional distributions. TODO: The distributions are not implemented properly yet. This commit takes the initial steps needed to add the code and the distributions. Additional work is needed to actually draw from the correct distributions given the parameters, to complete the set, to allow for multimodal distributions (initial pieces are in place), and to set default parameters, e.g. in a config file that can be specified on the command line.
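Once the defaults are in place, a bimodal synthetic workload is intended
to be expressible along these lines (a sketch of the CLI wired up in
args.py/workload.py below; flag values are illustrative):

    python main.py -w synthetic --distribution weibull normal \
        --multimodal 0.2 0.8 --dist_shape 0.75 --dist_scale 10800

i.e. draw about 20% of the jobs' wall times from a Weibull distribution
and 80% from a normal distribution; the --multimodal fractions must sum
to 1.0.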
--- args.py | 10 +- config/frontier/scheduler.json | 2 +- main.py | 8 +- raps/config.py | 4 +- raps/utils.py | 8 +- raps/workload.py | 275 +++++++++++++++++++++++++++++++-- 6 files changed, 280 insertions(+), 27 deletions(-) diff --git a/args.py b/args.py index f077b85..114628e 100644 --- a/args.py +++ b/args.py @@ -1,6 +1,8 @@ import argparse from raps.schedulers.default import PolicyType, BackfillType +from raps.workload import add_workload_to_parser + parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)') # System configurations @@ -42,8 +44,9 @@ parser.add_argument('--jid', type=str, default='*', help='Replay job id') parser.add_argument('--scale', type=int, default=0, help='Scale telemetry to max nodes specified in order to run telemetry on a smaller smaller target system/partition, e.g., --scale 192') # Synthetic workloads -choices = ['random', 'benchmark', 'peak', 'idle'] -parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') +parser = add_workload_to_parser(parser) +#choices = ['random', 'benchmark', 'peak', 'idle','synthetic'] +#parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') # Scheduling options choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux', 'experimental'] @@ -61,6 +64,7 @@ parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, parser.add_argument('--accounts', action='store_true', help='Flag indicating if accounts should be tracked') parser.add_argument('--accounts-json', type=str, help='Json of account stats generated in previous run. see raps/accounts.py') + +# ### At the end get args and an args_dict. import this if needed. 
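+# For example, main.py (updated below) does: from args import args, args_dict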
args = parser.parse_args() args_dict = vars(args) -print(args_dict) diff --git a/config/frontier/scheduler.json b/config/frontier/scheduler.json index 3cc1744..47d1da4 100644 --- a/config/frontier/scheduler.json +++ b/config/frontier/scheduler.json @@ -3,7 +3,7 @@ "JOB_ARRIVAL_TIME": 100, "MTBF": 11, "TRACE_QUANTA": 15, - "MIN_WALL_TIME": 3600, + "MIN_WALL_TIME": 60, "MAX_WALL_TIME": 43200, "UI_UPDATE_FREQ": 900, "MAX_NODES_PER_JOB": 3000, diff --git a/main.py b/main.py index 0ca0bbe..40338a7 100644 --- a/main.py +++ b/main.py @@ -13,9 +13,6 @@ from tqdm import tqdm from raps.helpers import check_python_version check_python_version() -from args import args -args_dict = vars(args) -print(args_dict) from raps.config import ConfigManager from raps.constants import OUTPUT_PATH, SEED @@ -35,6 +32,9 @@ from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats from raps.utils import convert_numpy_to_builtin +from args import args, args_dict + + config = ConfigManager(system_name=args.system).get_config() if args.seed: @@ -125,7 +125,7 @@ if args.replay: else: # Synthetic jobs wl = Workload(config) - jobs = getattr(wl, args.workload)(num_jobs=args.numjobs) + jobs = getattr(wl, args.workload)(args=args) if args.verbose: for job_vector in jobs: diff --git a/raps/config.py b/raps/config.py index 51e7d86..73035be 100644 --- a/raps/config.py +++ b/raps/config.py @@ -17,7 +17,7 @@ class ConfigManager: base_path = CONFIG_PATH / system_name config_files = ['system.json', 'power.json', 'scheduler.json'] optional_files = ['cooling.json', 'uq.json'] - + for config_file in config_files + optional_files: file_path = base_path / config_file if config_file in optional_files and not file_path.exists(): @@ -26,7 +26,7 @@ class ConfigManager: raise FileNotFoundError(f"Mandatory configuration file {config_file} not found.") config_data = self.load_config_file(file_path) self.config.update(config_data) - + @staticmethod def load_config_file(file_path: Path) -> dict[str, Any]: with open(file_path, 'r') as file: diff --git a/raps/utils.py b/raps/utils.py index 6c6dda0..71ff4fc 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -58,10 +58,14 @@ def truncated_normalvariate(mu, sigma, lower, upper): float Random number from the truncated normal distribution. """ - while True: + CUTOFF = 100000000 + i = 0 + while i < CUTOFF: number = random.normalvariate(mu, sigma) if lower < number < upper: - return number + return int(number) + i += 1 + raise Exception(f"mu:{mu} sigma:{sigma}, not a single hit in {CUTOFF} tries.") def truncated_weibull(scale, shape, min, max): diff --git a/raps/workload.py b/raps/workload.py index d6f50d1..5cae9d0 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -24,11 +24,13 @@ JOB_END_PROBS : list List of probabilities for different job end states. 
""" - +import math import random import numpy as np +import argparse +import matplotlib.pyplot as plt -from .job import job_dict +from raps.job import job_dict JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",\ @@ -42,7 +44,7 @@ ACCT_NAMES = ["ACT01", "ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07",\ MAX_PRIORITY = 500000 -from .utils import truncated_normalvariate, determine_state, next_arrival, truncated_weibull +from raps.utils import truncated_normalvariate, determine_state, next_arrival, truncated_weibull class Workload: @@ -57,14 +59,143 @@ class Workload: gpu_trace = gpu_util * np.ones(int(wall_time) // trace_quanta) return (cpu_trace, gpu_trace) - def generate_random_jobs(self, num_jobs: int) -> list[list[any]]: - """ Generate random jobs with specified number of jobs. """ + def generate_uniform_jobs(self, *, num_jobs) -> list[list[any]]: + print("TODO Implement propper!") + jobs = [] + partition = random.choice(self.partitions) + config = self.config_map[partition] + + for job_index in range(num_jobs): + + time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + + nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = random.random() * config['CPUS_PER_NODE'] + gpu_util = random.random() * config['GPUS_PER_NODE'] + mu = config["MIN_WALL_TIME"] * 1.0 + sigma = 4.0 + wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 + time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + priority = random.randint(0, MAX_PRIORITY) + net_tx, net_rx = [], [] + jobs.append(job_dict(nodes_required=nodes_required, name=name, + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=time_to_next_job - 100, + time_limit=time_limit, + start_time=time_to_next_job, + end_time=time_to_next_job + wall_time, + wall_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time)) + return jobs + + def generate_normal_jobs(self, *, num_jobs) -> list[list[any]]: + print("TODO Implement propper!") + jobs = [] + partition = random.choice(self.partitions) + config = self.config_map[partition] + + for job_index in range(num_jobs): + + time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + + nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = random.random() * config['CPUS_PER_NODE'] + gpu_util = random.random() * config['GPUS_PER_NODE'] + mu = config["MIN_WALL_TIME"] * 1.0 + sigma = 4.0 + wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 + time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + priority = random.randint(0, MAX_PRIORITY) + net_tx, net_rx = [], [] + jobs.append(job_dict(nodes_required=nodes_required, name=name, + account=account, 
cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=time_to_next_job - 100, + time_limit=time_limit, + start_time=time_to_next_job, + end_time=time_to_next_job + wall_time, + wall_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time)) + return jobs + + def generate_weibull_jobs(self, *, shape, scale, num_jobs) -> list[list[any]]: + print("TODO Implement propper!") jobs = [] + partition = random.choice(self.partitions) + config = self.config_map[partition] + for job_index in range(num_jobs): + + time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + + nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = random.random() * config['CPUS_PER_NODE'] + gpu_util = random.random() * config['GPUS_PER_NODE'] + mu = config["MIN_WALL_TIME"] * 1.0 + sigma = 4.0 + #wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 + + wall_time = truncated_weibull( + (config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1, + #(config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'], + config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute + + #time_limit = truncated_weibull(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes + time_limit = truncated_weibull(config['MAX_WALL_TIME'] // 2 + config['MIN_WALL_TIME'], 1, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes + + + #time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + priority = random.randint(0, MAX_PRIORITY) + net_tx, net_rx = [], [] + jobs.append(job_dict(nodes_required=nodes_required, name=name, + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=time_to_next_job - 100, + time_limit=time_limit, + start_time=time_to_next_job, + end_time=time_to_next_job + wall_time, + wall_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time)) + return jobs + + + + def generate_random_jobs(self, args) -> list[list[any]]: + """ Generate random jobs with specified number of jobs. 
""" + + partition = random.choice(self.partitions) + config = self.config_map[partition] + if args.mu is None: + mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 + if args.sigma is None: + sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 + + jobs = [] + for job_index in range(args.numjobs): # Randomly select a partition - partition = random.choice(self.partitions) # Get the corresponding config for the selected partition - config = self.config_map[partition] wes_random = False if wes_random: nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) @@ -72,8 +203,6 @@ class Workload: account = random.choice(ACCT_NAMES) cpu_util = random.random() * config['CPUS_PER_NODE'] gpu_util = random.random() * config['GPUS_PER_NODE'] - mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 - sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 end_state = determine_state(config['JOB_END_PROBS']) @@ -88,10 +217,14 @@ class Workload: account = random.choice(ACCT_NAMES) cpu_util = random.random() * config['CPUS_PER_NODE'] gpu_util = random.random() * config['GPUS_PER_NODE'] - mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 - sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 - wall_time = truncated_weibull(3 * config['MIN_WALL_TIME'],0.75,config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute - time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes + #wall_time = truncated_weibull((config['MAX_WALL_TIME']/4)*3+config['MIN_WALL_TIME'],0.5,config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute + wall_time = truncated_weibull( + (config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1, + #(config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'], + config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute + + #time_limit = truncated_weibull(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes + time_limit = truncated_weibull(config['MAX_WALL_TIME'] // 2 + config['MIN_WALL_TIME'], 1, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes end_state = determine_state(config['JOB_END_PROBS']) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) if nodes_required < max_nodes * .10: @@ -122,10 +255,34 @@ class Workload: return jobs + def synthetic(self, **kwargs): + args = kwargs.get('args',None) + print("ARGS") + print(args) + num_jobs = args.numjobs + #for key,value in kwargs.items(): + # print(key,value) + #print("HERE") + #print(sum(kwargs.get('multimodal'))) + jobs = [] + if len(args.distribution) != 1 and sum(args.multimodal) != 1.0: + raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}") + for dist,percentage in zip(args.distribution,args.multimodal): + print(args.distribution) + if "uniform" in args.distribution: + jobs.extend(self.generate_uniform_jobs(num_jobs=int(percentage * num_jobs))) + elif "weibull" in args.distribution: + jobs.extend(self.generate_weibull_jobs(shape=args.dist_shape,scale=args.dist_scale,num_jobs=int(percentage * num_jobs))) + elif "normal" in args.distribution: + jobs.extend(self.generate_normal_jobs(num_jobs=int(percentage * num_jobs))) + else: + pass + return jobs + def 
random(self, **kwargs): """ Generate random workload """ - num_jobs = kwargs.get('num_jobs', 0) - return self.generate_random_jobs(num_jobs=num_jobs) + args = kwargs.get('args',None) + return self.generate_random_jobs(args=args) def peak(self, **kwargs): """Peak power test for multiple partitions""" @@ -334,3 +491,91 @@ class Workload: jobs.append(job_info) return jobs + + +def plot_job_hist(jobs): + + y = [y['nodes_required'] for y in jobs] + x = [x['wall_time'] for x in jobs] + x2 = [x['time_limit'] for x in jobs] + fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) + # Remove space between subplots + fig.subplots_adjust(wspace=0, hspace=0) + # Create scatter plot + for i in range(len(x)): + axs[1,0].plot([x[i],x2[i]],[y[i],y[i]],color='lightblue',zorder=1) + axs[1, 0].scatter(x2, y,marker='.',c='lightblue',zorder=2) + axs[1, 0].scatter(x, y,zorder=3) + + axs[0, 0].hist(x2,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical',color='lightblue') + axs[0, 0].hist(x,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical') + #print(x) + axs[1, 0].sharex(axs[0,0]) + + axs[1, 1].hist(y,bins=max(1,min(100,(max(y) - min(y)))), orientation='horizontal') + axs[1, 0].sharey(axs[1,1]) + + # Remove ticks + axs[0, 0].set_xticks([]) + #axs[0, 0].set_yticks([]) + #axs[1, 1].set_xticks([]) + axs[1, 1].set_yticks([]) + #axs[0, 1].set_xticks([]) + #axs[0, 1].set_yticks([]) + #axs[0, 1].set_yticks([]) + axs[0, 1].spines['top'].set_color('white') + axs[0, 1].set_yticks([]) + axs[0, 1].set_xticks([]) + #axs[0, 1].spines['bottom'].set_color('white') + #axs[0, 1].spines['left'].set_color('white') + axs[0, 1].spines['right'].set_color('white') + + axs[1,0].set_ylabel("nodes [N]") + axs[1,0].set_xlabel("wall time [hh:mm]") + #axs[1,0].set_yticklabels([str(n).zfill(2) + ':00' for n in np.arange(min(y)//3600, max(y)//3600, 1)]) + minx_s = 0 + maxx_s = max(x2) + x_label_mins = [n for n in np.arange(minx_s // 60 ,maxx_s // 60 )] + x_label_ticks = [n * 60 for n in x_label_mins[0::60]] + x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for + (x1,x2) in [(n // 60,n % 60) for + n in x_label_mins[0::60]]] + print(x_label_str) + axs[1,0].set_xticks(x_label_ticks,x_label_str) + + miny = min(y) + maxy = max(y) + y_ticks = np.arange(0,maxy,maxy // 10) + y_ticks[0] = miny + axs[1,0].set_yticks(y_ticks) + + axs[0,0].tick_params(axis="x", labelbottom=False) + axs[1,1].tick_params(axis="y", labelleft=False) + + plt.show() + + +def add_workload_to_parser(parser): + + choices = ['random', 'benchmark', 'peak', 'idle','synthetic'] + parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') + + parser.add_argument("--multimodal", default=[1.0], type=float, nargs="+", help="Percentage to draw from each distribution (list of floats)e.g. 
'0.2 0.8' percentages apply in order to the list of the --distribution argument list.") + parser.add_argument("--distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') + parser.add_argument("--dist_shape", nargs="+", type=float, required=False, help="Shape of weibull") + parser.add_argument("--dist_scale", nargs="+", type=float, required=False, help="Scale of weibull") + parser.add_argument("--mu", nargs="+", type=float, required=False, help="Mean (mu) for Normal distribution") + parser.add_argument("--sigma", nargs="+", type=float, required=False, help="Standard deviation (sigma) for Normal distribution") + + return parser + + +if __name__ == "__main__": + + from args import args + from raps.config import ConfigManager + config = ConfigManager(system_name=args.system).get_config() + + workload = Workload(config) + jobs = getattr(workload, args.workload)(args=args) + plot_job_hist(jobs) -- GitLab From 3b06d23d7bb33bf74da40a145c32cf6c55f807f1 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 20 May 2025 14:47:57 -0400 Subject: [PATCH 078/388] Implement some code to study job slowdown caused by network congestion --- raps/engine.py | 13 ++++++ raps/job.py | 117 +++++++++++++++++++++++++++++++++++++++++++----- raps/network.py | 19 ++++++++ 3 files changed, 139 insertions(+), 10 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index b836271..29aa0ad 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -218,6 +218,19 @@ class Engine: net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx) net_utils.append(net_util) + + # Get the maximum allowed bandwidth from the configuration. + network_congestion_threshold = self.config.get('NETWORK_MAX_BW', 100.0) + if net_util > network_congestion_threshold: + # Use our network helper functions to get current bandwidth usage. + current_bw = get_current_bandwidth_usage(link_id="link_1") + dilation_factor = network_dilation_factor(current_bw, network_congestion_threshold) + # Optionally, only apply dilation once per job to avoid compounding the effect. 
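+                    # NOTE: apply_dilation stretches the job's CPU/GPU/network traces
+                    # and extends wall_time by the same factor (see raps/job.py).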
+ if not hasattr(job, 'network_dilated') or not job.network_dilated: + print(f"Applying dilation factor {dilation_factor:.2f} to job {job.id} due to network congestion") + job.apply_dilation(dilation_factor) + job.network_dilated = True + else: net_utils.append(0) diff --git a/raps/job.py b/raps/job.py index 09eaed4..15bc9a0 100644 --- a/raps/job.py +++ b/raps/job.py @@ -1,3 +1,4 @@ +import numpy as np from enum import Enum import numpy as np @@ -11,11 +12,13 @@ Implementing such using something like: job = SimpleNamespace(**job_dict(...)) """ -def job_dict(*,nodes_required, name, account, \ - cpu_trace, gpu_trace, ntx_trace, nrx_trace, \ + +def job_dict(*, nodes_required, name, account, + cpu_trace, gpu_trace, ntx_trace, nrx_trace, end_state, scheduled_nodes=None, id, priority=0, partition=0, submit_time=0, time_limit=0, start_time=0, end_time=0, - wall_time=0, trace_time=0, trace_start_time=0,trace_end_time=0, trace_missing_values=False): + wall_time=0, trace_time=0, trace_start_time=0, trace_end_time=0, + trace_missing_values=False): """ Return job info dictionary """ return { 'nodes_required': nodes_required, @@ -39,11 +42,35 @@ def job_dict(*,nodes_required, name, account, \ 'trace_time': trace_time, 'trace_start_time': trace_start_time, 'trace_end_time': trace_end_time, - 'trace_missing_values': trace_missing_values - + 'trace_missing_values': trace_missing_values, + 'dilated': False } +def dilate_trace(trace, factor): + """ + Scale a trace in the time dimension by the given factor. + + Parameters: + - trace (list of float): the original trace values. + - factor (float): the dilation factor; >1 to slow down (stretch) and <1 to speed up (compress). + + Returns: + - list of float: the dilated trace. + """ + if trace is None or len(trace) == 0: + return trace + original_length = len(trace) + # Compute the new length (rounding to the nearest integer) + new_length = int(np.round(original_length * factor)) + # Create arrays for the old and new indices + old_indices = np.linspace(0, original_length - 1, num=original_length) + new_indices = np.linspace(0, original_length - 1, num=new_length) + # Use linear interpolation to compute the new trace values + new_trace = np.interp(new_indices, old_indices, trace).tolist() + return new_trace + + class JobState(Enum): """Enumeration for job states.""" RUNNING = 'R' @@ -140,15 +167,26 @@ class Job: return cls._id_counter def statistics(self): - """ Derive job statistics from the Job Class and return - """ + """ Derive job statistics from the Job Class and return """ return JobStatistics(self) + def apply_dilation(self, factor): + """ + Apply a dilation factor to the job’s execution traces and wall time. + + Parameters: + - factor (float): the dilation factor; >1 to slow down (lengthen the traces) and <1 to speed up. + """ + self.cpu_trace = dilate_trace(self.cpu_trace, factor) + self.gpu_trace = dilate_trace(self.gpu_trace, factor) + self.ntx_trace = dilate_trace(self.ntx_trace, factor) + self.nrx_trace = dilate_trace(self.nrx_trace, factor) + self.wall_time = int(np.round(self.wall_time * factor)) + + class JobStatistics: - """ - Reduced class for handling statistics after the job has finished. - """ + """ Reduced class for handling statistics after the job has finished. 
""" def __init__(self,job): self.id = job.id @@ -203,3 +241,62 @@ class JobStatistics: self.avg_node_power = sum(job.power_history) / len(job.power_history) / self.num_nodes self.max_node_power = max(job.power_history) / self.num_nodes self.energy = self.run_time * self.avg_node_power * self.num_nodes + + +if __name__ == "__main__": + import random + + # Each sample in the trace represents 15 seconds. + trace_quanta = 15 # seconds per sample + wall_time = 600 # total job wall time in seconds (600s = 10 minutes) + num_samples = wall_time // trace_quanta # should be 40 samples + + # Generate a random GPU trace (values between 0 and 4 for 4 GPUs total) + gpu_trace = [random.uniform(0, 4) for _ in range(num_samples)] + # Generate a random CPU trace (values between 0 and 1) + cpu_trace = [random.uniform(0, 1) for _ in range(num_samples)] + # Dummy network traces + ntx_trace = [random.uniform(0, 10) for _ in range(num_samples)] + nrx_trace = [random.uniform(0, 10) for _ in range(num_samples)] + + # Create a job dictionary using the existing job_dict helper. + jdict = job_dict( + nodes_required=1, + name="test_job", + account="test_account", + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=ntx_trace, + nrx_trace=nrx_trace, + wall_time=wall_time, + end_state="", + scheduled_nodes=[], + time_offset=0, + job_id=0 + ) + + # Instantiate the Job. + job_instance = Job(jdict, current_time=0) + + # Print original job properties. + print("Original wall_time:", job_instance.wall_time) + print("Original cpu_trace length:", len(job_instance.cpu_trace)) + print("Original gpu_trace length:", len(job_instance.gpu_trace)) + + # Apply a dilation factor, e.g., 1.5 for a 50% slowdown (traces become 50% longer) + dilation_factor = 1.5 + job_instance.apply_dilation(dilation_factor) + + # Calculate the expected new lengths. + expected_samples = int(np.round(num_samples * dilation_factor)) + expected_wall_time = int(np.round(wall_time * dilation_factor)) + + # Print the dilated job properties. + print("\nAfter applying a dilation factor of", dilation_factor) + print("New wall_time:", job_instance.wall_time, "(expected:", expected_wall_time, ")") + print("New cpu_trace length:", len(job_instance.cpu_trace), "(expected:", expected_samples, ")") + print("New gpu_trace length:", len(job_instance.gpu_trace), "(expected:", expected_samples, ")") + + # Optionally, print a few sample values from the new traces. + print("\nSample cpu_trace values:", job_instance.cpu_trace[:5]) + print("Sample gpu_trace values:", job_instance.gpu_trace[:5]) diff --git a/raps/network.py b/raps/network.py index ddcfbc1..2c83110 100644 --- a/raps/network.py +++ b/raps/network.py @@ -6,3 +6,22 @@ def network_utilization(tx, rx): tx_util = min(tx / TX_MAX, 1.0) # Clamp to 1.0 rx_util = min(rx / RX_MAX, 1.0) return (tx_util + rx_util) / 2.0 + +def network_dilation_factor(current_bw, max_bw): + """ + Calculate a dilation factor based on current network bandwidth usage. + + If current_bw is within limits, the factor is 1.0 (no slowdown). + If current_bw exceeds max_bw, the factor is current_bw/max_bw. + """ + if current_bw <= max_bw: + return 1.0 + else: + return current_bw / max_bw + +def get_current_bandwidth_usage(link_id): + """ + Placeholder function: In a real system, query the current bandwidth usage + for the given network link. Here we return a fixed value for demonstration. 
+ """ + return 150.0 # e.g., 150 -- GitLab From e8cfea358e9e16fa9cfbcd114628268105040470 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 5 Apr 2025 16:57:19 -0400 Subject: [PATCH 079/388] Add debug statements --- config/lassen/network.json | 5 +++++ raps/engine.py | 3 +++ 2 files changed, 8 insertions(+) create mode 100644 config/lassen/network.json diff --git a/config/lassen/network.json b/config/lassen/network.json new file mode 100644 index 0000000..7b0fdc5 --- /dev/null +++ b/config/lassen/network.json @@ -0,0 +1,5 @@ +{ + "NETWORK_MODEL": "capacity", + "UPLINK_CAPACITY": 1000, + "NETWORK_MAX_BW": 1000 +} diff --git a/raps/engine.py b/raps/engine.py index 29aa0ad..9106899 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -217,6 +217,9 @@ class Engine: net_tx = get_utilization(job.ntx_trace, time_quanta_index) net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx) + print("time:", self.current_time, "net util:", net_util) + print("jid", job.id, "net_tx", net_tx) + print("jid", job.id, "net_rx", net_tx) net_utils.append(net_util) # Get the maximum allowed bandwidth from the configuration. -- GitLab From d1332a969aba0cd0ef5afaa2f77c4c0788df43b6 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 20 May 2025 15:22:23 -0400 Subject: [PATCH 080/388] Get basic job dilation working during simulation --- config/lassen/network.json | 4 ++-- raps/config.py | 2 +- raps/dataloaders/lassen.py | 23 +++++++++++++++++++++-- raps/engine.py | 20 +++++++++++++------- raps/job.py | 1 + raps/network.py | 9 +++------ 6 files changed, 41 insertions(+), 18 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index 7b0fdc5..c8a2020 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,5 +1,5 @@ { "NETWORK_MODEL": "capacity", - "UPLINK_CAPACITY": 1000, - "NETWORK_MAX_BW": 1000 + "UPLINK_CAPACITY": 10, + "NETWORK_MAX_BW": 20000 } diff --git a/raps/config.py b/raps/config.py index 51e7d86..909f9f6 100644 --- a/raps/config.py +++ b/raps/config.py @@ -15,7 +15,7 @@ class ConfigManager: def load_system_config(self, system_name: str) -> None: base_path = CONFIG_PATH / system_name - config_files = ['system.json', 'power.json', 'scheduler.json'] + config_files = ['system.json', 'power.json', 'scheduler.json', 'network.json'] optional_files = ['cooling.json', 'uq.json'] for config_file in config_files + optional_files: diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index a3be849..326fbc8 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -171,8 +171,9 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): ib_tx = 4 * node_data['ib_tx'].sum() if node_data['ib_tx'].values.size > 0 else [] ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else [] - #net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) - net_tx, net_rx = [],[] # generate_network_sequences generates errors (e.g. -ff 800d -t 1d ) + # net_tx, net_rx = [],[] # generate_network_sequences generates errors (e.g. -ff 800d -t 1d ) + # net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) + net_tx, net_rx = generate_network_sequences_avg(ib_tx, ib_rx, samples, lambda_poisson=0.3) # no priorities defined! 
priority = row.get('priority', 0) @@ -288,6 +289,24 @@ def generate_network_sequences(total_tx, total_rx, intervals, lambda_poisson): return tx_bursts, rx_bursts +def generate_network_sequences_avg(total_tx, total_rx, intervals, lambda_poisson): + + if not total_tx or not total_rx: + return [], [] + + # Generate sporadic bursts using a Poisson distribution (shared for both tx and rx) + #burst_intervals = np.random.poisson(lam=lambda_poisson, size=intervals) + + # Ensure some intervals have no traffic (both tx and rx will share zero intervals) + #burst_intervals = np.where(burst_intervals > 0, burst_intervals, 0) + + # Adjust bursts for both tx and rx + tx_bursts = [total_tx // intervals] * intervals + rx_bursts = [total_rx // intervals] * intervals + + return tx_bursts, rx_bursts + + def node_index_to_name(index: int, config: dict): """ Converts an index value back to an name string based on system configuration. """ return f"node{index:04d}" diff --git a/raps/engine.py b/raps/engine.py index 9106899..a7a212a 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -5,7 +5,7 @@ import numpy as np from .job import Job, JobState from .policy import PolicyType -from .network import network_utilization +from .network import network_utilization, get_current_bandwidth_usage, network_dilation_factor from .utils import summarize_ranges, expand_ranges, get_utilization from .utils import sum_values, min_value, max_value from .resmgr import ResourceManager @@ -214,25 +214,31 @@ class Engine: net_util = 0 if (isinstance(job.ntx_trace,list) or isinstance(job.ntx_trace,np.ndarray)) and len(job.ntx_trace) and (isinstance(job.nrx_trace,list) or isinstance(job.nrx_trace,list)) and len(job.nrx_trace): + max_link_bw = self.config.get('NETWORK_MAX_BW') net_tx = get_utilization(job.ntx_trace, time_quanta_index) net_rx = get_utilization(job.nrx_trace, time_quanta_index) - net_util = network_utilization(net_tx, net_rx) + net_util = network_utilization(net_tx, net_rx, max_link_bw) print("time:", self.current_time, "net util:", net_util) print("jid", job.id, "net_tx", net_tx) print("jid", job.id, "net_rx", net_tx) net_utils.append(net_util) # Get the maximum allowed bandwidth from the configuration. - network_congestion_threshold = self.config.get('NETWORK_MAX_BW', 100.0) - if net_util > network_congestion_threshold: + if net_util > 1: #network_congestion_threshold: + print(f"congested {net_util} > {max_link_bw}") + print(f"length of {len(job.gpu_trace)} before dilation") # Use our network helper functions to get current bandwidth usage. - current_bw = get_current_bandwidth_usage(link_id="link_1") - dilation_factor = network_dilation_factor(current_bw, network_congestion_threshold) + #current_bw = get_current_bandwidth_usage(link_id="link_1") + current_bw = net_tx + net_rx + dilation_factor = network_dilation_factor(current_bw, max_link_bw) + dilation_factor = min(dilation_factor, 2) # set max dilation factor # Optionally, only apply dilation once per job to avoid compounding the effect. 
- if not hasattr(job, 'network_dilated') or not job.network_dilated: + print("***", hasattr(job, 'network_dilated'), current_bw, max_link_bw, dilation_factor) #if not hasattr(job, 'network_dilated') or not job.network_dilated: + if not job.network_dilated: print(f"Applying dilation factor {dilation_factor:.2f} to job {job.id} due to network congestion") job.apply_dilation(dilation_factor) job.network_dilated = True + print(f"length of {len(job.gpu_trace)} after dilation") else: net_utils.append(0) diff --git a/raps/job.py b/raps/job.py index 15bc9a0..be7506b 100644 --- a/raps/job.py +++ b/raps/job.py @@ -182,6 +182,7 @@ class Job: self.ntx_trace = dilate_trace(self.ntx_trace, factor) self.nrx_trace = dilate_trace(self.nrx_trace, factor) self.wall_time = int(np.round(self.wall_time * factor)) + self.end_time = self.start_time + self.wall_time diff --git a/raps/network.py b/raps/network.py index 2c83110..9b86083 100644 --- a/raps/network.py +++ b/raps/network.py @@ -1,10 +1,7 @@ -TX_MAX = 10000 -RX_MAX = 20000 - -def network_utilization(tx, rx): +def network_utilization(tx, rx, MAX): """Compute average network utilization""" - tx_util = min(tx / TX_MAX, 1.0) # Clamp to 1.0 - rx_util = min(rx / RX_MAX, 1.0) + tx_util = float(tx) / MAX + rx_util = float(rx) / MAX return (tx_util + rx_util) / 2.0 def network_dilation_factor(current_bw, max_bw): -- GitLab From e0c86a475b74bacdd269601b62440c4cd2acd498 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 6 May 2025 13:04:55 -0400 Subject: [PATCH 081/388] Output average transmit/receive --- config/lassen/network.json | 2 +- raps/telemetry.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index c8a2020..ef660da 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,5 +1,5 @@ { "NETWORK_MODEL": "capacity", "UPLINK_CAPACITY": 10, - "NETWORK_MAX_BW": 20000 + "NETWORK_MAX_BW": 9000000000 } diff --git a/raps/telemetry.py b/raps/telemetry.py index a4001d9..be4d803 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -121,6 +121,30 @@ if __name__ == "__main__": print(f'Nodes required (max): {np.max(nr_list)}') print(f'Nodes required (std): {np.std(nr_list):.2f}') + # ——— compute avg network traces ——— + ntx_means = [] + nrx_means = [] + for job_vec in jobs: + ntx = np.array(job_vec.get('ntx_trace', [])) + nrx = np.array(job_vec.get('nrx_trace', [])) + + # only if there’s at least one valid sample + if ntx.size > 0 and not np.all(np.isnan(ntx)): + ntx_means.append(np.nanmean(ntx)) + if nrx.size > 0 and not np.all(np.isnan(nrx)): + nrx_means.append(np.nanmean(nrx)) + + if ntx_means: + print(f'Average ntx_trace per job: {np.mean(ntx_means):.2f}') + else: + print('No valid ntx_trace data found.') + + if nrx_means: + print(f'Average nrx_trace per job: {np.mean(nrx_means):.2f}') + else: + print('No valid nrx_trace data found.') + # ———————————————————————————— + if args.plot: #plot_nodes_histogram(nr_list) #plot_submit_times(submit_times, nr_list) -- GitLab From bcad098a4890669d9cc9ea799816c2e09c0c91c4 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 13 May 2025 13:37:16 -0400 Subject: [PATCH 082/388] Minor cleanup --- raps/engine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index a7a212a..c1e889f 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -225,7 +225,7 @@ class Engine: # Get the maximum allowed bandwidth from the configuration. 
if net_util > 1: #network_congestion_threshold: - print(f"congested {net_util} > {max_link_bw}") + print(f"congested net_util: {net_util}, max_link_bw: {max_link_bw}") print(f"length of {len(job.gpu_trace)} before dilation") # Use our network helper functions to get current bandwidth usage. #current_bw = get_current_bandwidth_usage(link_id="link_1") @@ -233,7 +233,8 @@ class Engine: dilation_factor = network_dilation_factor(current_bw, max_link_bw) dilation_factor = min(dilation_factor, 2) # set max dilation factor # Optionally, only apply dilation once per job to avoid compounding the effect. - print("***", hasattr(job, 'network_dilated'), current_bw, max_link_bw, dilation_factor) #if not hasattr(job, 'network_dilated') or not job.network_dilated: + print("***", hasattr(job, 'network_dilated'), current_bw, max_link_bw, dilation_factor) + #if not hasattr(job, 'network_dilated') or not job.network_dilated: if not job.network_dilated: print(f"Applying dilation factor {dilation_factor:.2f} to job {job.id} due to network congestion") job.apply_dilation(dilation_factor) -- GitLab From 06f21deb8e0153a2467f6bf761a60fca140d8e02 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 20 May 2025 15:29:35 -0400 Subject: [PATCH 083/388] Report network stats on console --- README.md | 8 ++++++++ raps/engine.py | 49 +++++++++++++++++++++++++++++++++++++++++-------- raps/ui.py | 32 +++++++++++++++++++++++++++++--- 3 files changed, 78 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 8f072ac..9f08dcb 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,14 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from # Adastra MI250 python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet +## Perform Network Simulation + +Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to +get the datasets. 
To run a network simulation, use the following command: + + python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --reschedule poisson -t 1h -d + + ## Snapshot of extracted workload data To reduce the expense of extracting the needed data from the telemetry parquet files, diff --git a/raps/engine.py b/raps/engine.py index c1e889f..78db796 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -28,6 +28,9 @@ class TickData: fmu_outputs: Optional[dict] num_active_nodes: int num_free_nodes: int + avg_net_tx: float + avg_net_rx: float + avg_net_util: float class Engine: @@ -57,6 +60,9 @@ class Engine: self.sys_util_history = [] self.scheduler_queue_history = [] self.scheduler_running_history = [] + self.avg_net_tx = [] + self.avg_net_rx = [] + self.avg_net_util = [] # Get scheduler type from command-line args or default scheduler_type = kwargs.get('scheduler', 'default') @@ -160,8 +166,13 @@ class Engine: cpu_utils = [] gpu_utils = [] net_utils = [] + net_tx_list = [] + net_rx_list = [] if self.debug: print(f"Current Time: {self.current_time}") + for job in self.running: + if job.end_time == self.current_time: + job.state = JobState.COMPLETED for job in self.running: @@ -218,28 +229,36 @@ class Engine: net_tx = get_utilization(job.ntx_trace, time_quanta_index) net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx, max_link_bw) - print("time:", self.current_time, "net util:", net_util) - print("jid", job.id, "net_tx", net_tx) - print("jid", job.id, "net_rx", net_tx) + net_tx_list.append(net_tx) + net_rx_list.append(net_rx) + #net_utils.append(net_util) + if self.debug: + print("time:", self.current_time, "net util:", net_util) + print("jid", job.id, "net_tx", net_tx) + print("jid", job.id, "net_rx", net_tx) net_utils.append(net_util) # Get the maximum allowed bandwidth from the configuration. if net_util > 1: #network_congestion_threshold: - print(f"congested net_util: {net_util}, max_link_bw: {max_link_bw}") - print(f"length of {len(job.gpu_trace)} before dilation") + if self.debug: + print(f"congested net_util: {net_util}, max_link_bw: {max_link_bw}") + print(f"length of {len(job.gpu_trace)} before dilation") # Use our network helper functions to get current bandwidth usage. #current_bw = get_current_bandwidth_usage(link_id="link_1") current_bw = net_tx + net_rx dilation_factor = network_dilation_factor(current_bw, max_link_bw) dilation_factor = min(dilation_factor, 2) # set max dilation factor # Optionally, only apply dilation once per job to avoid compounding the effect. 
- print("***", hasattr(job, 'network_dilated'), current_bw, max_link_bw, dilation_factor) + if self.debug: + print("***", hasattr(job, 'network_dilated'), current_bw, max_link_bw, dilation_factor) #if not hasattr(job, 'network_dilated') or not job.network_dilated: if not job.network_dilated: - print(f"Applying dilation factor {dilation_factor:.2f} to job {job.id} due to network congestion") + if self.debug: + print(f"Applying dilation factor {dilation_factor:.2f} to job {job.id} due to network congestion") job.apply_dilation(dilation_factor) job.network_dilated = True - print(f"length of {len(job.gpu_trace)} after dilation") + if self.debug: + print(f"length of {len(job.gpu_trace)} after dilation") else: net_utils.append(0) @@ -309,6 +328,17 @@ class Engine: # Get a dataframe of the power data power_df = self.power_manager.get_power_df(rack_power, rack_loss) + # Compute network averages + n = len(net_utils) or 1 + avg_tx = sum(net_tx_list) / n + avg_rx = sum(net_rx_list) / n + avg_net = sum(net_utils) / n + + # Save network history + self.avg_net_tx.append(avg_tx) + self.avg_net_rx.append(avg_rx) + self.avg_net_util.append(avg_net) + tick_data = TickData( current_time=self.current_time, completed=None, @@ -323,6 +353,9 @@ class Engine: fmu_outputs=cooling_outputs, num_active_nodes=self.num_active_nodes, num_free_nodes=self.num_free_nodes, + avg_net_tx=avg_tx, + avg_net_rx=avg_rx, + avg_net_util=avg_net ) self.current_time += 1 diff --git a/raps/ui.py b/raps/ui.py index a1cd0f1..2e61422 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -139,7 +139,8 @@ class LayoutManager: # Update the layout self.layout["scheduled"].update(Panel(Align(table, align="center"))) - def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes): + def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes, + avg_net_tx, avg_net_rx, avg_net_util): """ Updates the status information table with the provided system status data. @@ -159,7 +160,11 @@ class LayoutManager: List of nodes that are down. 
""" # Define columns with header styles - columns = ["Time", "Jobs Running", "Jobs Queued", "Active Nodes", "Free Nodes", "Down Nodes"] + columns = [ + "Time", "Jobs Running", "Jobs Queued", + "Active Nodes", "Free Nodes", "Down Nodes", + "Net TX (Mbps)", "Net RX (Mbps)", "Net Util (%)" + ] table = Table(header_style="bold magenta", expand=True) for col in columns: table.add_column(col, justify="center") @@ -171,7 +176,10 @@ class LayoutManager: str(nqueue), str(active_nodes), str(free_nodes), - str(len(down_nodes)) + str(len(down_nodes)), + f"{avg_net_tx:.1e}", + f"{avg_net_rx:.1e}", + f"{avg_net_util * 100:.1f}%" ] # Add the row with the 'white' style applied to the whole row table.add_row(*row, style="white") @@ -424,6 +432,24 @@ class LayoutManager: self.render() self.update_progress(1) + self.update_scheduled_jobs(data.running + data.queue) + + self.update_status( + data.current_time, + len(data.running), + len(data.queue), + data.num_active_nodes, + data.num_free_nodes, + data.down_nodes, + data.avg_net_tx, + data.avg_net_rx, + data.avg_net_util, + ) + + self.update_power_array( + data.power_df, data.p_flops, data.g_flops_w, + data.system_util, uncertainties=uncertainties, + ) def render(self): if not self.debug: -- GitLab From 566bfe357d53ee2d681cbd3848dd453d4a0c95a5 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 20 May 2025 15:34:19 -0400 Subject: [PATCH 084/388] Rename *dilation_factor to *slowdown_factor --- raps/engine.py | 18 +++++++++--------- raps/network.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 78db796..af89630 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -246,17 +246,17 @@ class Engine: # Use our network helper functions to get current bandwidth usage. #current_bw = get_current_bandwidth_usage(link_id="link_1") current_bw = net_tx + net_rx - dilation_factor = network_dilation_factor(current_bw, max_link_bw) - dilation_factor = min(dilation_factor, 2) # set max dilation factor - # Optionally, only apply dilation once per job to avoid compounding the effect. + slowdown_factor = network_slowdown_factor(current_bw, max_link_bw) + slowdown_factor = min(slowdown_factor, 2) # set max dilation factor + # Optionally, only apply slowdown once per job to avoid compounding the effect. if self.debug: - print("***", hasattr(job, 'network_dilated'), current_bw, max_link_bw, dilation_factor) - #if not hasattr(job, 'network_dilated') or not job.network_dilated: - if not job.network_dilated: + print("***", hasattr(job, 'dilated'), current_bw, max_link_bw, slowdown_factor) + #if not hasattr(job, 'dilated') or not job.dilated: + if not job.dilated: if self.debug: - print(f"Applying dilation factor {dilation_factor:.2f} to job {job.id} due to network congestion") - job.apply_dilation(dilation_factor) - job.network_dilated = True + print(f"Applying slowdown factor {slowdown_factor:.2f} to job {job.id} due to network congestion") + job.apply_dilation(slowdown_factor) + job.dilated = True if self.debug: print(f"length of {len(job.gpu_trace)} after dilation") diff --git a/raps/network.py b/raps/network.py index 9b86083..66bb268 100644 --- a/raps/network.py +++ b/raps/network.py @@ -4,7 +4,7 @@ def network_utilization(tx, rx, MAX): rx_util = float(rx) / MAX return (tx_util + rx_util) / 2.0 -def network_dilation_factor(current_bw, max_bw): +def network_slowdown_factor(current_bw, max_bw): """ Calculate a dilation factor based on current network bandwidth usage. 
-- GitLab From 28acdc1cb00eaa2bffffee6897009ff6d2efcdf9 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 14 May 2025 12:03:58 -0400 Subject: [PATCH 085/388] Make network.json optional config file --- raps/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/raps/config.py b/raps/config.py index 909f9f6..07bbdb7 100644 --- a/raps/config.py +++ b/raps/config.py @@ -15,8 +15,8 @@ class ConfigManager: def load_system_config(self, system_name: str) -> None: base_path = CONFIG_PATH / system_name - config_files = ['system.json', 'power.json', 'scheduler.json', 'network.json'] - optional_files = ['cooling.json', 'uq.json'] + config_files = ['system.json', 'power.json', 'scheduler.json'] + optional_files = ['cooling.json', 'uq.json', 'network.json'] for config_file in config_files + optional_files: file_path = base_path / config_file -- GitLab From 7b529816dfe207bdc277dfafc668c8f7259ff70e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 14 May 2025 12:09:04 -0400 Subject: [PATCH 086/388] Remove network tx/rx from console update - just show net util --- raps/ui.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/raps/ui.py b/raps/ui.py index 2e61422..8265a11 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -139,8 +139,7 @@ class LayoutManager: # Update the layout self.layout["scheduled"].update(Panel(Align(table, align="center"))) - def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes, - avg_net_tx, avg_net_rx, avg_net_util): + def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes, avg_net_util): """ Updates the status information table with the provided system status data. @@ -162,8 +161,7 @@ class LayoutManager: # Define columns with header styles columns = [ "Time", "Jobs Running", "Jobs Queued", - "Active Nodes", "Free Nodes", "Down Nodes", - "Net TX (Mbps)", "Net RX (Mbps)", "Net Util (%)" + "Active Nodes", "Free Nodes", "Down Nodes", "Net Util (%)" ] table = Table(header_style="bold magenta", expand=True) for col in columns: @@ -177,8 +175,6 @@ class LayoutManager: str(active_nodes), str(free_nodes), str(len(down_nodes)), - f"{avg_net_tx:.1e}", - f"{avg_net_rx:.1e}", f"{avg_net_util * 100:.1f}%" ] # Add the row with the 'white' style applied to the whole row @@ -441,9 +437,7 @@ class LayoutManager: data.num_active_nodes, data.num_free_nodes, data.down_nodes, - data.avg_net_tx, - data.avg_net_rx, - data.avg_net_util, + data.avg_net_util ) self.update_power_array( -- GitLab From e074915e10d993cf346d29d51d2a968b2266a63f Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 20 May 2025 15:36:01 -0400 Subject: [PATCH 087/388] Color slowed down jobs in console with yellow --- config/lassen/network.json | 4 +++- raps/ui.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index ef660da..02e19f9 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,5 +1,7 @@ { + "TOPOLOGY": "fat-tree", "NETWORK_MODEL": "capacity", "UPLINK_CAPACITY": 10, - "NETWORK_MAX_BW": 9000000000 + "NETWORK_MAX_BW": 9e9, + "LATENCY": 1 } diff --git a/raps/ui.py b/raps/ui.py index 8265a11..cc9b07f 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -133,6 +133,10 @@ class LayoutManager: nodes_display, convert_seconds(job.running_time) ] + + if job.dilated: + row = [f"[yellow]{x}[/yellow]" for x in row] + # Add the row with the 'white' style applied to the whole row table.add_row(*row, style="white") -- GitLab From 
c5efd1067a843fdd7620395b118d573491ef5e39 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 20 May 2025 15:37:50 -0400 Subject: [PATCH 088/388] Fix the network utilization statistic which is reported to console --- raps/engine.py | 17 +++++++++-------- raps/network.py | 32 +++++++++++++++++++------------- raps/ui.py | 2 +- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index af89630..b995e48 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -5,7 +5,7 @@ import numpy as np from .job import Job, JobState from .policy import PolicyType -from .network import network_utilization, get_current_bandwidth_usage, network_dilation_factor +from .network import network_utilization, network_congestion, network_slowdown from .utils import summarize_ranges, expand_ranges, get_utilization from .utils import sum_values, min_value, max_value from .resmgr import ResourceManager @@ -165,6 +165,7 @@ class Engine: scheduled_nodes = [] cpu_utils = [] gpu_utils = [] + net_congs = [] net_utils = [] net_tx_list = [] net_rx_list = [] @@ -229,25 +230,24 @@ class Engine: net_tx = get_utilization(job.ntx_trace, time_quanta_index) net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx, max_link_bw) + net_cong = network_congestion(net_tx, net_rx, max_link_bw) net_tx_list.append(net_tx) net_rx_list.append(net_rx) - #net_utils.append(net_util) if self.debug: print("time:", self.current_time, "net util:", net_util) print("jid", job.id, "net_tx", net_tx) print("jid", job.id, "net_rx", net_tx) + net_congs.append(net_cong) net_utils.append(net_util) # Get the maximum allowed bandwidth from the configuration. - if net_util > 1: #network_congestion_threshold: + if net_cong > 1: #network_congestion_threshold: if self.debug: - print(f"congested net_util: {net_util}, max_link_bw: {max_link_bw}") + print(f"congested net_cong: {net_cong}, max_link_bw: {max_link_bw}") print(f"length of {len(job.gpu_trace)} before dilation") - # Use our network helper functions to get current bandwidth usage. - #current_bw = get_current_bandwidth_usage(link_id="link_1") current_bw = net_tx + net_rx - slowdown_factor = network_slowdown_factor(current_bw, max_link_bw) - slowdown_factor = min(slowdown_factor, 2) # set max dilation factor + slowdown_factor = network_slowdown(current_bw, max_link_bw) + slowdown_factor = min(slowdown_factor, 2) # set max slowdown factor # Optionally, only apply slowdown once per job to avoid compounding the effect. if self.debug: print("***", hasattr(job, 'dilated'), current_bw, max_link_bw, slowdown_factor) @@ -262,6 +262,7 @@ class Engine: else: net_utils.append(0) + net_congs.append(0) scheduled_nodes.append(job.scheduled_nodes) # ? cpu_utils.append(cpu_util) diff --git a/raps/network.py b/raps/network.py index 66bb268..2dcbaad 100644 --- a/raps/network.py +++ b/raps/network.py @@ -1,12 +1,25 @@ -def network_utilization(tx, rx, MAX): - """Compute average network utilization""" - tx_util = float(tx) / MAX - rx_util = float(rx) / MAX +def network_congestion(tx, rx, max_bw): + """ + Overload factor ≥0: average of send/recv NOT clamped. + >1.0 means you’re pushing above capacity. + """ + tx_util = float(tx) / max_bw + rx_util = float(rx) / max_bw return (tx_util + rx_util) / 2.0 -def network_slowdown_factor(current_bw, max_bw): + +def network_utilization(tx, rx, max_bw): + """ + True utilization in [0,1]: average of send/recv clamped to 100%. 
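+    Unlike network_congestion above, the per-direction terms saturate at
+    1.0, so the returned value is safe to report as a percentage.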
+ """ + tx_u = min(float(tx) / max_bw, 1.0) + rx_u = min(float(rx) / max_bw, 1.0) + return (tx_u + rx_u) / 2.0 + + +def network_slowdown(current_bw, max_bw): """ - Calculate a dilation factor based on current network bandwidth usage. + Calculate a slowdown factor based on current network bandwidth usage. If current_bw is within limits, the factor is 1.0 (no slowdown). If current_bw exceeds max_bw, the factor is current_bw/max_bw. @@ -15,10 +28,3 @@ def network_slowdown_factor(current_bw, max_bw): return 1.0 else: return current_bw / max_bw - -def get_current_bandwidth_usage(link_id): - """ - Placeholder function: In a real system, query the current bandwidth usage - for the given network link. Here we return a fixed value for demonstration. - """ - return 150.0 # e.g., 150 diff --git a/raps/ui.py b/raps/ui.py index cc9b07f..c9c61b1 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -179,7 +179,7 @@ class LayoutManager: str(active_nodes), str(free_nodes), str(len(down_nodes)), - f"{avg_net_util * 100:.1f}%" + f"{avg_net_util * 100:.0f}%" ] # Add the row with the 'white' style applied to the whole row table.add_row(*row, style="white") -- GitLab From fa6380fe807b1a0b86974356325aa071242faa7a Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 20 May 2025 15:46:37 -0400 Subject: [PATCH 089/388] Report slowdown_per_job (SPJ) to console and to final stats --- config/lassen/network.json | 2 +- raps/engine.py | 72 ++++++++++++++++++++++++++++++++++++-- raps/ui.py | 10 +++--- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index 02e19f9..b243b4a 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -2,6 +2,6 @@ "TOPOLOGY": "fat-tree", "NETWORK_MODEL": "capacity", "UPLINK_CAPACITY": 10, - "NETWORK_MAX_BW": 9e9, + "NETWORK_MAX_BW": 10e9, "LATENCY": 1 } diff --git a/raps/engine.py b/raps/engine.py index b995e48..c8d3be6 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -31,6 +31,7 @@ class TickData: avg_net_tx: float avg_net_rx: float avg_net_util: float + slowdown_per_job: float class Engine: @@ -62,7 +63,8 @@ class Engine: self.scheduler_running_history = [] self.avg_net_tx = [] self.avg_net_rx = [] - self.avg_net_util = [] + self.net_util_history = [] + self.slowdown_history = [] # Get scheduler type from command-line args or default scheduler_type = kwargs.get('scheduler', 'default') @@ -171,6 +173,9 @@ class Engine: net_rx_list = [] if self.debug: print(f"Current Time: {self.current_time}") + + slowdown_factors = [] + for job in self.running: if job.end_time == self.current_time: job.state = JobState.COMPLETED @@ -259,6 +264,10 @@ class Engine: job.dilated = True if self.debug: print(f"length of {len(job.gpu_trace)} after dilation") + else: + slowdown_factor = 1 + + slowdown_factors.append(slowdown_factor) else: net_utils.append(0) @@ -335,10 +344,14 @@ class Engine: avg_rx = sum(net_rx_list) / n avg_net = sum(net_utils) / n + n = len(slowdown_factors) or 1 + slowdown_per_job = sum(slowdown_factors) / n + self.slowdown_history.append(slowdown_per_job) + # Save network history self.avg_net_tx.append(avg_tx) self.avg_net_rx.append(avg_rx) - self.avg_net_util.append(avg_net) + self.net_util_history.append(avg_net) tick_data = TickData( current_time=self.current_time, @@ -356,7 +369,8 @@ class Engine: num_free_nodes=self.num_free_nodes, avg_net_tx=avg_tx, avg_net_rx=avg_rx, - avg_net_util=avg_net + avg_net_util=avg_net, + slowdown_per_job=slowdown_per_job ) self.current_time += 1 @@ -434,6 +448,58 @@ class 
Engine: tick_data.completed = completed_jobs yield tick_data + + def get_stats(self): + """ Return output statistics """ + sum_values = lambda values: sum(x[1] for x in values) if values else 0 + min_value = lambda values: min(x[1] for x in values) if values else 0 + max_value = lambda values: max(x[1] for x in values) if values else 0 + num_samples = len(self.power_manager.history) if self.power_manager else 0 + + throughput = self.jobs_completed / self.timesteps * 3600 if self.timesteps else 0 # Jobs per hour + average_power_mw = sum_values(self.power_manager.history) / num_samples / 1000 if num_samples else 0 + average_loss_mw = sum_values(self.power_manager.loss_history) / num_samples / 1000 if num_samples else 0 + min_loss_mw = min_value(self.power_manager.loss_history) / 1000 if num_samples else 0 + max_loss_mw = max_value(self.power_manager.loss_history) / 1000 if num_samples else 0 + + loss_fraction = average_loss_mw / average_power_mw if average_power_mw else 0 + efficiency = 1 - loss_fraction if loss_fraction else 0 + total_energy_consumed = average_power_mw * self.timesteps / 3600 if self.timesteps else 0 # MW-hr + emissions = total_energy_consumed * 852.3 / 2204.6 / efficiency if efficiency else 0 + total_cost = total_energy_consumed * 1000 * self.config.get('POWER_COST', 0) # Total cost in dollars + + stats = { + 'num_samples': num_samples, + 'jobs completed': self.jobs_completed, + 'throughput': f'{throughput:.2f} jobs/hour', + 'jobs still running': [job.id for job in self.running], + 'jobs still in queue': [job.id for job in self.queue], + 'average power': f'{average_power_mw:.2f} MW', + 'min loss': f'{min_loss_mw:.2f} MW', + 'average loss': f'{average_loss_mw:.2f} MW', + 'max loss': f'{max_loss_mw:.2f} MW', + 'system power efficiency': f'{efficiency * 100:.2f}%', + 'total energy consumed': f'{total_energy_consumed:.2f} MW-hr', + 'carbon emissions': f'{emissions:.2f} metric tons CO2', + 'total cost': f'${total_cost:.2f}' + } + + if self.net_util_history: + mean_net_util = sum(self.net_util_history) / len(self.net_util_history) + else: + mean_net_util = 0.0 + + stats["avg network util"] = f"{mean_net_util*100:.2f}%" + + if self.slowdown_history: + avg_job_slow = sum(self.slowdown_history) / len(self.slowdown_history) + else: + avg_job_slow = 1.0 + stats["avg per-job slowdown"] = f"{avg_job_slow:.2f}x" + + return stats + + def get_job_history_dict(self): return self.job_history_dict diff --git a/raps/ui.py b/raps/ui.py index c9c61b1..ffff185 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -143,7 +143,7 @@ class LayoutManager: # Update the layout self.layout["scheduled"].update(Panel(Align(table, align="center"))) - def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes, avg_net_util): + def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes, avg_net_util, slowdown): """ Updates the status information table with the provided system status data. 
@@ -165,7 +165,7 @@ class LayoutManager: # Define columns with header styles columns = [ "Time", "Jobs Running", "Jobs Queued", - "Active Nodes", "Free Nodes", "Down Nodes", "Net Util (%)" + "Active Nodes", "Free Nodes", "Down Nodes", "Net Util (%)", "SPJ" ] table = Table(header_style="bold magenta", expand=True) for col in columns: @@ -179,7 +179,8 @@ class LayoutManager: str(active_nodes), str(free_nodes), str(len(down_nodes)), - f"{avg_net_util * 100:.0f}%" + f"{avg_net_util * 100:.0f}%", + f"{slowdown:.1f}x" ] # Add the row with the 'white' style applied to the whole row table.add_row(*row, style="white") @@ -441,7 +442,8 @@ class LayoutManager: data.num_active_nodes, data.num_free_nodes, data.down_nodes, - data.avg_net_util + data.avg_net_util, + data.slowdown_per_job ) self.update_power_array( -- GitLab From 3b85a15995767e4d8d74eef05a8d2c660f68bdb6 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 21 May 2025 10:49:18 -0400 Subject: [PATCH 090/388] Fixes after rebase and before pushing to new branch. --- main.py | 2 +- raps/dataloaders/lassen.py | 4 ++-- raps/engine.py | 5 +++++ raps/job.py | 4 +++- raps/telemetry.py | 8 ++++---- raps/ui.py | 11 ++++++----- 6 files changed, 21 insertions(+), 13 deletions(-) diff --git a/main.py b/main.py index 0ca0bbe..f58feb1 100644 --- a/main.py +++ b/main.py @@ -113,7 +113,7 @@ if args.replay: print(*args.replay) jobs, timestep_start_from_data, timestep_end = td.load_data(args.replay) timestep_start += timestep_start_from_data - td.save_snapshot((jobs, timestep_start, timestep_end, args), filename=DIR_NAME) + td.save_snapshot(jobs, timestep_start, timestep_end, args, filename=DIR_NAME) # Set number of timesteps based on the last job running which we assume # is the maximum value of submit_time + wall_time of all the jobs diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 326fbc8..6d0b8a2 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -127,7 +127,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # TODO: Jobs could have a time-series per node! gpu_node_energy = node_data['gpu_energy'].copy() gpu_node_energy[gpu_node_energy < 0] = 0.0 - gpu_node_energy[gpu_node_energy == np.NaN] = 0.0 + gpu_node_energy[gpu_node_energy == np.nan] = 0.0 if len(gpu_node_energy) < 1: gpu_power = gpu_node_idle_power # Setting to idle as other parts of the sim make this assumption else: @@ -152,7 +152,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # Same cpu_node_usage = node_data['cpu_usage'].copy() cpu_node_usage[cpu_node_usage < 0] = 0.0 - cpu_node_usage[cpu_node_usage == np.NaN] = 0.0 + cpu_node_usage[cpu_node_usage == np.nan] = 0.0 if wall_time > 0: threads_per_core = config['THREADS_PER_CORE'] cpu_util = cpu_node_usage.sum() / 10e9 / nodes_required / wall_time / threads_per_core diff --git a/raps/engine.py b/raps/engine.py index c8d3be6..a92fcd7 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -337,6 +337,11 @@ class Engine: else: # Get a dataframe of the power data power_df = self.power_manager.get_power_df(rack_power, rack_loss) + elif power_df is None: # Even if power didnt update, make sure its not None! + power_df = self.power_manager.get_power_df(rack_power, rack_loss) + else: # We made sure there are values in there but power did not need to be updated. 
+ pass + # Compute network averages n = len(net_utils) or 1 diff --git a/raps/job.py b/raps/job.py index be7506b..dd9c857 100644 --- a/raps/job.py +++ b/raps/job.py @@ -58,7 +58,9 @@ def dilate_trace(trace, factor): Returns: - list of float: the dilated trace. """ - if trace is None or len(trace) == 0: + if trace is None or (isinstance(trace,(list, np.ndarray)) and len(trace) == 0): + return trace + if isinstance(trace, (np.float64, float)): # This needs to be handled! return trace original_length = len(trace) # Compute the new length (rounding to the nearest integer) diff --git a/raps/telemetry.py b/raps/telemetry.py index be4d803..07fc2a4 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -46,14 +46,14 @@ class Telemetry: except: print("WARNING: Failed to load dataloader") - def save_snapshot(self, jobs: list, filename: str): + def save_snapshot(self, jobs: list, start_timestep:int, end_timestep:int, args:dict, filename: str): """Saves a snapshot of the jobs to a compressed file. """ - np.savez_compressed(filename, jobs=jobs) + np.savez_compressed(filename, jobs=jobs, start_timestep=start_timestep, end_timestep=end_timestep, args=args) def load_snapshot(self, snapshot: str) -> list: """Reads a snapshot from a compressed file and returns the jobs.""" - jobs = np.load(snapshot, allow_pickle=True, mmap_mode='r') - return jobs['jobs'].tolist() + jobs, start_timestep, end_timestep, args = np.load(snapshot, allow_pickle=True, mmap_mode='r') # This is untested and may need fixing! + return jobs['jobs'].tolist(), start_timestep, end_timestep, args def load_data(self, files): """Load telemetry data using custom data loaders.""" diff --git a/raps/ui.py b/raps/ui.py index ffff185..d66a1aa 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -7,7 +7,8 @@ from rich.layout import Layout from rich.panel import Panel from rich.table import Table from rich.live import Live -from rich.progress import Progress,TextColumn,BarColumn,TaskProgressColumn,TimeRemainingColumn, track, TimeElapsedColumn, MofNCompleteColumn +from rich.progress import Progress,TextColumn, BarColumn, TaskProgressColumn, \ + TimeRemainingColumn, track, TimeElapsedColumn, MofNCompleteColumn from .utils import summarize_ranges, convert_seconds from .constants import ELLIPSES @@ -390,9 +391,9 @@ class LayoutManager: total_table.add_row( f"{system_util:.1f}%", total_power_str, - str(f"{pflops:.2f}"), - str(f"{gflop_per_watt:.1f}"), - total_loss_str + " (" + percent_loss_str+ ")", + str(f"{pflops:.2f}" if pflops is not None else "None"), + str(f"{gflop_per_watt:.1f}" if gflop_per_watt is not None else "None"), + total_loss_str + " (" + percent_loss_str + ")", style="white" # Apply 'white' style to the entire row ) @@ -423,7 +424,7 @@ class LayoutManager: self.update_scheduled_jobs(data.running + data.queue) self.update_status( data.current_time, len(data.running), len(data.queue), data.num_active_nodes, - data.num_free_nodes, data.down_nodes, + data.num_free_nodes, data.down_nodes, data.avg_net_util, data.slowdown_per_job ) self.update_power_array( data.power_df, data.p_flops, data.g_flops_w, -- GitLab From 3e9c16f8b491998194d2b3257d1d255bf3b7b46e Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 21 May 2025 12:45:12 -0400 Subject: [PATCH 091/388] Updated network stats, lassen loaders, engine and jobs adjusting to the rebase on develop. Changes to stats.py: - added network stats. - TODO: Remove or repurpose engine_stats in raps/engine.py this should already live in stats.py but apparently there is residual use! 
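  (For reference, both new entries are plain means over the engine
  histories, roughly:
      avg network util     = sum(net_util_history) / len(net_util_history)
      avg per-job slowdown = sum(slowdown_history) / len(slowdown_history)
  with 0.0 and 1.0 as the respective fallbacks when a history is empty.)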
Changes in Lassen:
- Removed the slicing of jobs to the simulated time window for arrival
  redistribution. ('--arrival poisson' did not work, because only jobs
  relevant to that window were considered, while Poisson arrivals move
  everything forward.)
- This may not be the best behavior yet, but it restores the previous
  main branch's behavior.
- Updated how --arrival handles time values, following the rebased
  development branch. (The previous scheduler handled start times
  differently; the current one is stricter!)
Changes to engine:
- Removed the slowdown cap, as the rebased branch allows missing trace
  values (defaulting to the last known value).
Changes to jobs:
- Adjusted the slowdown return value to reflect the slowdown.
---
 main.py                    |  5 ++++-
 raps/dataloaders/lassen.py | 33 ++++++++++++++++-----------------
 raps/engine.py             |  2 +-
 raps/job.py                |  6 ++++--
 raps/stats.py              | 19 +++++++++++++++++++
 5 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/main.py b/main.py
index f58feb1..19a76d8 100644
--- a/main.py
+++ b/main.py
@@ -32,7 +32,7 @@ from raps.workload import Workload
 from raps.account import Accounts
 from raps.weather import Weather
 from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, next_arrival
-from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats
+from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats
 from raps.utils import convert_numpy_to_builtin
 
 config = ConfigManager(system_name=args.system).get_config()
@@ -178,16 +178,19 @@ layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_en
 engine_stats = get_engine_stats(sc)
 job_stats = get_job_stats(sc)
 scheduler_stats = get_scheduler_stats(sc)
+network_stats = get_network_stats(sc)
 # Following b/c we get the following error when we use PM100 telemetry dataset
 # TypeError: Object of type int64 is not JSON serializable
 try:
     print(json.dumps(engine_stats, indent=4))
     print(json.dumps(job_stats, indent=4))
     print(json.dumps(scheduler_stats, indent=4))
+    print(json.dumps(network_stats, indent=4))
 except:
     print(engine_stats)
     print(job_stats)
     print(scheduler_stats)
+    print(network_stats)
 
 
 if args.plot:
diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py
index 6d0b8a2..8ae55d3 100644
--- a/raps/dataloaders/lassen.py
+++ b/raps/dataloaders/lassen.py
@@ -41,7 +41,7 @@ def load_data(path, **kwargs):
     """
     Loads data from the given file paths and returns job info.
     """
-    nrows = None
+    nrows = 1E5 # None
    alloc_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_history_hashed.csv'), nrows=nrows, low_memory=False)
    node_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_node_history.csv'), nrows=nrows, low_memory=False)
    step_df = pd.read_csv(os.path.join(path[0], 'final_csm_step_history.csv'), nrows=nrows, low_memory=False)
@@ -86,8 +86,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
     simulation_end_timestamp = simulation_start_timestamp + time_to_simulate_timedelta
 
     # As these are >1.4M jobs, filtered to the simulated timestamps before creating the job structs.
- allocation_df = allocation_df[allocation_df['end_timestamp'] >= simulation_start_timestamp] # Job should not have ended before the simulation time - allocation_df = allocation_df[allocation_df['job_submit_timestamp'] < simulation_end_timestamp] # Job has to have been submited before or during the simulaion time + #allocation_df = allocation_df[allocation_df['end_timestamp'] >= simulation_start_timestamp] # Job should not have ended before the simulation time + #allocation_df = allocation_df[allocation_df['job_submit_timestamp'] < simulation_end_timestamp] # Job has to have been submited before or during the simulaion time job_list = [] @@ -179,22 +179,21 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): priority = row.get('priority', 0) partition = row.get('partition', "0") + scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df) + submit_time = compute_time_offset(row['job_submit_timestamp'], telemetry_start_timestamp) + start_time = compute_time_offset(row['begin_timestamp'], telemetry_start_timestamp) + end_time = compute_time_offset(row['end_timestamp'], telemetry_start_timestamp) + time_limit = row['time_limit'] + + trace_time = wall_time + trace_start_time = start_time + trace_end_time = end_time + trace_missing_values = False + if arrival == 'poisson': # Modify the submit times according to Poisson process - scheduled_nodes = None + start_time = 0 + end_time = wall_time submit_time = next_arrival(1 / config['JOB_ARRIVAL_TIME']) - start_time = None # Scheduler will determine start time - end_time = None # Scheduler will determine end time - else: # Prescribed replay - scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df) - submit_time = compute_time_offset(row['job_submit_timestamp'], telemetry_start_timestamp) - start_time = compute_time_offset(row['begin_timestamp'], telemetry_start_timestamp) - end_time = compute_time_offset(row['end_timestamp'], telemetry_start_timestamp) - time_limit = row['time_limit'] - - trace_time = wall_time - trace_start_time = start_time - trace_end_time = end_time - trace_missing_values = False if verbose: print('ib_tx, ib_rx, samples:', ib_tx, ib_rx, samples) diff --git a/raps/engine.py b/raps/engine.py index a92fcd7..d6f4e45 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -252,7 +252,7 @@ class Engine: print(f"length of {len(job.gpu_trace)} before dilation") current_bw = net_tx + net_rx slowdown_factor = network_slowdown(current_bw, max_link_bw) - slowdown_factor = min(slowdown_factor, 2) # set max slowdown factor + #slowdown_factor = min(slowdown_factor, 2) # set max slowdown factor # Optionally, only apply slowdown once per job to avoid compounding the effect. if self.debug: print("***", hasattr(job, 'dilated'), current_bw, max_link_bw, slowdown_factor) diff --git a/raps/job.py b/raps/job.py index dd9c857..1be41e0 100644 --- a/raps/job.py +++ b/raps/job.py @@ -60,8 +60,10 @@ def dilate_trace(trace, factor): """ if trace is None or (isinstance(trace,(list, np.ndarray)) and len(trace) == 0): return trace - if isinstance(trace, (np.float64, float)): # This needs to be handled! - return trace + # Traces can be list/np.array or single float values. 
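+    # (Illustrative sketch: with a slowdown factor of 1.5, an array trace of
+    #  40 quanta is stretched to round(40 * 1.5) = 60 quanta, while a scalar
+    #  per-quantum value of 60.0 becomes 60.0 / 1.5 = 40.0, keeping the total
+    #  work roughly constant as the runtime grows.)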
+ # In case of a single float, we adjust the value directly as it is applied to each timestep + if isinstance(trace, (np.float64, float)): + return trace / factor # Single value original_length = len(trace) # Compute the new length (rounding to the nearest integer) new_length = int(np.round(original_length * factor)) diff --git a/raps/stats.py b/raps/stats.py index 98b97bb..3e04e1c 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -63,6 +63,25 @@ def get_scheduler_stats(engine: Engine): return stats +def get_network_stats(engine: Engine): + stats = {} + + if engine.net_util_history: + mean_net_util = sum(engine.net_util_history) / len(engine.net_util_history) + else: + mean_net_util = 0.0 + + stats["avg network util"] = f"{mean_net_util * 100:.2f}%" + + if engine.slowdown_history: + avg_job_slow = sum(engine.slowdown_history) / len(engine.slowdown_history) + else: + avg_job_slow = 1.0 + stats["avg per-job slowdown"] = f"{avg_job_slow:.2f}x" + + return stats + + def get_job_stats(engine: Engine): """ Return job statistics processed over the engine execution""" # Information on Job-Mix -- GitLab From d34924921cf81159efb0b0697e404212aa1f7394 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 21 May 2025 14:40:54 -0400 Subject: [PATCH 092/388] added commands to the readme / comments to make getting started easier. --- README.md | 4 +++- raps/dataloaders/lassen.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f072ac..1bd9c47 100644 --- a/README.md +++ b/README.md @@ -77,9 +77,11 @@ This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename There are three ways to modify replaying of telemetry data: 1. `--arrival`. Changing the arrival time distribution - replay cases will default to `--arrival prescribed`, where the jobs will be submitted exactly as they were submitted on the physical machine. This can be changed to `--arrival poisson` to change when the jobs arrive, which is especially useful in cases where there may be gaps in time, e.g., when the system goes down for several days, or the system is is underutilized. - + python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --arrival poisson 2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler. + python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h + 3. `--scale`. Changing the scale of each job in the telemetry data. The `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition), and randomly select the number of nodes for each job from one to max nodes. This flag is useful when replaying telemetry from a larger system onto a smaller system. 4. `--shuffle`. Shuffle the jobs before playing. diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index a3be849..f477da9 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -24,6 +24,10 @@ Usage Instructions: # to fast-forward 365 days and replay for 1 day. This region day has 2250 jobs with 1650 jobs executed. 
python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 365d -t 1d + + # For the network replay this command gives suiteable snapshots: + python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson + """ import math import os -- GitLab From e76dc485f965deeca2f7e17e64205f675d56ea5e Mon Sep 17 00:00:00 2001 From: Tim Dykes Date: Thu, 22 May 2025 14:44:24 +0100 Subject: [PATCH 093/388] First draft config for LUMI --- config/lumi/lumi-c/power.json | 18 ++++++++++++++++++ config/lumi/lumi-c/scheduler.json | 17 +++++++++++++++++ config/lumi/lumi-c/system.json | 20 ++++++++++++++++++++ config/lumi/lumi-g/power.json | 18 ++++++++++++++++++ config/lumi/lumi-g/scheduler.json | 17 +++++++++++++++++ config/lumi/lumi-g/system.json | 20 ++++++++++++++++++++ 6 files changed, 110 insertions(+) create mode 100644 config/lumi/lumi-c/power.json create mode 100644 config/lumi/lumi-c/scheduler.json create mode 100644 config/lumi/lumi-c/system.json create mode 100644 config/lumi/lumi-g/power.json create mode 100644 config/lumi/lumi-g/scheduler.json create mode 100644 config/lumi/lumi-g/system.json diff --git a/config/lumi/lumi-c/power.json b/config/lumi/lumi-c/power.json new file mode 100644 index 0000000..d6ec29e --- /dev/null +++ b/config/lumi/lumi-c/power.json @@ -0,0 +1,18 @@ +{ + "POWER_GPU_IDLE": 88, + "POWER_GPU_MAX": 560, + "POWER_CPU_IDLE": 90, + "POWER_CPU_MAX": 280, + "POWER_MEM": 74.26, + "POWER_NIC": 20, + "POWER_NVME": 30, + "POWER_SWITCH": 250, + "POWER_CDU": 8473.47, + "POWER_UPDATE_FREQ": 15, + "RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 +} diff --git a/config/lumi/lumi-c/scheduler.json b/config/lumi/lumi-c/scheduler.json new file mode 100644 index 0000000..530a1c2 --- /dev/null +++ b/config/lumi/lumi-c/scheduler.json @@ -0,0 +1,17 @@ +{ + "SEED": 42, + "JOB_ARRIVAL_TIME": 900, + "MTBF": 11, + "TRACE_QUANTA": 15, + "MIN_WALL_TIME": 60, + "MAX_WALL_TIME": 172800, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 512, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/lumi/lumi-c/system.json b/config/lumi/lumi-c/system.json new file mode 100644 index 0000000..924f281 --- /dev/null +++ b/config/lumi/lumi-c/system.json @@ -0,0 +1,20 @@ +{ + "NUM_CDUS": 1, + "RACKS_PER_CDU": 2, + "NODES_PER_RACK": 256, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 4, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [], + "DOWN_NODES": [], + "CPUS_PER_NODE": 2, + "GPUS_PER_NODE": 0, + "CPU_PEAK_FLOPS": 2.50944E12, + "GPU_PEAK_FLOPS": 0, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0.667 +} diff --git a/config/lumi/lumi-g/power.json b/config/lumi/lumi-g/power.json new file mode 100644 index 0000000..d6ec29e --- /dev/null +++ b/config/lumi/lumi-g/power.json @@ -0,0 +1,18 @@ +{ + "POWER_GPU_IDLE": 88, + "POWER_GPU_MAX": 560, + "POWER_CPU_IDLE": 90, + "POWER_CPU_MAX": 280, + "POWER_MEM": 74.26, + "POWER_NIC": 20, + "POWER_NVME": 30, + "POWER_SWITCH": 250, + "POWER_CDU": 8473.47, + "POWER_UPDATE_FREQ": 15, + "RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 
+} diff --git a/config/lumi/lumi-g/scheduler.json b/config/lumi/lumi-g/scheduler.json new file mode 100644 index 0000000..ed21980 --- /dev/null +++ b/config/lumi/lumi-g/scheduler.json @@ -0,0 +1,17 @@ +{ + "SEED": 42, + "JOB_ARRIVAL_TIME": 900, + "MTBF": 11, + "TRACE_QUANTA": 15, + "MIN_WALL_TIME": 60, + "MAX_WALL_TIME": 172800, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 1024, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/lumi/lumi-g/system.json b/config/lumi/lumi-g/system.json new file mode 100644 index 0000000..9a36fcb --- /dev/null +++ b/config/lumi/lumi-g/system.json @@ -0,0 +1,20 @@ +{ + "NUM_CDUS": 10, + "RACKS_PER_CDU": 3, + "NODES_PER_RACK": 128, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 2, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [], + "DOWN_NODES": [], + "CPUS_PER_NODE": 1, + "GPUS_PER_NODE": 4, + "CPU_PEAK_FLOPS": 2048E9, + "GPU_PEAK_FLOPS": 52E12, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0.667 +} \ No newline at end of file -- GitLab From ec5d118a5ef4a3b4a7d3ad1955c9da66c90cca1f Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 22 May 2025 13:02:46 -0400 Subject: [PATCH 094/388] Generalized synthetic job generation, random is reverted to old behavior. Refactored synthetic job generation. This now takes distributions for each parameter as function and these can be implemented as their own function. Currently configured via command line, but should be added as configs as well, overrideable by the command line. python raps/workload.py --workload synthetic --job-size-distribution uniform weibull --wall-time-stddev 5000 --wall-time-mean 5000 --weibull-job-shape 1.75 --weibull-job-scale 140 --multimodal 0.8 0.2 --wall-time-distribution uniform weibull --weibull-time-scale $((60*60)) --weibull-time-shape $((60*60*60)) Todo: Check for needed additions, and changes, implement other distributions, handle multiple distributions of the same kind, etc. Check for errors and inconsistencies. 
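For reference, the multimodal split simply partitions the job count before
drawing, so (numbers illustrative):

    numjobs=1000 with '--multimodal 0.8 0.2 --job-size-distribution uniform weibull'
    -> floor(1000 * 0.8) = 800 jobs draw sizes from the uniform distribution,
       floor(1000 * 0.2) = 200 jobs draw sizes from the weibull distribution,
    and the fractions must sum to 1.0, otherwise synthetic() raises an Exception.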
--- raps/workload.py | 284 +++++++++++++++++++---------------------------- 1 file changed, 114 insertions(+), 170 deletions(-) diff --git a/raps/workload.py b/raps/workload.py index 5cae9d0..c9aafec 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -59,111 +59,58 @@ class Workload: gpu_trace = gpu_util * np.ones(int(wall_time) // trace_quanta) return (cpu_trace, gpu_trace) - def generate_uniform_jobs(self, *, num_jobs) -> list[list[any]]: - print("TODO Implement propper!") - jobs = [] - partition = random.choice(self.partitions) - config = self.config_map[partition] + def job_arrival_distribution_draw_poisson(self,args,config): + return next_arrival(1 / config['JOB_ARRIVAL_TIME']) - for job_index in range(num_jobs): + def job_size_distribution_draw_uniform(self,args,config): + return random.randint(1, config['MAX_NODES_PER_JOB']) - time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + def job_size_distribution_draw_weibull(self,args,config): + return truncated_weibull(args.weibull_job_scale, args.weibull_job_shape, 1, config['MAX_NODES_PER_JOB']) - nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) - name = random.choice(JOB_NAMES) - account = random.choice(ACCT_NAMES) - cpu_util = random.random() * config['CPUS_PER_NODE'] - gpu_util = random.random() * config['GPUS_PER_NODE'] - mu = config["MIN_WALL_TIME"] * 1.0 - sigma = 4.0 - wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 - time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 - end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = [], [] - jobs.append(job_dict(nodes_required=nodes_required, name=name, - account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, ntx_trace=net_tx, - nrx_trace=net_rx, end_state=end_state, - id=job_index, priority=priority, - partition=partition, - submit_time=time_to_next_job - 100, - time_limit=time_limit, - start_time=time_to_next_job, - end_time=time_to_next_job + wall_time, - wall_time=wall_time, trace_time=wall_time, - trace_start_time=0, trace_end_time=wall_time)) - return jobs + def cpu_utilization_distribution_draw_uniform(self,args,config): + return random.uniform(0.0, config['CPUS_PER_NODE']) - def generate_normal_jobs(self, *, num_jobs) -> list[list[any]]: - print("TODO Implement propper!") - jobs = [] - partition = random.choice(self.partitions) - config = self.config_map[partition] + def gpu_utilization_distribution_draw_uniform(self,args,config): + return random.uniform(0.0, config['GPUS_PER_NODE']) - for job_index in range(num_jobs): + def wall_time_distribution_draw_uniform(self,args,config): + return random.uniform(config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) - time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + def wall_time_distribution_draw_normal(self,args,config): + return max(1,truncated_normalvariate(float(args.wall_time_mean), float(args.wall_time_stddev), config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600) - nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) - name = random.choice(JOB_NAMES) - account = random.choice(ACCT_NAMES) - cpu_util = random.random() * config['CPUS_PER_NODE'] - gpu_util = random.random() * config['GPUS_PER_NODE'] - mu = config["MIN_WALL_TIME"] * 1.0 - sigma = 4.0 - wall_time = truncated_normalvariate(mu, sigma, 
config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 - time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 - end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = [], [] - jobs.append(job_dict(nodes_required=nodes_required, name=name, - account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, ntx_trace=net_tx, - nrx_trace=net_rx, end_state=end_state, - id=job_index, priority=priority, - partition=partition, - submit_time=time_to_next_job - 100, - time_limit=time_limit, - start_time=time_to_next_job, - end_time=time_to_next_job + wall_time, - wall_time=wall_time, trace_time=wall_time, - trace_start_time=0, trace_end_time=wall_time)) - return jobs + def wall_time_distribution_draw_weibull(self,args,config): + wall_time = truncated_weibull((config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1, + # (config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'], + config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute + return wall_time - def generate_weibull_jobs(self, *, shape, scale, num_jobs) -> list[list[any]]: - print("TODO Implement propper!") + def generate_jobs(self, *, + job_arrival_distribution_to_draw_from, + job_size_distribution_to_draw_from, + cpu_util_distribution_to_draw_from, + gpu_util_distribution_to_draw_from, + wall_time_distribution_to_draw_from, + args + ) -> list[list[any]]: jobs = [] partition = random.choice(self.partitions) config = self.config_map[partition] - - for job_index in range(num_jobs): - - time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) - - nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) + for job_index in range(args.numjobs): + submit_time = job_arrival_distribution_to_draw_from(args,config) + start_time = submit_time + nodes_required = job_size_distribution_to_draw_from(args,config) name = random.choice(JOB_NAMES) account = random.choice(ACCT_NAMES) - cpu_util = random.random() * config['CPUS_PER_NODE'] - gpu_util = random.random() * config['GPUS_PER_NODE'] - mu = config["MIN_WALL_TIME"] * 1.0 - sigma = 4.0 - #wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 - - wall_time = truncated_weibull( - (config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1, - #(config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'], - config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute - - #time_limit = truncated_weibull(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes - time_limit = truncated_weibull(config['MAX_WALL_TIME'] // 2 + config['MIN_WALL_TIME'], 1, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes - - - #time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + cpu_util = cpu_util_distribution_to_draw_from(args,config) + gpu_util = gpu_util_distribution_to_draw_from(args,config) + wall_time = wall_time_distribution_to_draw_from(args,config) + end_time = start_time + wall_time + time_limit = max(wall_time,wall_time_distribution_to_draw_from(args,config)) end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + cpu_trace = cpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + gpu_trace = gpu_util # 
self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) priority = random.randint(0, MAX_PRIORITY) net_tx, net_rx = [], [] jobs.append(job_dict(nodes_required=nodes_required, name=name, @@ -172,70 +119,87 @@ class Workload: nrx_trace=net_rx, end_state=end_state, id=job_index, priority=priority, partition=partition, - submit_time=time_to_next_job - 100, + submit_time=submit_time, time_limit=time_limit, - start_time=time_to_next_job, - end_time=time_to_next_job + wall_time, + start_time=start_time, + end_time=end_time, wall_time=wall_time, trace_time=wall_time, trace_start_time=0, trace_end_time=wall_time)) return jobs - + def synthetic(self, **kwargs): + args = kwargs.get('args',None) + print("ARGS") + print(args) + total_jobs = args.numjobs + orig_job_size_distribution = args.job_size_distribution + orig_wall_time_distribution = args.job_size_distribution + jobs = [] + if len(args.job_size_distribution) != 1 and sum(args.multimodal) != 1.0: + raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}") + for i,(jsdist,wtdist,percentage) in enumerate(zip(args.job_size_distribution,args.wall_time_distribution,args.multimodal)): + + args.numjobs = math.floor(total_jobs * percentage) + args.job_size_distribution = jsdist + args.wall_time_distribution = wtdist + + job_arrival_distribution_to_draw_from = self.job_arrival_distribution_draw_poisson + match args.job_size_distribution: + case "uniform": + job_size_distribution_to_draw_from = self.job_size_distribution_draw_uniform + case "weibull": + job_size_distribution_to_draw_from = self.job_size_distribution_draw_weibull + case _: + raise NotImplementedError(args.job_size_distribution) + cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_uniform + gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_uniform + match args.wall_time_distribution: + case "weibull": + wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_weibull + case "normal": + wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_normal + case "uniform": + wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_uniform + + case _: + raise NotImplementedError(args.wall_time_distribution) + + new_jobs = self.generate_jobs( + job_arrival_distribution_to_draw_from=job_arrival_distribution_to_draw_from, + job_size_distribution_to_draw_from=job_size_distribution_to_draw_from, + cpu_util_distribution_to_draw_from=cpu_util_distribution_to_draw_from, + gpu_util_distribution_to_draw_from=gpu_util_distribution_to_draw_from, + wall_time_distribution_to_draw_from=wall_time_distribution_to_draw_from, + args=args) + jobs.extend(new_jobs) + args.numjobs = total_jobs + args.job_size_distribution = orig_job_size_distribution + args.wall_time_distribution = orig_wall_time_distribution + return jobs def generate_random_jobs(self, args) -> list[list[any]]: """ Generate random jobs with specified number of jobs. 
""" partition = random.choice(self.partitions) config = self.config_map[partition] - if args.mu is None: - mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 - if args.sigma is None: - sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 jobs = [] for job_index in range(args.numjobs): # Randomly select a partition # Get the corresponding config for the selected partition - wes_random = False - if wes_random: - nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) - name = random.choice(JOB_NAMES) - account = random.choice(ACCT_NAMES) - cpu_util = random.random() * config['CPUS_PER_NODE'] - gpu_util = random.random() * config['GPUS_PER_NODE'] - wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 - time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 - end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = [], [] - else: - max_nodes = config['MAX_NODES_PER_JOB'] - min_nodes = 1 - nodes_required = truncated_weibull(max_nodes, 0.1, min_nodes, max_nodes) - name = random.choice(JOB_NAMES) - account = random.choice(ACCT_NAMES) - cpu_util = random.random() * config['CPUS_PER_NODE'] - gpu_util = random.random() * config['GPUS_PER_NODE'] - #wall_time = truncated_weibull((config['MAX_WALL_TIME']/4)*3+config['MIN_WALL_TIME'],0.5,config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute - wall_time = truncated_weibull( - (config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1, - #(config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'], - config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute - - #time_limit = truncated_weibull(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes - time_limit = truncated_weibull(config['MAX_WALL_TIME'] // 2 + config['MIN_WALL_TIME'], 1, wall_time, config['MAX_WALL_TIME']) // 300 * 300 # to 5 minutes - end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - if nodes_required < max_nodes * .10: - priority = 0 - elif nodes_required < max_nodes * .20: - priority = 1 - elif nodes_required < max_nodes * .50: - priority = 2 - else: - priority = 3 - net_tx, net_rx = [], [] + nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = random.random() * config['CPUS_PER_NODE'] + gpu_util = random.random() * config['GPUS_PER_NODE'] + mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 + sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 + wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 + time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + priority = random.randint(0, MAX_PRIORITY) + net_tx, net_rx = [], [] # Jobs arrive according to Poisson process time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) @@ -252,31 +216,6 @@ class Workload: end_time=time_to_next_job + wall_time, wall_time=wall_time, trace_time=wall_time, trace_start_time=0, 
trace_end_time=wall_time)) - - return jobs - - def synthetic(self, **kwargs): - args = kwargs.get('args',None) - print("ARGS") - print(args) - num_jobs = args.numjobs - #for key,value in kwargs.items(): - # print(key,value) - #print("HERE") - #print(sum(kwargs.get('multimodal'))) - jobs = [] - if len(args.distribution) != 1 and sum(args.multimodal) != 1.0: - raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}") - for dist,percentage in zip(args.distribution,args.multimodal): - print(args.distribution) - if "uniform" in args.distribution: - jobs.extend(self.generate_uniform_jobs(num_jobs=int(percentage * num_jobs))) - elif "weibull" in args.distribution: - jobs.extend(self.generate_weibull_jobs(shape=args.dist_shape,scale=args.dist_scale,num_jobs=int(percentage * num_jobs))) - elif "normal" in args.distribution: - jobs.extend(self.generate_normal_jobs(num_jobs=int(percentage * num_jobs))) - else: - pass return jobs def random(self, **kwargs): @@ -535,7 +474,7 @@ def plot_job_hist(jobs): #axs[1,0].set_yticklabels([str(n).zfill(2) + ':00' for n in np.arange(min(y)//3600, max(y)//3600, 1)]) minx_s = 0 maxx_s = max(x2) - x_label_mins = [n for n in np.arange(minx_s // 60 ,maxx_s // 60 )] + x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for (x1,x2) in [(n // 60,n % 60) for @@ -561,11 +500,16 @@ def add_workload_to_parser(parser): parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') parser.add_argument("--multimodal", default=[1.0], type=float, nargs="+", help="Percentage to draw from each distribution (list of floats)e.g. 
'0.2 0.8' percentages apply in order to the list of the --distribution argument list.") - parser.add_argument("--distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') - parser.add_argument("--dist_shape", nargs="+", type=float, required=False, help="Shape of weibull") - parser.add_argument("--dist_scale", nargs="+", type=float, required=False, help="Scale of weibull") - parser.add_argument("--mu", nargs="+", type=float, required=False, help="Mean (mu) for Normal distribution") - parser.add_argument("--sigma", nargs="+", type=float, required=False, help="Standard deviation (sigma) for Normal distribution") + parser.add_argument("--job-size-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') + parser.add_argument("--weibull-job-shape", type=float, required=False, help="Shape of weibull") + parser.add_argument("--weibull-job-scale", type=float, required=False, help="Scale of weibull") + + parser.add_argument("--wall-time-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') + parser.add_argument("--weibull-time-shape", type=float, required=False, help="Shape of weibull") + parser.add_argument("--weibull-time-scale", type=float, required=False, help="Scale of weibull") + + parser.add_argument("--wall-time-mean", type=float, required=False, help="Mean (mu) for Normal distribution") + parser.add_argument("--wall-time-stddev", type=float, required=False, help="Standard deviation (sigma) for Normal distribution") return parser -- GitLab From 9d13139079f791ffb151ac4b73af860707b8beb6 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 22 May 2025 13:53:25 -0400 Subject: [PATCH 095/388] Random time values changed the x_label_str to be a float. Fixed. --- raps/workload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/workload.py b/raps/workload.py index c9aafec..695375a 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -473,7 +473,7 @@ def plot_job_hist(jobs): axs[1,0].set_xlabel("wall time [hh:mm]") #axs[1,0].set_yticklabels([str(n).zfill(2) + ':00' for n in np.arange(min(y)//3600, max(y)//3600, 1)]) minx_s = 0 - maxx_s = max(x2) + maxx_s = math.ceil(max(x2)) x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for -- GitLab From 795660607b82c678293544b43b9b506fbed485e5 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 22 May 2025 16:30:25 -0400 Subject: [PATCH 096/388] Add histogram plot of network traffic --- README.md | 3 +-- raps/dataloaders/lassen.py | 5 +++-- raps/plotting.py | 26 +++++++++++++++++++++++++- raps/telemetry.py | 8 ++++++-- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 9f08dcb..90523eb 100644 --- a/README.md +++ b/README.md @@ -49,8 +49,7 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. 
To run a network simulation, use the following command: - python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --reschedule poisson -t 1h -d - + python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson ## Snapshot of extracted workload data diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 8ae55d3..6eb46ba 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -13,8 +13,8 @@ Usage Instructions: git clone https://github.com/LLNL/LAST/ && cd LAST git lfs pull - # to analyze dataset - python -m raps.telemetry -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -v + # to analyze dataset and plot histograms + python -m raps.telemetry -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --plot # to simulate the dataset as submitted python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen @@ -24,6 +24,7 @@ Usage Instructions: # to fast-forward 365 days and replay for 1 day. This region day has 2250 jobs with 1650 jobs executed. python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 365d -t 1d + """ import math import os diff --git a/raps/plotting.py b/raps/plotting.py index cf85750..9e8609e 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -280,7 +280,31 @@ def plot_job_gantt(start_times, end_times, node_counts): plt.tight_layout() plt.savefig('job_gantt.png', dpi=300) - plt.show() + + +def plot_network_histogram(data, bins=50, save_path='network_histogram.png'): + """ + Plot a histogram of network traffic per job, with scientific notation on the x-axis. + """ + import matplotlib.pyplot as plt + + plt.clf() + plt.figure(figsize=(10, 3)) + plt.hist(data, bins=bins, edgecolor='black', alpha=0.7) + + # log-scale the y-axis + plt.yscale('log') + + # force scientific notation on x-axis + plt.ticklabel_format(style='scientific', axis='x', scilimits=(0,0)) + + plt.xlabel('Network Traffic per Job (bytes)') + plt.ylabel('Frequency') + plt.title('Histogram of Network Traffic per Job') + plt.grid(True, which='both', ls='--', lw=0.5) + + plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.close() if __name__ == "__main__": diff --git a/raps/telemetry.py b/raps/telemetry.py index 07fc2a4..c94a06c 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -30,7 +30,7 @@ from tqdm import tqdm from .config import ConfigManager from .job import Job from .account import Accounts -from .plotting import plot_submit_times, plot_nodes_histogram, plot_job_gantt +from .plotting import plot_submit_times, plot_nodes_histogram, plot_job_gantt, plot_network_histogram from .utils import next_arrival @@ -143,9 +143,13 @@ if __name__ == "__main__": print(f'Average nrx_trace per job: {np.mean(nrx_means):.2f}') else: print('No valid nrx_trace data found.') - # ———————————————————————————— if args.plot: #plot_nodes_histogram(nr_list) #plot_submit_times(submit_times, nr_list) plot_job_gantt(submit_times, end_times, nr_list) + + if ntx_means and nrx_means: + # combine into total per‐job traffic + net_means = [tx + rx for tx, rx in zip(ntx_means, nrx_means)] + plot_network_histogram(net_means) -- GitLab From 947607d1d945503f4ad01e812a26132f97e770e4 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 22 May 2025 22:31:51 -0400 Subject: [PATCH 097/388] Cleared the parameters and fixed some of the distributions from default parameters. 
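The draw helpers now read their parameters from dedicated argparse
attributes (jobsize_weibull_scale/shape, jobsize_normal_mean/stddev,
walltime_weibull_scale/shape, walltime_normal_mean/stddev); e.g. the
normal wall-time draw is now, in sketch form:

    truncated_normalvariate(args.walltime_normal_mean, args.walltime_normal_stddev,
                            config['MIN_WALL_TIME'], config['MAX_WALL_TIME'])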
Todo next: Plot the submit time and the wall time in a bottom plot (Gantt chart).
---
 raps/workload.py | 70 ++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 26 deletions(-)

diff --git a/raps/workload.py b/raps/workload.py
index 695375a..debc806 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -66,7 +66,10 @@ class Workload:
         return random.randint(1, config['MAX_NODES_PER_JOB'])

     def job_size_distribution_draw_weibull(self,args,config):
-        return truncated_weibull(args.weibull_job_scale, args.weibull_job_shape, 1, config['MAX_NODES_PER_JOB'])
+        return truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, 1, config['MAX_NODES_PER_JOB'])
+
+    def job_size_distribution_draw_normal(self,args,config):
+        return truncated_normalvariate(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB'])

     def cpu_utilization_distribution_draw_uniform(self,args,config):
         return random.uniform(0.0, config['CPUS_PER_NODE'])
@@ -78,13 +81,18 @@ class Workload:
         return random.uniform(config['MIN_WALL_TIME'],config['MAX_WALL_TIME'])

     def wall_time_distribution_draw_normal(self,args,config):
-        return max(1,truncated_normalvariate(float(args.wall_time_mean), float(args.wall_time_stddev), config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600)
+        return max(1,truncated_normalvariate(float(args.walltime_normal_mean), float(args.walltime_normal_stddev), config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) / 3600 * 3600)

     def wall_time_distribution_draw_weibull(self,args,config):
-        wall_time = truncated_weibull((config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1,
-                                      # (config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'],
-                                      config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute
-        return wall_time
+        return truncated_weibull(args.walltime_weibull_scale, args.walltime_weibull_shape, config['MIN_WALL_TIME'], config['MAX_WALL_TIME'])
+
+        #wall_time = random.weibullvariate(args.walltime_weibull_scale,args.walltime_weibull_shape)
+        ##wall_time = truncated_weibull(args.walltime_weibull_scale,args.walltime_weibull_shape)
+
+        ##(config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1,
+        ## # (config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'],
+        ## config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute
+        #return wall_time

     def generate_jobs(self, *,
                       job_arrival_distribution_to_draw_from,
@@ -132,28 +140,30 @@ class Workload:
         print("ARGS")
         print(args)
         total_jobs = args.numjobs
-        orig_job_size_distribution = args.job_size_distribution
-        orig_wall_time_distribution = args.job_size_distribution
+        orig_job_size_distribution = args.jobsize_distribution
+        orig_wall_time_distribution = args.jobsize_distribution
        jobs = []
-        if len(args.job_size_distribution) != 1 and sum(args.multimodal) != 1.0:
+        if len(args.jobsize_distribution) != 1 and sum(args.multimodal) != 1.0:
             raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}")
-        for i,(jsdist,wtdist,percentage) in enumerate(zip(args.job_size_distribution,args.wall_time_distribution,args.multimodal)):
+        for i,(jsdist,wtdist,percentage) in enumerate(zip(args.jobsize_distribution,args.walltime_distribution,args.multimodal)):
             args.numjobs = math.floor(total_jobs * percentage)
-            args.job_size_distribution = jsdist
-            args.wall_time_distribution = wtdist
+            args.jobsize_distribution = jsdist
+            args.walltime_distribution = wtdist

             job_arrival_distribution_to_draw_from = self.job_arrival_distribution_draw_poisson
-            match
args.job_size_distribution: + match args.jobsize_distribution: case "uniform": job_size_distribution_to_draw_from = self.job_size_distribution_draw_uniform + case "normal": + job_size_distribution_to_draw_from = self.job_size_distribution_draw_normal case "weibull": job_size_distribution_to_draw_from = self.job_size_distribution_draw_weibull case _: - raise NotImplementedError(args.job_size_distribution) + raise NotImplementedError(args.jobsize_distribution) cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_uniform gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_uniform - match args.wall_time_distribution: + match args.walltime_distribution: case "weibull": wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_weibull case "normal": @@ -162,7 +172,7 @@ class Workload: wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_uniform case _: - raise NotImplementedError(args.wall_time_distribution) + raise NotImplementedError(args.walltime_distribution) new_jobs = self.generate_jobs( job_arrival_distribution_to_draw_from=job_arrival_distribution_to_draw_from, @@ -173,8 +183,8 @@ class Workload: args=args) jobs.extend(new_jobs) args.numjobs = total_jobs - args.job_size_distribution = orig_job_size_distribution - args.wall_time_distribution = orig_wall_time_distribution + args.jobsize_distribution = orig_job_size_distribution + args.walltime_distribution = orig_wall_time_distribution return jobs def generate_random_jobs(self, args) -> list[list[any]]: @@ -500,16 +510,24 @@ def add_workload_to_parser(parser): parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') parser.add_argument("--multimodal", default=[1.0], type=float, nargs="+", help="Percentage to draw from each distribution (list of floats)e.g. 
'0.2 0.8' percentages apply in order to the list of the --distribution argument list.") - parser.add_argument("--job-size-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') - parser.add_argument("--weibull-job-shape", type=float, required=False, help="Shape of weibull") - parser.add_argument("--weibull-job-scale", type=float, required=False, help="Scale of weibull") - parser.add_argument("--wall-time-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') - parser.add_argument("--weibull-time-shape", type=float, required=False, help="Shape of weibull") - parser.add_argument("--weibull-time-scale", type=float, required=False, help="Scale of weibull") + # Jobsize: + parser.add_argument("--jobsize-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') + + parser.add_argument("--jobsize-normal-mean", type=float, required=False, help="Mean (mu) for Normal distribution") + parser.add_argument("--jobsize-normal-stddev", type=float, required=False, help="Standard deviation (sigma) for Normal distribution") + + parser.add_argument("--jobsize-weibull-shape", type=float, required=False, help="Jobsize shape of weibull") + parser.add_argument("--jobsize-weibull-scale", type=float, required=False, help="Jobsize scale of weibull") + + # Walltime: + parser.add_argument("--walltime-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') + + parser.add_argument("--walltime-normal-mean", type=float, required=False, help="Walltime mean (mu) for Normal distribution") + parser.add_argument("--walltime-normal-stddev", type=float, required=False, help="Walltime standard deviation (sigma) for Normal distribution") - parser.add_argument("--wall-time-mean", type=float, required=False, help="Mean (mu) for Normal distribution") - parser.add_argument("--wall-time-stddev", type=float, required=False, help="Standard deviation (sigma) for Normal distribution") + parser.add_argument("--walltime-weibull-shape", type=float, required=False, help="Walltime shape of weibull") + parser.add_argument("--walltime-weibull-scale", type=float, required=False, help="Walltime scale of weibull") return parser -- GitLab From cb3d49667a55a9dc58ebee3e46083c0f6900308f Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 23 May 2025 13:29:02 -0400 Subject: [PATCH 098/388] Gantt chart for arrivals --- raps/utils.py | 4 +- raps/workload.py | 117 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 91 insertions(+), 30 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index 71ff4fc..7e0dab5 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -323,8 +323,8 @@ def create_casename(prefix=''): return prefix + str(uuid.uuid4())[:7] -def next_arrival(lambda_rate): - if not hasattr(next_arrival, 'next_time'): +def next_arrival(lambda_rate,reset=False): + if not hasattr(next_arrival, 'next_time') or reset is True: # Initialize the first time it's called next_arrival.next_time = 0 else: diff --git a/raps/workload.py b/raps/workload.py index debc806..d58484b 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -29,6 +29,7 @@ import random import numpy as np import argparse import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec from raps.job import job_dict @@ -181,6 +182,7 @@ class Workload: gpu_util_distribution_to_draw_from=gpu_util_distribution_to_draw_from, 
wall_time_distribution_to_draw_from=wall_time_distribution_to_draw_from, args=args) + next_arrival(0,reset=True) jobs.extend(new_jobs) args.numjobs = total_jobs args.jobsize_distribution = orig_job_size_distribution @@ -442,45 +444,68 @@ class Workload: return jobs -def plot_job_hist(jobs): +def plot_job_hist(jobs,num_dist=1): y = [y['nodes_required'] for y in jobs] x = [x['wall_time'] for x in jobs] x2 = [x['time_limit'] for x in jobs] - fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) - # Remove space between subplots - fig.subplots_adjust(wspace=0, hspace=0) - # Create scatter plot + fig_m = plt.figure() + gs = fig_m.add_gridspec(3, 1) + gs0 = gs[0:2].subgridspec(5,5) + gs1 = gs[2].subgridspec(1,1) + #ax0 = fig_m.add_subplot(gs[:2,:]) + #ax1 = fig_m.add_subplot(gs[2:,:]) + + #gss = gridspec.GridSpec(5, 5, figure=ax0) + #fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) + axs = [] + col = [] + col.append(fig_m.add_subplot(gs0[0,:4])) + col.append(fig_m.add_subplot(gs0[4:,4:])) + axs.append(col.copy()) + col = [] + col.append(fig_m.add_subplot(gs0[1:,:4])) + col.append(fig_m.add_subplot(gs0[1:,4:])) + axs.append(col.copy()) + + ax_b = fig_m.add_subplot(gs1[:,:]) + #ax00 = fig_m.add_subplot(gs0[1:5,0]) + #ax10 = ax1.add_subplot(gss[1:4,1:4]) + #ax11 = ax1.add_subplot(gss[4:,1:4]) + + ## Remove space between subplots + #fig.subplots_adjust(wspace=0, hspace=0) + ## Create scatter plot for i in range(len(x)): - axs[1,0].plot([x[i],x2[i]],[y[i],y[i]],color='lightblue',zorder=1) - axs[1, 0].scatter(x2, y,marker='.',c='lightblue',zorder=2) - axs[1, 0].scatter(x, y,zorder=3) + axs[1][0].plot([x[i],x2[i]],[y[i],y[i]],color='lightblue',zorder=1) + axs[1][0].scatter(x2, y,marker='.',c='lightblue',zorder=2) + axs[1][0].scatter(x, y,zorder=3) - axs[0, 0].hist(x2,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical',color='lightblue') - axs[0, 0].hist(x,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical') - #print(x) - axs[1, 0].sharex(axs[0,0]) + axs[0][0].hist(x2,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical',color='lightblue') + axs[0][0].hist(x,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical') + ##print(x) + axs[1][0].sharex(axs[0][0]) - axs[1, 1].hist(y,bins=max(1,min(100,(max(y) - min(y)))), orientation='horizontal') - axs[1, 0].sharey(axs[1,1]) + axs[1][1].hist(y,bins=max(1,min(100,(max(y) - min(y)))), orientation='horizontal') + axs[1][0].sharey(axs[1][1]) - # Remove ticks - axs[0, 0].set_xticks([]) + ## Remove ticks + axs[0][0].set_xticks([]) #axs[0, 0].set_yticks([]) #axs[1, 1].set_xticks([]) - axs[1, 1].set_yticks([]) + axs[1][1].set_yticks([]) #axs[0, 1].set_xticks([]) #axs[0, 1].set_yticks([]) #axs[0, 1].set_yticks([]) - axs[0, 1].spines['top'].set_color('white') - axs[0, 1].set_yticks([]) - axs[0, 1].set_xticks([]) + axs[0][1].spines['top'].set_color('white') + axs[0][1].set_yticks([]) + axs[0][1].set_xticks([]) #axs[0, 1].spines['bottom'].set_color('white') #axs[0, 1].spines['left'].set_color('white') - axs[0, 1].spines['right'].set_color('white') + axs[0][1].spines['right'].set_color('white') - axs[1,0].set_ylabel("nodes [N]") - axs[1,0].set_xlabel("wall time [hh:mm]") + axs[1][0].set_ylabel("nodes [N]") + axs[1][0].set_xlabel("wall time [hh:mm]") #axs[1,0].set_yticklabels([str(n).zfill(2) + ':00' for n in np.arange(min(y)//3600, max(y)//3600, 1)]) minx_s = 0 maxx_s = math.ceil(max(x2)) @@ -490,16 +515,48 @@ def plot_job_hist(jobs): (x1,x2) in [(n 
// 60,n % 60) for n in x_label_mins[0::60]]] print(x_label_str) - axs[1,0].set_xticks(x_label_ticks,x_label_str) + axs[1][0].set_xticks(x_label_ticks,x_label_str) miny = min(y) maxy = max(y) y_ticks = np.arange(0,maxy,maxy // 10) y_ticks[0] = miny - axs[1,0].set_yticks(y_ticks) + axs[1][0].set_yticks(y_ticks) + + axs[0][0].tick_params(axis="x", labelbottom=False) + axs[1][1].tick_params(axis="y", labelleft=False) + + duration = [x['wall_time'] for x in jobs] + nodes_required = [x['nodes_required'] for x in jobs] + submit_t = [x['submit_time'] for x in jobs] + + offset = 0 + + gantt_nodes = args.gantt_nodes + if gantt_nodes: + for i in range(len(x)): + #ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) + ax_b.barh(offset+nodes_required[i]/2,duration[i], height=nodes_required[i], left=submit_t[i]) + offset += nodes_required[i] + if len(x)%num_dist: + ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5*sum(nodes_required)*0.01) + #ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) + else: + for i in range(len(x)): + ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) + for i in range(1,num_dist): + ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) + #ax_b labels: + minx_s = 0 + maxx_s = math.ceil(max([x['wall_time'] for x in jobs]) + max([x['submit_time'] for x in jobs])) + x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] + x_label_ticks = [n * 60 for n in x_label_mins[0::60]] + x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for + (x1,x2) in [(n // 60,n % 60) for + n in x_label_mins[0::60]]] - axs[0,0].tick_params(axis="x", labelbottom=False) - axs[1,1].tick_params(axis="y", labelleft=False) + ax_b.set_xticks(x_label_ticks,x_label_str) + ax_b.yaxis.set_inverted(True) plt.show() @@ -528,6 +585,7 @@ def add_workload_to_parser(parser): parser.add_argument("--walltime-weibull-shape", type=float, required=False, help="Walltime shape of weibull") parser.add_argument("--walltime-weibull-scale", type=float, required=False, help="Walltime scale of weibull") + parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gannt with nodes required as line thickness (default false)") return parser @@ -540,4 +598,7 @@ if __name__ == "__main__": workload = Workload(config) jobs = getattr(workload, args.workload)(args=args) - plot_job_hist(jobs) + num_dist = 1 + if args.multimodal: + num_dist = len(args.multimodal) + plot_job_hist(jobs,num_dist) -- GitLab From 3403021a1e9a1f9e8980039ce5f87777da214f1a Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 23 May 2025 17:22:40 -0400 Subject: [PATCH 099/388] Added Gantt Plot for submit time with wall time. 
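For reference, the bottom panel added here is essentially a Gantt chart of job
arrivals: each job is drawn as a horizontal bar whose left edge is its submit
time and whose width is its wall time. A minimal standalone sketch of the idea
(assuming jobs is a list of job_dict-style dicts carrying 'submit_time',
'wall_time', and 'nodes_required'; the sample values below are made up):

    import matplotlib.pyplot as plt

    jobs = [
        {'submit_time': 0,    'wall_time': 3600, 'nodes_required': 4},
        {'submit_time': 600,  'wall_time': 1800, 'nodes_required': 2},
        {'submit_time': 2400, 'wall_time': 7200, 'nodes_required': 8},
    ]

    fig, ax = plt.subplots()
    for i, job in enumerate(jobs):
        # one bar per job: left edge = submit time, width = wall time
        ax.barh(i, job['wall_time'], height=1.0, left=job['submit_time'])
    ax.set_xlabel('time [s]')
    ax.set_ylabel('Job ID')
    ax.yaxis.set_inverted(True)  # first-submitted job on top
    plt.show()

The --gantt-nodes variant instead uses nodes_required as the bar height, so
the y-axis accumulates node counts rather than job indices.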
--- raps/workload.py | 74 +++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/raps/workload.py b/raps/workload.py index d58484b..9b84315 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -138,8 +138,6 @@ class Workload: def synthetic(self, **kwargs): args = kwargs.get('args',None) - print("ARGS") - print(args) total_jobs = args.numjobs orig_job_size_distribution = args.jobsize_distribution orig_wall_time_distribution = args.jobsize_distribution @@ -444,15 +442,25 @@ class Workload: return jobs -def plot_job_hist(jobs,num_dist=1): +def plot_job_hist(jobs,num_dist=1,split=[1.0,0.0]): y = [y['nodes_required'] for y in jobs] x = [x['wall_time'] for x in jobs] x2 = [x['time_limit'] for x in jobs] fig_m = plt.figure() - gs = fig_m.add_gridspec(3, 1) - gs0 = gs[0:2].subgridspec(5,5) - gs1 = gs[2].subgridspec(1,1) + gs = fig_m.add_gridspec(30, 1) + gs0 = gs[0:20].subgridspec(500,500,hspace=0,wspace=0) + gs1 = gs[24:].subgridspec(1,1) + + ax_top = fig_m.add_subplot(gs0[:]) + ax_top.axis('off') + ax_top.set_title('Job Distribution') + + ax_bot = fig_m.add_subplot(gs1[:]) + ax_bot.axis('off') + ax_bot.set_title('Submit Time + Wall Time') + + #ax0 = fig_m.add_subplot(gs[:2,:]) #ax1 = fig_m.add_subplot(gs[2:,:]) @@ -460,22 +468,17 @@ def plot_job_hist(jobs,num_dist=1): #fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) axs = [] col = [] - col.append(fig_m.add_subplot(gs0[0,:4])) - col.append(fig_m.add_subplot(gs0[4:,4:])) + col.append(fig_m.add_subplot(gs0[:100,:433])) + col.append(fig_m.add_subplot(gs0[400:,433:])) axs.append(col.copy()) col = [] - col.append(fig_m.add_subplot(gs0[1:,:4])) - col.append(fig_m.add_subplot(gs0[1:,4:])) + col.append(fig_m.add_subplot(gs0[100:,:433])) + col.append(fig_m.add_subplot(gs0[100:,433:])) axs.append(col.copy()) ax_b = fig_m.add_subplot(gs1[:,:]) - #ax00 = fig_m.add_subplot(gs0[1:5,0]) - #ax10 = ax1.add_subplot(gss[1:4,1:4]) - #ax11 = ax1.add_subplot(gss[4:,1:4]) - ## Remove space between subplots - #fig.subplots_adjust(wspace=0, hspace=0) - ## Create scatter plot + # Create scatter plot for i in range(len(x)): axs[1][0].plot([x[i],x2[i]],[y[i],y[i]],color='lightblue',zorder=1) axs[1][0].scatter(x2, y,marker='.',c='lightblue',zorder=2) @@ -483,30 +486,21 @@ def plot_job_hist(jobs,num_dist=1): axs[0][0].hist(x2,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical',color='lightblue') axs[0][0].hist(x,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical') - ##print(x) axs[1][0].sharex(axs[0][0]) axs[1][1].hist(y,bins=max(1,min(100,(max(y) - min(y)))), orientation='horizontal') axs[1][0].sharey(axs[1][1]) - ## Remove ticks + # Remove ticks axs[0][0].set_xticks([]) - #axs[0, 0].set_yticks([]) - #axs[1, 1].set_xticks([]) axs[1][1].set_yticks([]) - #axs[0, 1].set_xticks([]) - #axs[0, 1].set_yticks([]) - #axs[0, 1].set_yticks([]) axs[0][1].spines['top'].set_color('white') axs[0][1].set_yticks([]) axs[0][1].set_xticks([]) - #axs[0, 1].spines['bottom'].set_color('white') - #axs[0, 1].spines['left'].set_color('white') axs[0][1].spines['right'].set_color('white') axs[1][0].set_ylabel("nodes [N]") axs[1][0].set_xlabel("wall time [hh:mm]") - #axs[1,0].set_yticklabels([str(n).zfill(2) + ':00' for n in np.arange(min(y)//3600, max(y)//3600, 1)]) minx_s = 0 maxx_s = math.ceil(max(x2)) x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] @@ -514,7 +508,6 @@ def plot_job_hist(jobs,num_dist=1): x_label_str = [str(x1).zfill(2) + ":" + 
str(x2).zfill(2) for (x1,x2) in [(n // 60,n % 60) for n in x_label_mins[0::60]]] - print(x_label_str) axs[1][0].set_xticks(x_label_ticks,x_label_str) miny = min(y) @@ -531,22 +524,37 @@ def plot_job_hist(jobs,num_dist=1): submit_t = [x['submit_time'] for x in jobs] offset = 0 - + split_index = 0 + split_offset = math.floor(len(x) * split[split_index]) gantt_nodes = args.gantt_nodes if gantt_nodes: + if split[0] == 0.0: + ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5) + split_index += 1 for i in range(len(x)): #ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) - ax_b.barh(offset+nodes_required[i]/2,duration[i], height=nodes_required[i], left=submit_t[i]) + ax_b.barh(offset + nodes_required[i] / 2,duration[i], height=nodes_required[i], left=submit_t[i]) offset += nodes_required[i] - if len(x)%num_dist: - ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5*sum(nodes_required)*0.01) + if i != len(x) - 1 and i == split_offset - 1 and split_index < len(split): + ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5) + split_index += 1 + split_offset += math.floor(len(x) * split[split_index]) #ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) + if split[-1] == 0.0: + ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5) + split_index += 1 + ax_b.set_ylabel("Jobs' acc. nodes") else: for i in range(len(x)): ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) for i in range(1,num_dist): - ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) + if num_dist == 1: + break + ax_b.axhline(y=(len(x) * split[split_index]) - 0.5, color='red', linestyle='--',lw=0.5) + split_index += 1 + ax_b.set_ylabel("Job ID") #ax_b labels: + ax_b.set_xlabel("time [hh:mm]") minx_s = 0 maxx_s = math.ceil(max([x['wall_time'] for x in jobs]) + max([x['submit_time'] for x in jobs])) x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] @@ -601,4 +609,4 @@ if __name__ == "__main__": num_dist = 1 if args.multimodal: num_dist = len(args.multimodal) - plot_job_hist(jobs,num_dist) + plot_job_hist(jobs,num_dist, args.multimodal) -- GitLab From d7994a4385a4353742191884073d8484a081879c Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 23 May 2025 17:37:20 -0400 Subject: [PATCH 100/388] Cleanup of plot_job_hist parameters. --- raps/workload.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/raps/workload.py b/raps/workload.py index 9b84315..7a63b1c 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -442,7 +442,13 @@ class Workload: return jobs -def plot_job_hist(jobs,num_dist=1,split=[1.0,0.0]): +def plot_job_hist(jobs,dist_split=None): + # put args.multimodal in dist_split! 
+ split = [1.0] + num_dist = 1 + if dist_split: + num_dist = len(dist_split) + split = dist_split y = [y['nodes_required'] for y in jobs] x = [x['wall_time'] for x in jobs] @@ -606,7 +612,4 @@ if __name__ == "__main__": workload = Workload(config) jobs = getattr(workload, args.workload)(args=args) - num_dist = 1 - if args.multimodal: - num_dist = len(args.multimodal) - plot_job_hist(jobs,num_dist, args.multimodal) + plot_job_hist(jobs, args.multimodal) -- GitLab From 27ff7f1d51357cd1fe12dfab4241e9d887f2a833 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 24 May 2025 14:18:41 -0400 Subject: [PATCH 101/388] Change from bandwidth to throughput per time quanta --- config/lassen/network.json | 2 +- raps/dataloaders/lassen.py | 21 ++++++++++----------- raps/engine.py | 22 ++++++++++++---------- raps/network.py | 18 +++++++++--------- 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index b243b4a..51a0d71 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -2,6 +2,6 @@ "TOPOLOGY": "fat-tree", "NETWORK_MODEL": "capacity", "UPLINK_CAPACITY": 10, - "NETWORK_MAX_BW": 10e9, + "NETWORK_MAX_BW": 100e9, "LATENCY": 1 } diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 6eb46ba..e101c95 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -169,12 +169,18 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # total_energy = node_data['energy'].sum() # Joules # Network utilization - since values are given in octets / quarter of a byte, multiply by 4 to get bytes - ib_tx = 4 * node_data['ib_tx'].sum() if node_data['ib_tx'].values.size > 0 else [] - ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else [] + total_ib_tx = 4 * node_data['ib_tx'].sum() if node_data['ib_tx'].values.size > 0 else 0 + total_ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else 0 + + n = 1 # use total bytes per job + #n = nodes_required or 1 # use average bytes per node + #print("***", n, total_ib_tx, total_ib_rx) + ib_tx_per_node = total_ib_tx / n + ib_rx_per_node = total_ib_rx / n # net_tx, net_rx = [],[] # generate_network_sequences generates errors (e.g. -ff 800d -t 1d ) # net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) - net_tx, net_rx = generate_network_sequences_avg(ib_tx, ib_rx, samples, lambda_poisson=0.3) + net_tx, net_rx = throughput_traces(ib_tx_per_node, ib_rx_per_node, samples) # no priorities defined! 
priority = row.get('priority', 0) @@ -289,18 +295,11 @@ def generate_network_sequences(total_tx, total_rx, intervals, lambda_poisson): return tx_bursts, rx_bursts -def generate_network_sequences_avg(total_tx, total_rx, intervals, lambda_poisson): +def throughput_traces(total_tx, total_rx, intervals): if not total_tx or not total_rx: return [], [] - # Generate sporadic bursts using a Poisson distribution (shared for both tx and rx) - #burst_intervals = np.random.poisson(lam=lambda_poisson, size=intervals) - - # Ensure some intervals have no traffic (both tx and rx will share zero intervals) - #burst_intervals = np.where(burst_intervals > 0, burst_intervals, 0) - - # Adjust bursts for both tx and rx tx_bursts = [total_tx // intervals] * intervals rx_bursts = [total_rx // intervals] * intervals diff --git a/raps/engine.py b/raps/engine.py index d6f4e45..b8c7e17 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -218,24 +218,26 @@ class Engine: else: raise NotImplementedError() - if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace,np.ndarray): + if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace, np.ndarray): if time_quanta_index < len(job.gpu_trace): gpu_util = get_utilization(job.gpu_trace, time_quanta_index) else: gpu_util = get_utilization(job.gpu_trace, len(job.gpu_trace) - 1) - elif isinstance(job.gpu_trace,float) or isinstance(job.gpu_trace,int): + elif isinstance(job.gpu_trace, float) or isinstance(job.gpu_trace, int): gpu_util = job.gpu_trace else: raise NotImplementedError() net_util = 0 - if (isinstance(job.ntx_trace,list) or isinstance(job.ntx_trace,np.ndarray)) and len(job.ntx_trace) and (isinstance(job.nrx_trace,list) or isinstance(job.nrx_trace,list)) and len(job.nrx_trace): - max_link_bw = self.config.get('NETWORK_MAX_BW') + if (isinstance(job.ntx_trace, list) or isinstance(job.ntx_trace, np.ndarray)) and \ + (isinstance(job.nrx_trace, list) or isinstance(job.nrx_trace, list)) and \ + len(job.ntx_trace) and len(job.nrx_trace): + max_throughput = self.config.get('NETWORK_MAX_BW') * self.config.get('TRACE_QUANTA') net_tx = get_utilization(job.ntx_trace, time_quanta_index) net_rx = get_utilization(job.nrx_trace, time_quanta_index) - net_util = network_utilization(net_tx, net_rx, max_link_bw) - net_cong = network_congestion(net_tx, net_rx, max_link_bw) + net_util = network_utilization(net_tx, net_rx, max_throughput) + net_cong = network_congestion(net_tx, net_rx, max_throughput) net_tx_list.append(net_tx) net_rx_list.append(net_rx) if self.debug: @@ -248,14 +250,14 @@ class Engine: # Get the maximum allowed bandwidth from the configuration. if net_cong > 1: #network_congestion_threshold: if self.debug: - print(f"congested net_cong: {net_cong}, max_link_bw: {max_link_bw}") + print(f"congested net_cong: {net_cong}, max_throughput: {max_throughput}") print(f"length of {len(job.gpu_trace)} before dilation") - current_bw = net_tx + net_rx - slowdown_factor = network_slowdown(current_bw, max_link_bw) + throughput = net_tx + net_rx + slowdown_factor = network_slowdown(throughput, max_throughput) #slowdown_factor = min(slowdown_factor, 2) # set max slowdown factor # Optionally, only apply slowdown once per job to avoid compounding the effect. 
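            # For clarity: max_throughput above is NETWORK_MAX_BW * TRACE_QUANTA,
            # i.e. the traffic budget per time quantum. network_slowdown() returns
            # throughput / max_throughput when a job's per-quantum traffic exceeds
            # that budget and 1.0 otherwise, and the job.dilated flag below ensures
            # the dilation is applied only once per job.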
if self.debug: - print("***", hasattr(job, 'dilated'), current_bw, max_link_bw, slowdown_factor) + print("***", hasattr(job, 'dilated'), throughput, max_throughput, slowdown_factor) #if not hasattr(job, 'dilated') or not job.dilated: if not job.dilated: if self.debug: diff --git a/raps/network.py b/raps/network.py index 2dcbaad..1f8c79d 100644 --- a/raps/network.py +++ b/raps/network.py @@ -1,30 +1,30 @@ -def network_congestion(tx, rx, max_bw): +def network_congestion(tx, rx, max_throughput): """ Overload factor ≥0: average of send/recv NOT clamped. >1.0 means you’re pushing above capacity. """ - tx_util = float(tx) / max_bw - rx_util = float(rx) / max_bw + tx_util = float(tx) / max_throughput + rx_util = float(rx) / max_throughput return (tx_util + rx_util) / 2.0 -def network_utilization(tx, rx, max_bw): +def network_utilization(tx, rx, max_throughput): """ True utilization in [0,1]: average of send/recv clamped to 100%. """ - tx_u = min(float(tx) / max_bw, 1.0) - rx_u = min(float(rx) / max_bw, 1.0) + tx_u = min(float(tx) / max_throughput, 1.0) + rx_u = min(float(rx) / max_throughput, 1.0) return (tx_u + rx_u) / 2.0 -def network_slowdown(current_bw, max_bw): +def network_slowdown(current_throughput, max_throughput): """ Calculate a slowdown factor based on current network bandwidth usage. If current_bw is within limits, the factor is 1.0 (no slowdown). If current_bw exceeds max_bw, the factor is current_bw/max_bw. """ - if current_bw <= max_bw: + if current_throughput <= max_throughput: return 1.0 else: - return current_bw / max_bw + return current_throughput / max_throughput -- GitLab From 59ab046a5a4dac61c7e7e7062632246e340a2289 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 29 May 2025 18:29:53 -0400 Subject: [PATCH 102/388] Added arrival rate for poisson distribution --- args.py | 2 + main.py | 2 +- raps/utils.py | 34 +++++++++++++- raps/workload.py | 112 ++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 131 insertions(+), 19 deletions(-) diff --git a/args.py b/args.py index 114628e..fc0b2f6 100644 --- a/args.py +++ b/args.py @@ -59,6 +59,8 @@ parser.add_argument('--backfill', type=str, choices=choices, default=None, help= # Redistribution of job arrival choices = ['prescribed', 'poisson'] parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})') +parser.add_argument('--arrival-poisson-rate', default=1, type=float, help='Modify arrival rate of poisson distribution (default 1)') + # Account options parser.add_argument('--accounts', action='store_true', help='Flag indicating if accounts should be tracked') diff --git a/main.py b/main.py index 40338a7..6b8dfad 100644 --- a/main.py +++ b/main.py @@ -107,7 +107,7 @@ if args.replay: print("available nodes:", config['AVAILABLE_NODES']) for job in tqdm(jobs, desc="Rescheduling jobs"): job['requested_nodes'] = None - job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + job['submit_time'] = next_arrival(args.arrival_poisson_rate / config['JOB_ARRIVAL_TIME']) # default arrival rate = 1 else: # custom data loader print(*args.replay) diff --git a/raps/utils.py b/raps/utils.py index 7e0dab5..f345d6a 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -38,7 +38,7 @@ def convert_seconds(seconds): return f"{h}:{m}" -def truncated_normalvariate(mu, sigma, lower, upper): +def truncated_normalvariate_int(mu, sigma, lower, upper): """ Generate a random number from a truncated normal 
distribution. @@ -55,7 +55,7 @@ def truncated_normalvariate(mu, sigma, lower, upper): Returns ------- - float + int Random number from the truncated normal distribution. """ CUTOFF = 100000000 @@ -68,6 +68,36 @@ def truncated_normalvariate(mu, sigma, lower, upper): raise Exception(f"mu:{mu} sigma:{sigma}, not a single hit in {CUTOFF} tries.") +def truncated_normalvariate_float(mu, sigma, lower, upper): + """ + Generate a random number from a truncated normal distribution. + + Parameters + ---------- + mu : float + Mean of the distribution. + sigma : float + Standard deviation of the distribution. + lower : float + Lower bound of the truncated distribution. + upper : float + Upper bound of the truncated distribution. + + Returns + ------- + float + Random number from the truncated normal distribution. + """ + CUTOFF = 100000000 + i = 0 + while i < CUTOFF: + number = random.normalvariate(mu, sigma) + if lower < number < upper: + return number + i += 1 + raise Exception(f"mu:{mu} sigma:{sigma}, not a single hit in {CUTOFF} tries.") + + def truncated_weibull(scale, shape, min, max): while True: number = random.weibullvariate(scale, shape) diff --git a/raps/workload.py b/raps/workload.py index 7a63b1c..1e9b757 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -45,7 +45,7 @@ ACCT_NAMES = ["ACT01", "ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07",\ MAX_PRIORITY = 500000 -from raps.utils import truncated_normalvariate, determine_state, next_arrival, truncated_weibull +from raps.utils import truncated_normalvariate_int, truncated_normalvariate_float, determine_state, next_arrival, truncated_weibull class Workload: @@ -61,7 +61,7 @@ class Workload: return (cpu_trace, gpu_trace) def job_arrival_distribution_draw_poisson(self,args,config): - return next_arrival(1 / config['JOB_ARRIVAL_TIME']) + return next_arrival(args.arrival_poisson_rate / config['JOB_ARRIVAL_TIME']) def job_size_distribution_draw_uniform(self,args,config): return random.randint(1, config['MAX_NODES_PER_JOB']) @@ -70,19 +70,31 @@ class Workload: return truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, 1, config['MAX_NODES_PER_JOB']) def job_size_distribution_draw_normal(self,args,config): - return truncated_normalvariate(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) + return truncated_normalvariate_int(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) def cpu_utilization_distribution_draw_uniform(self,args,config): return random.uniform(0.0, config['CPUS_PER_NODE']) + def cpu_utilization_distribution_draw_normal(self,args,config): + return truncated_normalvariate_float(args.cpuutil_normal_mean, args.cpuutil_normal_stddev,0.0, config['CPUS_PER_NODE']) + + def cpu_utilization_distribution_draw_weibull(self,args,config): + return truncated_weibull(args.cpuutil_normal_mean, args.cpuutil_normal_stddev,0.0, config['CPUS_PER_NODE']) + def gpu_utilization_distribution_draw_uniform(self,args,config): return random.uniform(0.0, config['GPUS_PER_NODE']) + def gpu_utilization_distribution_draw_normal(self,args,config): + return truncated_normalvariate_float(args.gpuutil_normal_mean, args.gpuutil_normal_stddev,0.0, config['GPUS_PER_NODE']) + + def gpu_utilization_distribution_draw_weibull(self,args,config): + return truncated_weibull(args.gpuutil_normal_mean, args.gpuutil_normal_stddev,0.0, config['GPUS_PER_NODE']) + def wall_time_distribution_draw_uniform(self,args,config): return 
random.uniform(config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) def wall_time_distribution_draw_normal(self,args,config): - return max(1,truncated_normalvariate(float(args.walltime_normal_mean), float(args.walltime_normal_stddev), config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) / 3600 * 3600) + return max(1,truncated_normalvariate_int(float(args.walltime_normal_mean), float(args.walltime_normal_stddev), config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) / 3600 * 3600) def wall_time_distribution_draw_weibull(self,args,config): return truncated_weibull(args.walltime_weibull_scale, args.walltime_weibull_shape, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) @@ -138,17 +150,26 @@ class Workload: def synthetic(self, **kwargs): args = kwargs.get('args',None) + print(args) total_jobs = args.numjobs orig_job_size_distribution = args.jobsize_distribution orig_wall_time_distribution = args.jobsize_distribution + orig_cpuutil_distribution = args.cpuutil_distribution + orig_gpuutil_distribution = args.gpuutil_distribution jobs = [] if len(args.jobsize_distribution) != 1 and sum(args.multimodal) != 1.0: raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}") - for i,(jsdist,wtdist,percentage) in enumerate(zip(args.jobsize_distribution,args.walltime_distribution,args.multimodal)): + for i,(jsdist,wtdist,cudist,gudist,percentage) in enumerate(zip(args.jobsize_distribution, + args.walltime_distribution, + args.cpuutil_distribution, + args.gpuutil_distribution, + args.multimodal)): args.numjobs = math.floor(total_jobs * percentage) args.jobsize_distribution = jsdist args.walltime_distribution = wtdist + args.cpuutil_distribution = cudist + args.gpuutil_distribution = gudist job_arrival_distribution_to_draw_from = self.job_arrival_distribution_draw_poisson match args.jobsize_distribution: @@ -160,8 +181,7 @@ class Workload: job_size_distribution_to_draw_from = self.job_size_distribution_draw_weibull case _: raise NotImplementedError(args.jobsize_distribution) - cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_uniform - gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_uniform + match args.walltime_distribution: case "weibull": wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_weibull @@ -169,10 +189,29 @@ class Workload: wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_normal case "uniform": wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_uniform - case _: raise NotImplementedError(args.walltime_distribution) + match args.cpuutil_distribution: + case "uniform": + cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_uniform + case "normal": + cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_normal + case "weibull": + cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_weibull + case _: + raise NotImplementedError(args.cpuutil_distribution) + + match args.gpuutil_distribution: + case "uniform": + gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_uniform + case "normal": + gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_normal + case "weibull": + gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_weibull + case _: + raise NotImplementedError(args.gpuutil_distribution) + new_jobs = self.generate_jobs( job_arrival_distribution_to_draw_from=job_arrival_distribution_to_draw_from, 
job_size_distribution_to_draw_from=job_size_distribution_to_draw_from, @@ -184,6 +223,8 @@ class Workload: jobs.extend(new_jobs) args.numjobs = total_jobs args.jobsize_distribution = orig_job_size_distribution + args.cpuutil_distribution = orig_cpuutil_distribution + args.gpuutil_distribution = orig_gpuutil_distribution args.walltime_distribution = orig_wall_time_distribution return jobs @@ -204,8 +245,8 @@ class Workload: gpu_util = random.random() * config['GPUS_PER_NODE'] mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 - wall_time = truncated_normalvariate(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 - time_limit = truncated_normalvariate(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + wall_time = truncated_normalvariate_int(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 + time_limit = truncated_normalvariate_int(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 end_state = determine_state(config['JOB_END_PROBS']) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) priority = random.randint(0, MAX_PRIORITY) @@ -442,7 +483,7 @@ class Workload: return jobs -def plot_job_hist(jobs,dist_split=None): +def plot_job_hist(jobs,config=None,dist_split=None): # put args.multimodal in dist_split! split = [1.0] num_dist = 1 @@ -475,7 +516,7 @@ def plot_job_hist(jobs,dist_split=None): axs = [] col = [] col.append(fig_m.add_subplot(gs0[:100,:433])) - col.append(fig_m.add_subplot(gs0[400:,433:])) + col.append(fig_m.add_subplot(gs0[:100,433:])) axs.append(col.copy()) col = [] col.append(fig_m.add_subplot(gs0[100:,:433])) @@ -490,6 +531,27 @@ def plot_job_hist(jobs,dist_split=None): axs[1][0].scatter(x2, y,marker='.',c='lightblue',zorder=2) axs[1][0].scatter(x, y,zorder=3) + cpu_util = [x['cpu_trace'] for x in jobs] + gpu_util = [x['gpu_trace'] for x in jobs] + if not all([x == 0 for x in gpu_util]): + axs[0][1].scatter(cpu_util,gpu_util,zorder=2,marker='.',s=0.2) + axs[0][1].hist(gpu_util,bins=100,orientation='horizontal',zorder=1, density=True,color='tab:purple') + axs[0][1].axhline(np.mean(gpu_util), color='r', linewidth=1,zorder=3) + axs[0][1].set(ylim=[0,config['GPUS_PER_NODE']]) + axs[0][1].set_ylabel("gpu util") + axs[0][1].yaxis.set_label_coords(1.15, 0.5) + axs[0][1].yaxis.set_label_position("right") + axs[0][1].yaxis.tick_right() + else: + axs[0][1].set_yticks([]) + pass + axs[0][1].hist(cpu_util,bins=100,orientation='vertical',zorder=1, density=True,color='tab:cyan') + axs[0][1].axvline(np.mean(cpu_util), color='r', linewidth=1,zorder=3) + axs[0][1].set(xlim=[0,config['CPUS_PER_NODE']]) + axs[0][1].set_xlabel("cpu util") + axs[0][1].xaxis.set_label_coords(0.5,1.30) + axs[0][1].xaxis.set_label_position("top") + axs[0][1].xaxis.tick_top() axs[0][0].hist(x2,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical',color='lightblue') axs[0][0].hist(x,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical') axs[1][0].sharex(axs[0][0]) @@ -501,10 +563,7 @@ def plot_job_hist(jobs,dist_split=None): axs[0][0].set_xticks([]) axs[1][1].set_yticks([]) axs[0][1].spines['top'].set_color('white') - axs[0][1].set_yticks([]) - axs[0][1].set_xticks([]) axs[0][1].spines['right'].set_color('white') - axs[1][0].set_ylabel("nodes [N]") axs[1][0].set_xlabel("wall time [hh:mm]") minx_s = 0 @@ -525,6 +584,7 @@ def plot_job_hist(jobs,dist_split=None): axs[0][0].tick_params(axis="x", 
labelbottom=False) axs[1][1].tick_params(axis="y", labelleft=False) + # Submit_time and Wall_time duration = [x['wall_time'] for x in jobs] nodes_required = [x['nodes_required'] for x in jobs] submit_t = [x['submit_time'] for x in jobs] @@ -577,6 +637,7 @@ def plot_job_hist(jobs,dist_split=None): def add_workload_to_parser(parser): + choices = ['random', 'benchmark', 'peak', 'idle','synthetic'] parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') @@ -599,6 +660,25 @@ def add_workload_to_parser(parser): parser.add_argument("--walltime-weibull-shape", type=float, required=False, help="Walltime shape of weibull") parser.add_argument("--walltime-weibull-scale", type=float, required=False, help="Walltime scale of weibull") + + # Utilizations + parser.add_argument("--cpuutil-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=['uniform'], help='Distribution type') + + parser.add_argument("--cpuutil-normal-mean", type=float, required=False, help="Walltime mean (mu) for Normal distribution") + parser.add_argument("--cpuutil-normal-stddev", type=float, required=False, help="Walltime standard deviation (sigma) for Normal distribution") + + parser.add_argument("--cpuutil-weibull-shape", type=float, required=False, help="Walltime shape of weibull") + parser.add_argument("--cpuutil-weibull-scale", type=float, required=False, help="Walltime scale of weibull") + + parser.add_argument("--gpuutil-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=['uniform'], help='Distribution type') + + parser.add_argument("--gpuutil-normal-mean", type=float, required=False, help="Walltime mean (mu) for Normal distribution") + parser.add_argument("--gpuutil-normal-stddev", type=float, required=False, help="Walltime standard deviation (sigma) for Normal distribution") + + parser.add_argument("--gpuutil-weibull-shape", type=float, required=False, help="Walltime shape of weibull") + parser.add_argument("--gpuutil-weibull-scale", type=float, required=False, help="Walltime scale of weibull") + + parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gannt with nodes required as line thickness (default false)") return parser @@ -612,4 +692,4 @@ if __name__ == "__main__": workload = Workload(config) jobs = getattr(workload, args.workload)(args=args) - plot_job_hist(jobs, args.multimodal) + plot_job_hist(jobs, config=config, dist_split=args.multimodal) -- GitLab From ef56d131a01d2200ec9908033ac950a9dc6371c2 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 30 May 2025 11:47:35 -0400 Subject: [PATCH 103/388] Fixed Flattening of utilization arrays for plotting and minor code cleanup --- raps/workload.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/raps/workload.py b/raps/workload.py index 1e9b757..69f1c85 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -27,9 +27,7 @@ JOB_END_PROBS : list import math import random import numpy as np -import argparse import matplotlib.pyplot as plt -import matplotlib.gridspec as gridspec from raps.job import job_dict @@ -99,14 +97,6 @@ class Workload: def wall_time_distribution_draw_weibull(self,args,config): return truncated_weibull(args.walltime_weibull_scale, args.walltime_weibull_shape, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) - #wall_time = random.weibullvariate(args.walltime_weibull_scale,args.walltime_weibull_shape) - ##wall_time = 
truncated_weibull(args.walltime_weibull_scale,args.walltime_weibull_shape)
-
-        ##(config['MAX_WALL_TIME'] // 2) + config['MIN_WALL_TIME'], 1,
-        ## # (config['MAX_WALL_TIME'] // 4) + config['MIN_WALL_TIME'],
-        ## config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) // 60 * 60 # to 1 minute
-        #return wall_time

     def generate_jobs(self, *,
                       job_arrival_distribution_to_draw_from,
                       job_size_distribution_to_draw_from,
@@ -507,7 +497,6 @@ def plot_job_hist(jobs,config=None,dist_split=None):
     ax_bot.axis('off')
     ax_bot.set_title('Submit Time + Wall Time')

-
     #ax0 = fig_m.add_subplot(gs[:2,:])
     #ax1 = fig_m.add_subplot(gs[2:,:])

@@ -532,7 +521,11 @@ def plot_job_hist(jobs,config=None,dist_split=None):
         axs[1][0].scatter(x, y,zorder=3)

     cpu_util = [x['cpu_trace'] for x in jobs]
+    if isinstance(cpu_util[0],np.ndarray):
+        cpu_util = np.concatenate(cpu_util).ravel()
     gpu_util = [x['gpu_trace'] for x in jobs]
+    if isinstance(gpu_util[0],np.ndarray):
+        gpu_util = np.concatenate(gpu_util).ravel()
     if not all([x == 0 for x in gpu_util]):
         axs[0][1].scatter(cpu_util,gpu_util,zorder=2,marker='.',s=0.2)
         axs[0][1].hist(gpu_util,bins=100,orientation='horizontal',zorder=1, density=True,color='tab:purple')
@@ -637,7 +630,6 @@ def plot_job_hist(jobs,config=None,dist_split=None):

 def add_workload_to_parser(parser):
-
     choices = ['random', 'benchmark', 'peak', 'idle','synthetic']
     parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')
@@ -678,7 +670,6 @@ def add_workload_to_parser(parser):
     parser.add_argument("--gpuutil-weibull-shape", type=float, required=False, help="Walltime shape of weibull")
     parser.add_argument("--gpuutil-weibull-scale", type=float, required=False, help="Walltime scale of weibull")
-
     parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gannt with nodes required as line thickness (default false)")

     return parser
-- 
GitLab


From 20ef61d1d0e1c3b4bcf01c31d8172f5feaac606d Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 30 May 2025 17:39:29 -0400
Subject: [PATCH 104/388] Added save workload file as npz and load for replay.
 Test and fix needed if other npz save and load functionality still works,
 before merge!

---
 raps/telemetry.py | 12 ++++++++----
 raps/utils.py | 35 +++++++++++++++++++++++++++++++++++
 raps/workload.py | 6 ++++++
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/raps/telemetry.py b/raps/telemetry.py
index a4001d9..7fae145 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -46,14 +46,18 @@ class Telemetry:
         except:
             print("WARNING: Failed to load dataloader")

-    def save_snapshot(self, jobs: list, filename: str):
+    def save_snapshot(self,*, jobs: list, timestep_start, timestep_end, args, filename: str):
         """Saves a snapshot of the jobs to a compressed file.
""" - np.savez_compressed(filename, jobs=jobs) + np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) def load_snapshot(self, snapshot: str) -> list: """Reads a snapshot from a compressed file and returns the jobs.""" - jobs = np.load(snapshot, allow_pickle=True, mmap_mode='r') - return jobs['jobs'].tolist() + data = np.load(snapshot, allow_pickle=True, mmap_mode='r') + return (data['jobs'].tolist(), + data['timestep_start'], + data['timestep_end'], + data['args'].tolist() + ) def load_data(self, files): """Load telemetry data using custom data loaders.""" diff --git a/raps/utils.py b/raps/utils.py index f345d6a..0e80f61 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -9,6 +9,7 @@ generating random numbers, summarizing and expanding ranges, determining job sta from datetime import timedelta from enum import Enum +import os import hashlib import math import numpy as np @@ -353,6 +354,40 @@ def create_casename(prefix=''): return prefix + str(uuid.uuid4())[:7] +def create_file_indexed(prefix:str, path:str = None, ending:str = None, create=True) -> str: + if path is not None: + os.makedirs(path, exist_ok=True) + else: + path = "./" + index = 1 + while True: + if ending: + filename = f"{prefix}_{index:03d}.{ending}" + else: + filename = f"{prefix}_{index:03d}" + filepath = os.path.join(path, filename) + if not os.path.exists(filepath): + if create: + open(filepath, "w").close() + return filepath + index += 1 + + +def create_dir_indexed(dir:str, path:str = None) -> str: + if dir is None: + raise ValueError("'dir' cannot be none") + if path is None: + path = os.getcwd() + index = 1 + while True: + dirname = f"{dir}_{index:03d}" + fullpath = os.path.join(path,dirname) + if not os.path.exists(fullpath): + os.makedirs(fullpath,exist_ok=False) + return fullpath + index += 1 + + def next_arrival(lambda_rate,reset=False): if not hasattr(next_arrival, 'next_time') or reset is True: # Initialize the first time it's called diff --git a/raps/workload.py b/raps/workload.py index 69f1c85..e826142 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -30,6 +30,7 @@ import numpy as np import matplotlib.pyplot as plt from raps.job import job_dict +from raps.utils import create_file_indexed, create_dir_indexed JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",\ @@ -684,3 +685,8 @@ if __name__ == "__main__": workload = Workload(config) jobs = getattr(workload, args.workload)(args=args) plot_job_hist(jobs, config=config, dist_split=args.multimodal) + if args.output: + filename = create_file_indexed('wl',create=False) + timestep_start = min([x['submit_time'] for x in jobs]) + timestep_end = math.ceil(max([x['submit_time'] for x in jobs]) + max([x['wall_time'] for x in jobs])) + np.savez_compressed(filename,jobs=jobs,timestep_start=timestep_start, timestep_end=timestep_end, args=args) -- GitLab From 36605f4520236e6e1bb563a25a7f5658b2eb63e4 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 3 Jun 2025 14:16:23 -0400 Subject: [PATCH 105/388] Fix issue with loading .npz file --- raps/telemetry.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index c94a06c..97c76a4 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -52,8 +52,20 @@ class Telemetry: def load_snapshot(self, snapshot: str) -> list: """Reads a snapshot from a compressed file and returns the jobs.""" - jobs, start_timestep, end_timestep, 
args = np.load(snapshot, allow_pickle=True, mmap_mode='r') # This is untested and may need fixing!
-        return jobs['jobs'].tolist(), start_timestep, end_timestep, args
+        data = np.load(snapshot, allow_pickle=True, mmap_mode='r')
+
+        # 'data["jobs"]' is already a 1-D ndarray of dicts, so just turn it into a Python list:
+        jobs_arr = data["jobs"] # e.g. array([ {...}, {...}, … ], dtype=object)
+        jobs_list = jobs_arr.tolist() # now it’s a Python list of job-dicts
+
+        # If 'start_timestep'/'end_timestep' are 0-d arrays, convert to int
+        start_timestep = int(data["start_timestep"])
+        end_timestep = int(data["end_timestep"])
+
+        # 'data["args"]' is a 0-d object array containing a Namespace, so do .item() to pull it out:
+        args_from_file = data["args"].item()
+
+        return jobs_list, start_timestep, end_timestep, args_from_file

     def load_data(self, files):
         """Load telemetry data using custom data loaders."""
-- 
GitLab


From f9351a6b77270092350432b0d8c0da4dd40ece86 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 3 Jun 2025 14:51:04 -0400
Subject: [PATCH 106/388] Fix bug where debug -d no longer worked to suppress
 Rich layout

---
 raps/ui.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/raps/ui.py b/raps/ui.py
index d66a1aa..5f15a9d 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -430,8 +430,7 @@ class LayoutManager:
             data.power_df, data.p_flops, data.g_flops_w,
             data.system_util, uncertainties=uncertainties,
         )
-        if False:
-            self.render()
+
         self.update_progress(1)
         self.update_scheduled_jobs(data.running + data.queue)

@@ -452,16 +451,17 @@ class LayoutManager:
             data.system_util, uncertainties=uncertainties,
         )

-    def render(self):
-        if not self.debug:
-            self.console.clear()
-            self.console.print(self.layout)
-
     def run(self, jobs, timestep_start, timestep_end):
         """ Runs the UI, blocking until the simulation is complete """
-        with Live(self.layout, refresh_per_second=5):
+        if self.debug:
+            # Debug mode: don’t use Live—just iterate without rendering
             for data in self.engine.run_simulation(jobs, timestep_start, timestep_end):
                 self.update(data)
+        else:
+            # Normal UI mode: use Live to render layout automatically
+            with Live(self.layout, refresh_per_second=5):
+                for data in self.engine.run_simulation(jobs, timestep_start, timestep_end):
+                    self.update(data)

     def run_stepwise(self, jobs, timestep_start, timestep_end):
         """ Prepares the UI and returns a generator for the simulation """
-- 
GitLab


From 1a3aea24242544289d10ad4bed89073d0ba343d0 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 3 Jun 2025 14:53:36 -0400
Subject: [PATCH 107/388] Add code for fat-tree topology, but slowdown not yet
 working for that case

---
 config/lassen/network.json | 4 +-
 raps/engine.py | 26 ++++++++---
 raps/network.py | 92 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/config/lassen/network.json b/config/lassen/network.json
index 51a0d71..f192332 100644
--- a/config/lassen/network.json
+++ b/config/lassen/network.json
@@ -1,7 +1,5 @@
 {
-    "TOPOLOGY": "fat-tree",
-    "NETWORK_MODEL": "capacity",
-    "UPLINK_CAPACITY": 10,
+    "TOPOLOGY": "capacity",
     "NETWORK_MAX_BW": 100e9,
     "LATENCY": 1
 }
diff --git a/raps/engine.py b/raps/engine.py
index b8c7e17..74bf7fa 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -6,6 +6,7 @@ import numpy as np
 from .job import Job, JobState
 from .policy import PolicyType
 from .network import network_utilization, network_congestion, network_slowdown
+from .network import build_fattree, link_loads_for_job, worst_link_util
from .utils import summarize_ranges, expand_ranges, get_utilization from .utils import sum_values, min_value, max_value from .resmgr import ResourceManager @@ -82,6 +83,13 @@ class Engine: f", with policy {self.scheduler.policy} "\ f"and backfill {self.scheduler.bfpolicy}") + # Network + self.topology = self.config.get('TOPOLOGY') + # if fat-tree, build the graph once + if self.topology == "fat-tree": + k = config.get("FATTREE_K", 8) + self.net_graph = build_fattree(k) + self.max_link_bw = self.config.get("NETWORK_MAX_BW") def add_running_jobs_to_queue(self, jobs_to_submit: List): """ @@ -208,17 +216,17 @@ class Engine: # Similar with the first time_quanta index: If the job started # in the past and no trace if there, read index 0 until values # are available. - if isinstance(job.cpu_trace,list) or isinstance(job.cpu_trace,np.ndarray): + if isinstance(job.cpu_trace, list) or isinstance(job.cpu_trace, np.ndarray): if time_quanta_index < len(job.cpu_trace): cpu_util = get_utilization(job.cpu_trace, time_quanta_index) else: cpu_util = get_utilization(job.cpu_trace, len(job.cpu_trace) - 1) - elif isinstance(job.cpu_trace,float) or isinstance(job.cpu_trace,int): + elif isinstance(job.cpu_trace, float) or isinstance(job.cpu_trace, int): cpu_util = job.cpu_trace else: raise NotImplementedError() - if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace, np.ndarray): + if isinstance(job.gpu_trace, list) or isinstance(job.gpu_trace, np.ndarray): if time_quanta_index < len(job.gpu_trace): gpu_util = get_utilization(job.gpu_trace, time_quanta_index) else: @@ -237,11 +245,19 @@ class Engine: net_tx = get_utilization(job.ntx_trace, time_quanta_index) net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx, max_throughput) - net_cong = network_congestion(net_tx, net_rx, max_throughput) + + if self.topology == "fat-tree": + loads = link_loads_for_job(self.net_graph, job.requested_nodes, net_tx) + net_cong = worst_link_util(loads, max_throughput) + else: + # capacity model: simple α+β or normalized overload + net_cong = network_congestion(net_tx, net_rx, max_throughput) + + # collect for stats net_tx_list.append(net_tx) net_rx_list.append(net_rx) if self.debug: - print("time:", self.current_time, "net util:", net_util) + print("time:", self.current_time, "net util:", net_util, "net congestion:", net_cong) print("jid", job.id, "net_tx", net_tx) print("jid", job.id, "net_rx", net_tx) net_congs.append(net_cong) diff --git a/raps/network.py b/raps/network.py index 1f8c79d..f1d8327 100644 --- a/raps/network.py +++ b/raps/network.py @@ -1,3 +1,5 @@ +import networkx as nx + def network_congestion(tx, rx, max_throughput): """ Overload factor ≥0: average of send/recv NOT clamped. 
@@ -28,3 +30,93 @@ def network_slowdown(current_throughput, max_throughput): return 1.0 else: return current_throughput / max_throughput + +def build_fattree(k): + """ + Build a k-ary fat-tree: + - k pods + - each pod has k/2 edge switches, k/2 agg switches + - core layer has (k/2)^2 core switches + - each edge switch connects to k/2 hosts + Returns a NetworkX Graph where: + - hosts are named "h_{pod}_{edge}_{i}" + - edge switches "e_{pod}_{edge}" + - agg switches "a_{pod}_{agg}" + - core switches "c_{i}_{j}" + """ + G = nx.Graph() + # core + num_core = (k//2)**2 + for i in range(k//2): + for j in range(k//2): + core = f"c_{i}_{j}" + G.add_node(core, type="core") + # pods + for pod in range(k): + # agg switches + for agg in range(k//2): + a = f"a_{pod}_{agg}" + G.add_node(a, type="agg") + # connect to all core switches in column agg + for i in range(k//2): + core = f"c_{agg}_{i}" + G.add_edge(a, core) + # edge switches + hosts + for edge in range(k//2): + e = f"e_{pod}_{edge}" + G.add_node(e, type="edge") + # connect edge→each agg in this pod + for agg in range(k//2): + a = f"a_{pod}_{agg}" + G.add_edge(e, a) + # connect hosts + for h in range(k//2): + host = f"h_{pod}_{edge}_{h}" + G.add_node(host, type="host") + G.add_edge(e, host) + return G + +def all_to_all_paths(G, hosts): + """ + Given a list of host names, return shortest‐paths for every unordered pair. + """ + paths = [] + for i in range(len(hosts)): + for j in range(i+1, len(hosts)): + src, dst = hosts[i], hosts[j] + p = nx.shortest_path(G, src, dst) + paths.append((src, dst, p)) + return paths + +def link_loads_for_job(G, job_hosts, tx_volume_bytes): + """ + Distribute tx_volume_bytes from each host equally to all its peers; + accumulate per-link loads and return a dict {(u,v):bytes, …}. + """ + paths = all_to_all_paths(G, job_hosts) + loads = {edge: 0.0 for edge in G.edges()} + # each host sends tx_volume_bytes to each of the (N-1) peers + for src in job_hosts: + per_peer = tx_volume_bytes / (len(job_hosts)-1) + # find paths where src is the sender + for (s, d, p) in paths: + if s != src: continue + # add per_peer to every link on p + for u, v in zip(p, p[1:]): + # ensure ordering matches loads keys + edge = (u, v) if (u, v) in loads else (v, u) + loads[edge] += per_peer + return loads + +def worst_link_util(loads, throughput): + """ + Given loads in **bytes** and capacity in **bits/sec**, convert: + util = (bytes * 8) / throughput + Return the maximum util over all links. 
+ """ + max_util = 0.0 + for edge, byte_load in loads.items(): + util = (byte_load * 8) / throughput + if util > max_util: + max_util = util + return max_util -- GitLab From de00a7398406600cce452d1041871ed419a88728 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 3 Jun 2025 16:07:56 -0400 Subject: [PATCH 108/388] Get fat-tree network topology working --- config/lassen/network.json | 5 +++-- raps/dataloaders/lassen.py | 9 +++++++-- raps/engine.py | 39 +++++++++++++++++++++++++------------- raps/network.py | 12 ++++++++++++ raps/ui.py | 2 +- 5 files changed, 49 insertions(+), 18 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index f192332..ca8ddb9 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,5 +1,6 @@ { - "TOPOLOGY": "capacity", - "NETWORK_MAX_BW": 100e9, + "TOPOLOGY": "fat-tree", + "NETWORK_MAX_BW": 10e9, + "FATTREE_K": 16, "LATENCY": 1 } diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index e101c95..54c416e 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -146,7 +146,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # The multiplication by GPUS_PER_NODE fixes this but is patch-work! TODO Refactor and fix gpu_util = power_to_utilization(gpu_power,gpu_min_power,gpu_max_power) # gpu_util should to be between 0 an 4 (4 GPUs), where 4 is all GPUs full utilization. - gpu_trace = gpu_util * config['GPUS_PER_NODE'] + gpu_util_scalar = gpu_util * config['GPUS_PER_NODE'] # Compute CPU power from CPU usage time # CPU usage is reported per core, while we need it in the range [0 to CPUS_PER_NODE] @@ -163,11 +163,16 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # cpu_util should be between 0 an 2 (2 CPUs) - cpu_trace = cpu_util + cpu_util_scalar = cpu_util # TODO use total energy for validation # Only Node Energy and GPU Energy is reported! 
# total_energy = node_data['energy'].sum() # Joules + # Expand into lists of length=samples + cpu_trace = [cpu_util_scalar] * samples + gpu_trace = [gpu_util_scalar] * samples + + # Network utilization - since values are given in octets / quarter of a byte, multiply by 4 to get bytes total_ib_tx = 4 * node_data['ib_tx'].sum() if node_data['ib_tx'].values.size > 0 else 0 total_ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else 0 diff --git a/raps/engine.py b/raps/engine.py index 74bf7fa..7129c4d 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -6,7 +6,7 @@ import numpy as np from .job import Job, JobState from .policy import PolicyType from .network import network_utilization, network_congestion, network_slowdown -from .network import build_fattree, link_loads_for_job, worst_link_util +from .network import build_fattree, link_loads_for_job, worst_link_util, node_id_to_host_name from .utils import summarize_ranges, expand_ranges, get_utilization from .utils import sum_values, min_value, max_value from .resmgr import ResourceManager @@ -87,8 +87,10 @@ class Engine: self.topology = self.config.get('TOPOLOGY') # if fat-tree, build the graph once if self.topology == "fat-tree": - k = config.get("FATTREE_K", 8) - self.net_graph = build_fattree(k) + print("building fat tree...") + self.fattree_k = config.get("FATTREE_K") + self.net_graph = build_fattree(self.fattree_k) + print(self.net_graph) self.max_link_bw = self.config.get("NETWORK_MAX_BW") def add_running_jobs_to_queue(self, jobs_to_submit: List): @@ -246,11 +248,20 @@ class Engine: net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx, max_throughput) - if self.topology == "fat-tree": - loads = link_loads_for_job(self.net_graph, job.requested_nodes, net_tx) + if job.nodes_required == 1: + net_cong = 0 + + elif self.topology == "fat-tree": + + # Map integers to hostnames + host_list = [node_id_to_host_name(n, self.fattree_k) for n in job.scheduled_nodes] + loads = link_loads_for_job(self.net_graph, host_list, net_tx) net_cong = worst_link_util(loads, max_throughput) - else: - # capacity model: simple α+β or normalized overload + + if self.debug: + print(" fat-tree hosts:", host_list) + + else: # capacity model: simple α+β or normalized overload net_cong = network_congestion(net_tx, net_rx, max_throughput) # collect for stats @@ -264,17 +275,17 @@ class Engine: net_utils.append(net_util) # Get the maximum allowed bandwidth from the configuration. - if net_cong > 1: #network_congestion_threshold: + if net_cong > 1: if self.debug: print(f"congested net_cong: {net_cong}, max_throughput: {max_throughput}") print(f"length of {len(job.gpu_trace)} before dilation") throughput = net_tx + net_rx slowdown_factor = network_slowdown(throughput, max_throughput) - #slowdown_factor = min(slowdown_factor, 2) # set max slowdown factor - # Optionally, only apply slowdown once per job to avoid compounding the effect. + if self.debug: print("***", hasattr(job, 'dilated'), throughput, max_throughput, slowdown_factor) - #if not hasattr(job, 'dilated') or not job.dilated: + + # Only apply slowdown once per job to avoid compounding the effect. 
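+                        # ("Dilation" itself is not shown in this hunk; from the
+                        # debug messages it presumably stretches the job's remaining
+                        # trace by slowdown_factor, so e.g. 1.5x turns 100 remaining
+                        # quanta into 150. job.dilated records that this happened.)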
if not job.dilated: if self.debug: print(f"Applying slowdown factor {slowdown_factor:.2f} to job {job.id} due to network congestion") @@ -450,14 +461,16 @@ class Engine: jobs += [job for job in all_jobs if job['submit_time'] <= timestep + batch_window] all_jobs[:] = [job for job in all_jobs if job['submit_time'] > timestep + batch_window] - # Start Siulation loop: + # Start Simulation loop: # 1. Cleanup old jobs completed_jobs, newly_downed_nodes = self.prepare_timestep(replay) # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) + # 3. Schedule jobs that are now in the queue. - self.scheduler.schedule(self.queue, self.running, self.current_time,accounts=self.accounts, sorted=(not has_new_additions)) + self.scheduler.schedule(self.queue, self.running, self.current_time, \ + accounts=self.accounts, sorted=(not has_new_additions)) # Stop the simulation if no more jobs are running or in the queue or in the job list. if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs: diff --git a/raps/network.py b/raps/network.py index f1d8327..3876677 100644 --- a/raps/network.py +++ b/raps/network.py @@ -120,3 +120,15 @@ def worst_link_util(loads, throughput): if util > max_util: max_util = util return max_util + +def node_id_to_host_name(node_id: int, k: int) -> str: + """ + Map a 0-based integer node_id into one of the fat-tree hosts "h_{pod}_{edge}_{h}". + There are (k^3/4) total hosts, assigned in ascending order across pod → edge → h. + """ + hosts_per_pod = (k // 2) * (k // 2) # e.g. for k=8, hosts_per_pod = 16 + pod = node_id // hosts_per_pod + offset = node_id % hosts_per_pod + edge = offset // (k // 2) + idx = offset % (k // 2) + return f"h_{pod}_{edge}_{idx}" diff --git a/raps/ui.py b/raps/ui.py index 5f15a9d..f9cf1cf 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -166,7 +166,7 @@ class LayoutManager: # Define columns with header styles columns = [ "Time", "Jobs Running", "Jobs Queued", - "Active Nodes", "Free Nodes", "Down Nodes", "Net Util (%)", "SPJ" + "Active Nodes", "Free Nodes", "Down Nodes", "Net Util (%)", "Slowdown per job" ] table = Table(header_style="bold magenta", expand=True) for col in columns: -- GitLab From f206f754d27714d4762573f5ff550ec0fd04d4c1 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 3 Jun 2025 16:37:18 -0400 Subject: [PATCH 109/388] Set network utilization to 0 for jobs that only use a single node --- raps/engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/raps/engine.py b/raps/engine.py index 7129c4d..2fed88b 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -250,6 +250,7 @@ class Engine: if job.nodes_required == 1: net_cong = 0 + net_util = 0 elif self.topology == "fat-tree": -- GitLab From ced2b5ad2e731b912406fb93e8c4e1c0deaebded Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 4 Jun 2025 14:04:26 -0400 Subject: [PATCH 110/388] Fix some spelling mistakes in comments --- raps/engine.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 2fed88b..0e1496e 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -87,7 +87,7 @@ class Engine: self.topology = self.config.get('TOPOLOGY') # if fat-tree, build the graph once if self.topology == "fat-tree": - print("building fat tree...") + print("building fat-tree...") self.fattree_k = config.get("FATTREE_K") self.net_graph = build_fattree(self.fattree_k) print(self.net_graph) @@ -95,12 +95,11 @@ class Engine: def 
add_running_jobs_to_queue(self, jobs_to_submit: List): """ - Mofifies jobs_to_submit - and self.queue + Modifies jobs_to_submit and self.queue This is a preparatory step and should only be called before the main loop of run_simulation. - Adds running jobs to the queueu, and removes them from the jobs_to_submit + Adds running jobs to the queue, and removes them from the jobs_to_submit jobs_to_submit still holds the jobs that need be submitted in the future. """ # Build a list of jobs whose start_time is <= current_time. @@ -116,10 +115,9 @@ class Engine: def add_eligible_jobs_to_queue(self, jobs_to_submit: List): """ - Mofifies jobs_to_submit - and self.queue + Modifies jobs_to_submit and self.queue - Adds eligible jobs to the queueu, and removes them from the jobs_to_submit + Adds eligible jobs to the queue, and removes them from the jobs_to_submit jobs_to_submit still holds the jobs that need be submitted in the future. """ # Build a list of jobs whose submit_time is <= current_time. -- GitLab From d85204481fd2ce3754e87ddde47cd468e8b087a6 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 4 Jun 2025 14:33:07 -0400 Subject: [PATCH 111/388] Add "dragonfly" topology --- config/lassen/network.json | 5 ++- raps/engine.py | 49 ++++++++++++++++++++--- raps/network.py | 80 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 7 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index ca8ddb9..4cd2676 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,6 +1,9 @@ { - "TOPOLOGY": "fat-tree", + "TOPOLOGY": "dragonfly", "NETWORK_MAX_BW": 10e9, "FATTREE_K": 16, + "DRAGONFLY_D": 11, + "DRAGONFLY_A": 9, + "DRAGONFLY_P": 8, "LATENCY": 1 } diff --git a/raps/engine.py b/raps/engine.py index 0e1496e..b2ad02f 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -5,13 +5,21 @@ import numpy as np from .job import Job, JobState from .policy import PolicyType -from .network import network_utilization, network_congestion, network_slowdown -from .network import build_fattree, link_loads_for_job, worst_link_util, node_id_to_host_name from .utils import summarize_ranges, expand_ranges, get_utilization from .utils import sum_values, min_value, max_value from .resmgr import ResourceManager from .schedulers import load_scheduler - +from .network import ( + network_utilization, + network_congestion, + network_slowdown, + build_fattree, + link_loads_for_job, + worst_link_util, + node_id_to_host_name, + build_dragonfly, + dragonfly_node_id_to_host_name, +) @dataclasses.dataclass class TickData: @@ -87,10 +95,23 @@ class Engine: self.topology = self.config.get('TOPOLOGY') # if fat-tree, build the graph once if self.topology == "fat-tree": - print("building fat-tree...") + print("building fat-tree graph...") self.fattree_k = config.get("FATTREE_K") self.net_graph = build_fattree(self.fattree_k) print(self.net_graph) + elif self.topology == "dragonfly": + print("building dragonfly graph...") + D = config["DRAGONFLY_D"] # groups + A = config["DRAGONFLY_A"] # routers per group + P = config["DRAGONFLY_P"] # hosts per router + self.net_graph = build_dragonfly(D, A, P) + print(self.net_graph) + + real_ids = self.resource_manager.available_nodes + real_ids.sort() + self.real_to_fat_idx = {rid: idx for idx, rid in enumerate(real_ids)} + # e.g. 
real_to_fat_idx[10] = 0, real_to_fat_idx[11] = 1, etc., up to 791 → 791 + self.max_link_bw = self.config.get("NETWORK_MAX_BW") def add_running_jobs_to_queue(self, jobs_to_submit: List): @@ -246,12 +267,11 @@ class Engine: net_rx = get_utilization(job.nrx_trace, time_quanta_index) net_util = network_utilization(net_tx, net_rx, max_throughput) - if job.nodes_required == 1: + if job.nodes_required <= 1: net_cong = 0 net_util = 0 elif self.topology == "fat-tree": - # Map integers to hostnames host_list = [node_id_to_host_name(n, self.fattree_k) for n in job.scheduled_nodes] loads = link_loads_for_job(self.net_graph, host_list, net_tx) @@ -260,6 +280,23 @@ class Engine: if self.debug: print(" fat-tree hosts:", host_list) + elif self.topology == "dragonfly": + D = self.config["DRAGONFLY_D"] + A = self.config["DRAGONFLY_A"] + P = self.config["DRAGONFLY_P"] + + host_list = [] + for real_n in job.scheduled_nodes: + fat_idx = self.real_to_fat_idx[real_n] # contiguous in [0..(D*A*P−1)] + host_list.append(dragonfly_node_id_to_host_name(fat_idx, D, A, P)) + if self.debug: + print(" dragonfly hosts:", host_list) + ##if len(host_list) <= 1: + # net_cong = 0.0 + #else: + loads = link_loads_for_job(self.net_graph, host_list, net_tx) + net_cong = worst_link_util(loads, max_throughput) + else: # capacity model: simple α+β or normalized overload net_cong = network_congestion(net_tx, net_rx, max_throughput) diff --git a/raps/network.py b/raps/network.py index 3876677..fa608a1 100644 --- a/raps/network.py +++ b/raps/network.py @@ -1,4 +1,6 @@ import networkx as nx +from itertools import combinations + def network_congestion(tx, rx, max_throughput): """ @@ -31,6 +33,7 @@ def network_slowdown(current_throughput, max_throughput): else: return current_throughput / max_throughput + def build_fattree(k): """ Build a k-ary fat-tree: @@ -76,6 +79,7 @@ def build_fattree(k): G.add_edge(e, host) return G + def all_to_all_paths(G, hosts): """ Given a list of host names, return shortest‐paths for every unordered pair. @@ -88,6 +92,7 @@ def all_to_all_paths(G, hosts): paths.append((src, dst, p)) return paths + def link_loads_for_job(G, job_hosts, tx_volume_bytes): """ Distribute tx_volume_bytes from each host equally to all its peers; @@ -108,6 +113,7 @@ def link_loads_for_job(G, job_hosts, tx_volume_bytes): loads[edge] += per_peer return loads + def worst_link_util(loads, throughput): """ Given loads in **bytes** and capacity in **bits/sec**, convert: @@ -121,6 +127,7 @@ def worst_link_util(loads, throughput): max_util = util return max_util + def node_id_to_host_name(node_id: int, k: int) -> str: """ Map a 0-based integer node_id into one of the fat-tree hosts "h_{pod}_{edge}_{h}". @@ -132,3 +139,76 @@ def node_id_to_host_name(node_id: int, k: int) -> str: edge = offset // (k // 2) idx = offset % (k // 2) return f"h_{pod}_{edge}_{idx}" + + +def build_dragonfly(D: int, A: int, P: int) -> nx.Graph: + """ + Build a “simple” k-ary Dragonfly with: + D = # of groups + A = # of routers per group + P = # of hosts (endpoints) per router + + Naming convention: + - Router nodes: "r_{g}_{r}" with g ∈ [0..D−1], r ∈ [0..A−1] + - Host nodes: "h_{g}_{r}_{p}" with p ∈ [0..P−1] + + Topology: + 1. All routers within a group form a full clique. + 2. Each router r in group g has exactly one “global link” to router r in each other group. + 3. Each router r in group g attaches to P hosts ("h_{g}_{r}_{0..P−1}"). 
+ """ + G = nx.Graph() + + # 1) Create all router nodes + for g in range(D): + for r in range(A): + router = f"r_{g}_{r}" + G.add_node(router, type="router", group=g, index=r) + + # 2) Intra‐group full mesh of routers + for g in range(D): + routers_in_group = [f"r_{g}_{r}" for r in range(A)] + for u, v in combinations(routers_in_group, 2): + G.add_edge(u, v) + + # 3) Inter‐group “one‐to‐one” global links + # (router index r in group g → router index r in group g2) + for g1 in range(D): + for g2 in range(g1 + 1, D): + for r in range(A): + u = f"r_{g1}_{r}" + v = f"r_{g2}_{r}" + G.add_edge(u, v) + + # 4) Attach hosts to each router + for g in range(D): + for r in range(A): + router = f"r_{g}_{r}" + for p in range(P): + host = f"h_{g}_{r}_{p}" + G.add_node(host, type="host", group=g, router=r, index=p) + G.add_edge(router, host) + + return G + + +def dragonfly_node_id_to_host_name(fat_idx: int, D: int, A: int, P: int) -> str: + """ + Given a contiguous fat‐index ∈ [0..(D*A*P − 1)], return "h_{g}_{r}_{p}". + Hosts are laid out in order: + 0..(P−1) → group=0, router=0, p=0..P−1 + P..2P−1 → group=0, router=1, p=0..P−1 + … + (A*P)..(2A*P−1) → group=1, router=0, … + In general: + host_offset = fat_idx % P + router_offset = (fat_idx // P) % A + group = fat_idx // (A*P) + """ + total_hosts = D * A * P + assert 0 <= fat_idx < total_hosts, "fat_idx out of range" + + host_offset = fat_idx % P + router_group = (fat_idx // P) % A + pod = fat_idx // (A * P) + return f"h_{pod}_{router_group}_{host_offset}" -- GitLab From f00276d539f6451ff424ff6a408bd4b5bf9438af Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 4 Jun 2025 14:55:54 -0400 Subject: [PATCH 112/388] When running network sims, replace segments column with slowdown --- config/lassen/network.json | 2 +- raps/engine.py | 2 ++ raps/ui.py | 62 ++++++++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index 4cd2676..02c835f 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,5 +1,5 @@ { - "TOPOLOGY": "dragonfly", + "TOPOLOGY": "capacity", "NETWORK_MAX_BW": 10e9, "FATTREE_K": 16, "DRAGONFLY_D": 11, diff --git a/raps/engine.py b/raps/engine.py index b2ad02f..2ce94a7 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -332,6 +332,8 @@ class Engine: else: slowdown_factor = 1 + job.slowdown_factor = slowdown_factor + slowdown_factors.append(slowdown_factor) else: diff --git a/raps/ui.py b/raps/ui.py index f9cf1cf..fc2314b 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -19,6 +19,7 @@ class LayoutManager: def __init__(self, layout_type, engine: Engine, total_timesteps=0, debug=None, **config): self.engine = engine self.config = config + self.topology = self.engine.config.get("TOPOLOGY", "none") self.console = Console() self.layout = Layout() self.hascooling = layout_type == "layout2" @@ -102,8 +103,17 @@ class LayoutManager: show_nodes : bool, optional Flag indicating whether to display node information (default is False). 
""" - # Define columns with header styles - columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES", "NODE SEGMENTS"] + + # Decide whether to show "SLOWDOWN" (if real topology) or "NODE SEGMENTS" (if capacity/none) + show_slowdown = (self.topology in ("fat-tree", "dragonfly", "capacity")) + + # Build the column headers + columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"] + if show_slowdown: + columns.append("SLOWDOWN") + else: + columns.append("NODE SEGMENTS") + if show_nodes: columns.append("NODELIST") columns.append("TIME") @@ -113,32 +123,60 @@ class LayoutManager: for col in columns: table.add_column(col, justify="center") - # Add data rows with white values + # Add data rows for job in jobs: - node_segments = summarize_ranges(job.scheduled_nodes) - if show_nodes: + # Number of requested nodes as a string + n_nodes = str(job.nodes_required) + + if show_slowdown: + # Each Job should have job.net_congestion set in Engine.tick() + slow = getattr(job, "slowdown_factor", 0.0) + # Format as "1.23×" (if ≤1.00 you will see "1.00×") + slowdown_str = f"{slow:.2f}×" + col_slow = slowdown_str + else: + # Fallback to original NODE SEGMENTS logic + node_segments = summarize_ranges(job.scheduled_nodes) + if show_nodes: + if len(node_segments) > 4: + nodes_display = ", ".join(node_segments[:2] + [ELLIPSES] + node_segments[-2:]) + else: + nodes_display = ", ".join(node_segments) + col_slow = nodes_display # reused variable name for simplicity + else: + col_slow = str(len(node_segments)) + + # If show_nodes is True, we need to append NODELIST as well + if show_nodes and not show_slowdown: + # use the same node_segments variable to build the list of nodes if len(node_segments) > 4: nodes_display = ", ".join(node_segments[:2] + [ELLIPSES] + node_segments[-2:]) else: nodes_display = ", ".join(node_segments) - else: - nodes_display = str(len(node_segments)) + col_nodelist = nodes_display + # Build the row row = [ str(job.id).zfill(5), convert_seconds(job.wall_time), str(job.name), str(job.account), job.state.value, - str(job.nodes_required), - nodes_display, - convert_seconds(job.running_time) + n_nodes, + col_slow, ] - if job.dilated: + if show_nodes: + # Insert NODELIST immediately after col_slow (whether NODELIST or SLOWDOWN) + row.append(col_nodelist) + + # Finally, append the running‐time column + row.append(convert_seconds(job.running_time)) + + # If the job has been flagged as “dilated”, show its row in yellow + if getattr(job, "dilated", False): row = [f"[yellow]{x}[/yellow]" for x in row] - # Add the row with the 'white' style applied to the whole row table.add_row(*row, style="white") # Update the layout -- GitLab From 585fc30bbdb368173fbc434ccfe73fd1d1ee73ea Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 4 Jun 2025 15:56:38 -0400 Subject: [PATCH 113/388] Divide lassen network data by number of nodes. Reduce bandwidth to 1E9 to show contention. 
--- config/lassen/network.json | 2 +- raps/dataloaders/lassen.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index 02c835f..e7bdca8 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,6 +1,6 @@ { "TOPOLOGY": "capacity", - "NETWORK_MAX_BW": 10e9, + "NETWORK_MAX_BW": 1e9, "FATTREE_K": 16, "DRAGONFLY_D": 11, "DRAGONFLY_A": 9, diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 54c416e..47592b9 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -177,11 +177,9 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): total_ib_tx = 4 * node_data['ib_tx'].sum() if node_data['ib_tx'].values.size > 0 else 0 total_ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else 0 - n = 1 # use total bytes per job - #n = nodes_required or 1 # use average bytes per node - #print("***", n, total_ib_tx, total_ib_rx) - ib_tx_per_node = total_ib_tx / n - ib_rx_per_node = total_ib_rx / n + n = nodes_required + ib_tx_per_node = total_ib_tx / n # average bytes per node + ib_rx_per_node = total_ib_rx / n # average bytes per node # net_tx, net_rx = [],[] # generate_network_sequences generates errors (e.g. -ff 800d -t 1d ) # net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) -- GitLab From 4d23a1a5c2cbbe22679589fe98b48388e5d32f94 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 5 Jun 2025 13:20:49 -0400 Subject: [PATCH 114/388] Change default to "fat-tree". Split SLOWDOWN into two words. --- config/lassen/network.json | 2 +- raps/ui.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/lassen/network.json b/config/lassen/network.json index e7bdca8..5a0f564 100644 --- a/config/lassen/network.json +++ b/config/lassen/network.json @@ -1,5 +1,5 @@ { - "TOPOLOGY": "capacity", + "TOPOLOGY": "fat-tree", "NETWORK_MAX_BW": 1e9, "FATTREE_K": 16, "DRAGONFLY_D": 11, diff --git a/raps/ui.py b/raps/ui.py index fc2314b..00638e0 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -110,7 +110,7 @@ class LayoutManager: # Build the column headers columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"] if show_slowdown: - columns.append("SLOWDOWN") + columns.append("SLOW DOWN") else: columns.append("NODE SEGMENTS") -- GitLab From 8c32ff776c4707210423a47716f9a200a0819664 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 5 Jun 2025 13:28:23 -0400 Subject: [PATCH 115/388] Add parallelized version of multi-part-sim-mpi.py --- README.md | 6 ++ multi-part-sim-mpi.py | 180 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 multi-part-sim-mpi.py diff --git a/README.md b/README.md index 90523eb..1272f8f 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,12 @@ or simply: python multi-part-sim.py -x 'setonix/*' # zsh +To run this in parallel use: + + mpiexec -n 2 python multi-part-sim-mpi.py -x setonix/part-cpu setonix/part-gpu + +*Note: first install `mpi4py` via pip or conda.* + This will simulate synthetic workloads on two partitions as defined in `config/setonix-cpu` and `config/setonix-gpu`. 
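Partitions are distributed across MPI ranks round-robin, so `-n` does not have to match the partition count; ranks beyond it simply idle. For example, with a hypothetical four-partition setup:

    mpiexec -n 4 python multi-part-sim-mpi.py -x 'setonix/*'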
To replay telemetry workloads from another system, e.g., Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data, e.g., python main.py --system marconi100 -f /path/to/marconi100/job_table.parquet diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py new file mode 100644 index 0000000..efec8d3 --- /dev/null +++ b/multi-part-sim-mpi.py @@ -0,0 +1,180 @@ +from raps.helpers import check_python_version +check_python_version() + +import glob +import os +import random +import sys + +from args import args +from raps.config import ConfigManager, CONFIG_PATH +from raps.schedulers.default import PolicyType +from raps.ui import LayoutManager +from raps.engine import Engine +from raps.flops import FLOPSManager +from raps.power import PowerManager, compute_node_power +from raps.telemetry import Telemetry +from raps.workload import Workload +from raps.utils import convert_to_seconds, next_arrival + +from mpi4py import MPI +from tqdm import tqdm + +def main(): + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + + # 1) Expand “partitions” (on rank 0) if the user used a glob: + if rank == 0: + partition_names = args.partitions + if '*' in partition_names[0]: + paths = glob.glob(os.path.join(CONFIG_PATH, partition_names[0])) + partition_names = [os.path.join(*p.split(os.sep)[-2:]) for p in paths] + else: + partition_names = None + + # 2) Broadcast the final list of partition_names to everyone + partition_names = comm.bcast(partition_names, root=0) + + # 3) Load configs for every partition (all ranks do this) + configs = [ConfigManager(system_name=p).get_config() for p in partition_names] + args_dicts = [{**vars(args), 'config': cfg} for cfg in configs] + + # 4) Each rank decides which partition‐indices it owns (round-robin): + local_partition_indices = [i for i in range(len(partition_names)) if (i % size) == rank] + local_partition_names = [partition_names[i] for i in local_partition_indices] + local_configs = [configs[i] for i in local_partition_indices] + local_args_dicts = [args_dicts[i] for i in local_partition_indices] + + # 5) Rank 0 builds (or loads) the entire job list, assigns partitions, groups by partition, + # then scatters exactly those jobs to each rank. Other ranks just sit in the scatter: + if rank == 0: + # --- a) “REPLAY” branch? 
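+        # (In both branches below, each job is assigned a partition via
+        # random.choices weighted by AVAILABLE_NODES, so larger partitions
+        # receive proportionally more of the workload.)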
+ if args.replay: + td = Telemetry(**args_dicts[0]) + print(f"[rank 0] Loading telemetry from {args.replay[0]}…") + jobs_full = td.load_snapshot(args.replay[0]) + available_nodes = [c['AVAILABLE_NODES'] for c in configs] + for job in jobs_full: + job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0] + if args.scale: + for job in tqdm(jobs_full, desc="[rank 0] Scaling jobs…"): + job['nodes_required'] = random.randint(1, args.scale) + job['requested_nodes'] = None + if args.arrival == 'poisson': + for job in tqdm(jobs_full, desc="[rank 0] Rescheduling arrivals…"): + p_name = job['partition'] + p_cfg = configs[partition_names.index(p_name)] + job['requested_nodes'] = None + job['submit_time'] = next_arrival(1 / p_cfg['JOB_ARRIVAL_TIME']) + elif args.arrival == 'prescribed': + raise NotImplementedError("‘prescribed’ arrival not implemented yet") + + # --- b) “SYNTHETIC WORKLOAD” branch: + else: + wl = Workload(*configs) + jobs_full = getattr(wl, args.workload)(num_jobs=args.numjobs) + available_nodes = [c['AVAILABLE_NODES'] for c in configs] + for job in jobs_full: + job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0] + + # --- c) Group “jobs_full” by partition name: + jobs_by_partition = { p: [] for p in partition_names } + for job in jobs_full: + jobs_by_partition[job['partition']].append(job) + + # --- d) Build a list-of-lists, one list per rank, containing the union + # of all jobs for that rank’s partitions: + jobs_for_rank = [[] for _ in range(size)] + for p_idx, p_name in enumerate(partition_names): + tgt = p_idx % size + jobs_for_rank[tgt].extend(jobs_by_partition[p_name]) + else: + jobs_for_rank = None + + # 6) Scatter the per-rank job lists: + local_jobs = comm.scatter(jobs_for_rank, root=0) + + # 7) Re‐group each rank’s “local_jobs” into a dict keyed by its local_partition_names: + local_jobs_by_partition = { p: [] for p in local_partition_names } + for job in local_jobs: + local_jobs_by_partition[job['partition']].append(job) + + # 8) Build one LayoutManager (and Engine/PowerManager/FLOPSManager) per local partition: + layout_managers = {} + for idx, p_name in enumerate(local_partition_names): + global_idx = local_partition_indices[idx] + cfg = configs[global_idx] + ad = args_dicts[global_idx] + + pm = PowerManager(compute_node_power, **cfg) + fm = FLOPSManager(**ad) + sc = Engine(power_manager=pm, flops_manager=fm, + cooling_model=None, **ad) + + layout_managers[p_name] = LayoutManager(args.layout, + engine=sc, + debug=args.debug, + **cfg) + + # 9) Compute timestep_start / timestep_end (all ranks agree): + if args.fastforward: + fastforward = convert_to_seconds(args.fastforward) + else: + fastforward = 0 + + if args.time: + timesteps = convert_to_seconds(args.time) + else: + timesteps = 88200 # default 24 hours + + timestep_start = fastforward + timestep_end = timestep_start + timesteps + + # 10) Build a generator for each partition that this rank owns: + local_generators = {} + for p_name in local_partition_names: + gen = layout_managers[p_name].run_stepwise( + local_jobs_by_partition[p_name], + timestep_start=timestep_start, + timestep_end=timestep_end + ) + local_generators[p_name] = gen + + # 11) Main simulation loop (every rank steps its own partitions in lockstep): + UIF = configs[0]['UI_UPDATE_FREQ'] # assume same for all configs + for t in range(timesteps): + # --- a) Advance each local partition’s generator + for gen in local_generators.values(): + try: + next(gen) + except StopIteration: + pass + + # --- b) 
Every UI_UPDATE_FREQ, do per-rank prints + one global reduction
+        if (t % UIF) == 0:
+            # 1) sum our local sys_power
+            local_sys_power = sum(lm.engine.sys_power for lm in layout_managers.values())
+
+            # 2) print *our* partition-level info now (so rank 0 and rank 1 will both print):
+            for p_name, lm in layout_managers.items():
+                # guard against an empty history; each entry is (time, util)
+                sys_util = lm.engine.sys_util_history[-1][1] if lm.engine.sys_util_history else 0.0
+                print(f"[DEBUG][rank {rank}] {p_name} – Timestep {t} – "
+                      f"Jobs running: {len(lm.engine.running)} – "
+                      f"Utilization: {sys_util:.2f}% – "
+                      f"Power: {lm.engine.sys_power:.1f}kW")
+
+            # 3) do an MPI reduce so that rank 0 knows the total across all ranks:
+            total_sys_power = comm.reduce(local_sys_power, op=MPI.SUM, root=0)
+            if rank == 0:
+                print(f"[DEBUG][rank {rank}] TOTAL system power (all partitions): {total_sys_power:.1f}kW")
+
+    # 12) Final barrier + exit message on rank 0
+    comm.Barrier()
+    if rank == 0:
+        print("Simulation complete (all ranks).")
+
+
+if __name__ == "__main__":
+    main()
-- 
GitLab

From b3c38aa1728853dea53bd7ce73fb44b5d3386ce1 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 9 Jun 2025 12:17:43 -0400
Subject: [PATCH 116/388] Fixed arrival rate and arrival time. This is set by
 system in config/*/scheduler.json and can be overwritten by
 --job-arrival-time and additionally be scaled by --job-arrival-rate

---
 args.py                          |  3 ++-
 main.py                          |  4 ++--
 raps/dataloaders/adastraMI250.py | 15 ++++++++-------
 raps/dataloaders/frontier.py     |  4 ++--
 raps/dataloaders/lassen.py       |  4 ++--
 raps/dataloaders/marconi100.py   | 25 ++++++++++++++-----------
 raps/telemetry.py                |  4 ++--
 raps/utils.py                    | 21 +++++++++++++++++++++
 raps/workload.py                 | 14 +++++++-------
 9 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/args.py b/args.py
index fc0b2f6..b8854c1 100644
--- a/args.py
+++ b/args.py
@@ -59,7 +59,8 @@ parser.add_argument('--backfill', type=str, choices=choices, default=None, help=
 # Redistribution of job arrival
 choices = ['prescribed', 'poisson']
 parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})')
-parser.add_argument('--arrival-poisson-rate', default=1, type=float, help='Modify arrival rate of poisson distribution (default 1)')
+parser.add_argument('--job-arrival-time', type=int, help='Modify job arrival for poisson distribution (in seconds).
Overrides config/*/scheduler.json value.') # no defaults as this overrides system config files +parser.add_argument('--job-arrival-rate', type=float, help='Modify arrival rate of poisson distribution (default 1)') # no defaults as this overrides system config files # Account options diff --git a/main.py b/main.py index 6b8dfad..fda5c91 100644 --- a/main.py +++ b/main.py @@ -28,7 +28,7 @@ from raps.telemetry import Telemetry from raps.workload import Workload from raps.account import Accounts from raps.weather import Weather -from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, next_arrival +from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, next_arrival_byconfargs from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats from raps.utils import convert_numpy_to_builtin @@ -107,7 +107,7 @@ if args.replay: print("available nodes:", config['AVAILABLE_NODES']) for job in tqdm(jobs, desc="Rescheduling jobs"): job['requested_nodes'] = None - job['submit_time'] = next_arrival(args.arrival_poisson_rate / config['JOB_ARRIVAL_TIME']) # default arrival rate = 1 + job['submit_time'] = next_arrival_byconfargs(config,args) else: # custom data loader print(*args.replay) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 6a546f0..5809b07 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -23,7 +23,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict -from ..utils import power_to_utilization, next_arrival +from ..utils import power_to_utilization, next_arrival_byconfkwargs def load_data(jobs_path, **kwargs): @@ -145,17 +145,18 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): end_state = jobs_df.loc[jidx, 'job_state'] + + priority = int(jobs_df.loc[jidx, 'priority']) + if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution scheduled_nodes = None - time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) + submit_time = next_arrival_byconfkwargs(config, kwargs) else: # Prescribed replay scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() - priority = int(jobs_df.loc[jidx, 'priority']) - - submit_timestamp = jobs_df.loc[jidx, 'submit_time'] - diff = submit_timestamp - telemetry_start_timestamp - submit_time = int(diff.total_seconds()) + submit_timestamp = jobs_df.loc[jidx, 'submit_time'] + diff = submit_timestamp - telemetry_start_timestamp + submit_time = int(diff.total_seconds()) time_limit = jobs_df.loc[jidx, 'time_limit'] # in seconds diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 1929175..53693be 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -14,7 +14,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict -from ..utils import power_to_utilization, next_arrival, encrypt +from ..utils import power_to_utilization, next_arrival_byconfkwargs, encrypt def aging_boost(nnodes): @@ -255,7 +255,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution scheduled_nodes = None - submit_time = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + submit_time = next_arrival_byconfkwargs(config,kwargs) start_time = None # ? end_time = None # ? 
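         # (aging_boost is defined near the top of this dataloader; judging by
         # its nodes_required argument, the seed priority scales with job size,
         # though the formula itself is not shown in this hunk.)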
priority = aging_boost(nodes_required) diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index f477da9..de0f716 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -38,7 +38,7 @@ from tqdm import tqdm from datetime import timedelta from ..job import job_dict -from ..utils import power_to_utilization, next_arrival, convert_to_seconds +from ..utils import power_to_utilization, next_arrival_byconfkwargs, convert_to_seconds def load_data(path, **kwargs): @@ -184,7 +184,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): if arrival == 'poisson': # Modify the submit times according to Poisson process scheduled_nodes = None - submit_time = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + submit_time = next_arrival_byconfkwargs(config,kwargs) start_time = None # Scheduler will determine start time end_time = None # Scheduler will determine end time else: # Prescribed replay diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index ca3051c..95eeccb 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -29,7 +29,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict -from ..utils import power_to_utilization, next_arrival +from ..utils import power_to_utilization, next_arrival_byconfkwargs def load_data(jobs_path, **kwargs): @@ -148,10 +148,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): priority = int(jobs_df.loc[jidx, 'priority']) partition = int(jobs_df.loc[jidx, 'partition']) - submit_timestamp = jobs_df.loc[jidx, 'submit_time'] - diff = submit_timestamp - telemetry_start_timestamp - submit_time = int(diff.total_seconds()) - time_limit = jobs_df.loc[jidx, 'time_limit'] start_timestamp = jobs_df.loc[jidx, 'start_time'] @@ -169,6 +165,19 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): print("wall_time != (end_time - start_time)") print(f"{wall_time} != {(end_time - start_time)}") + if arrival == 'poisson': # Modify the arrival times according to Poisson distribution + scheduled_nodes = None + submit_time = next_arrival_byconfkwargs(config,kwargs) + start_time = None + end_time = None + else: # Prescribed replay + scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() + + submit_timestamp = jobs_df.loc[jidx, 'submit_time'] + diff = submit_timestamp - telemetry_start_timestamp + submit_time = int(diff.total_seconds()) + + trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds trace_start_time = 0 trace_end_time = trace_time @@ -198,12 +207,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): # # When extracting out a single job, run one iteration past the end of the job # submit_time = config['UI_UPDATE_FREQ'] - if arrival == 'poisson': # Modify the arrival times according to Poisson distribution - scheduled_nodes = None - time_submit = next_arrival(1/config['JOB_ARRIVAL_TIME']) - time_start = None - else: # Prescribed replay - scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: diff --git a/raps/telemetry.py b/raps/telemetry.py index 7fae145..774464c 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -31,7 +31,7 @@ from .config import ConfigManager from .job import Job from .account import Accounts from .plotting import plot_submit_times, plot_nodes_histogram, plot_job_gantt -from .utils import next_arrival +from .utils import next_arrival_byconfargs class Telemetry: @@ -93,7 +93,7 @@ if __name__ == "__main__": if args.arrival == 
"poisson": for job in tqdm(jobs, desc="Updating requested_nodes"): job['requested_nodes'] = None - job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + job['submit_time'] = next_arrival_byconfargs(config,args) else: jobs,_,_ = td.load_data(args.replay) diff --git a/raps/utils.py b/raps/utils.py index 0e80f61..d381ae2 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -20,6 +20,7 @@ import uuid import json + def sum_values(values): return sum(x[1] for x in values) if values else 0 @@ -388,6 +389,26 @@ def create_dir_indexed(dir:str, path:str = None) -> str: index += 1 +def next_arrival_byconfargs(config,args,reset=False): + arrival_rate = 1 + arrival_time = config['JOB_ARRIVAL_TIME'] + if args.job_arrival_rate: + arrival_rate = args.job_arrival_rate + if args.job_arrival_time: + arrival_time = args.job_arrival_time + return next_arrival(arrival_rate / arrival_time, reset) + + +def next_arrival_byconfkwargs(config,kwargs,reset=False): + arrival_rate = 1 + arrival_time = config['JOB_ARRIVAL_TIME'] + if kwargs['job_arrival_rate']: + arrival_rate = kwargs['job_arrival_rate'] + if kwargs['job_arrival_time']: + arrival_time = kwargs['job_arrival_time'] + return next_arrival(arrival_rate / arrival_time, reset) + + def next_arrival(lambda_rate,reset=False): if not hasattr(next_arrival, 'next_time') or reset is True: # Initialize the first time it's called diff --git a/raps/workload.py b/raps/workload.py index e826142..bdb6257 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -44,7 +44,7 @@ ACCT_NAMES = ["ACT01", "ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07",\ MAX_PRIORITY = 500000 -from raps.utils import truncated_normalvariate_int, truncated_normalvariate_float, determine_state, next_arrival, truncated_weibull +from raps.utils import truncated_normalvariate_int, truncated_normalvariate_float, determine_state, next_arrival, next_arrival_byconfargs, truncated_weibull class Workload: @@ -60,7 +60,7 @@ class Workload: return (cpu_trace, gpu_trace) def job_arrival_distribution_draw_poisson(self,args,config): - return next_arrival(args.arrival_poisson_rate / config['JOB_ARRIVAL_TIME']) + return next_arrival_byconfargs(config,args) def job_size_distribution_draw_uniform(self,args,config): return random.randint(1, config['MAX_NODES_PER_JOB']) @@ -244,7 +244,7 @@ class Workload: net_tx, net_rx = [], [] # Jobs arrive according to Poisson process - time_to_next_job = next_arrival(1 / config['JOB_ARRIVAL_TIME']) + time_to_next_job = next_arrival_byconfargs(config,args) jobs.append(job_dict(nodes_required=nodes_required, name=name, account=account, cpu_trace=cpu_trace, @@ -546,8 +546,8 @@ def plot_job_hist(jobs,config=None,dist_split=None): axs[0][1].xaxis.set_label_coords(0.5,1.30) axs[0][1].xaxis.set_label_position("top") axs[0][1].xaxis.tick_top() - axs[0][0].hist(x2,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical',color='lightblue') - axs[0][0].hist(x,bins=max(1,min(100,(max(x2) - min(x)))), orientation='vertical') + axs[0][0].hist(x2,bins=max(1,math.ceil(min(100,(max(x2) - min(x))))), orientation='vertical',color='lightblue') + axs[0][0].hist(x,bins=max(1,math.ceil(min(100,(max(x2) - min(x))))), orientation='vertical') axs[1][0].sharex(axs[0][0]) axs[1][1].hist(y,bins=max(1,min(100,(max(y) - min(y)))), orientation='horizontal') @@ -568,10 +568,10 @@ def plot_job_hist(jobs,config=None,dist_split=None): (x1,x2) in [(n // 60,n % 60) for n in x_label_mins[0::60]]] axs[1][0].set_xticks(x_label_ticks,x_label_str) - miny = min(y) maxy = max(y) - y_ticks = 
np.arange(0,maxy,maxy // 10) + interval = max(1,maxy // 10) + y_ticks = np.arange(0, maxy, interval) y_ticks[0] = miny axs[1][0].set_yticks(y_ticks) -- GitLab From 735224d1fdf121f05eab97c673a0c130516e2068 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 9 Jun 2025 13:12:30 -0400 Subject: [PATCH 117/388] Add max_job_slowdown tracking --- raps/engine.py | 25 ++++++++++++++++++------- raps/stats.py | 10 ++++++++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 2ce94a7..7c7ce7c 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -73,7 +73,8 @@ class Engine: self.avg_net_tx = [] self.avg_net_rx = [] self.net_util_history = [] - self.slowdown_history = [] + self.avg_slowdown_history = [] + self.max_slowdown_history = [] # Get scheduler type from command-line args or default scheduler_type = kwargs.get('scheduler', 'default') @@ -417,8 +418,10 @@ class Engine: avg_net = sum(net_utils) / n n = len(slowdown_factors) or 1 - slowdown_per_job = sum(slowdown_factors) / n - self.slowdown_history.append(slowdown_per_job) + avg_slowdown_per_job = sum(slowdown_factors) / n + self.avg_slowdown_history.append(avg_slowdown_per_job) + max_slowdown_per_job = max(slowdown_factors) + self.max_slowdown_history.append(max_slowdown_per_job) # Save network history self.avg_net_tx.append(avg_tx) @@ -442,7 +445,7 @@ class Engine: avg_net_tx=avg_tx, avg_net_rx=avg_rx, avg_net_util=avg_net, - slowdown_per_job=slowdown_per_job + slowdown_per_job=avg_slowdown_per_job ) self.current_time += 1 @@ -558,19 +561,27 @@ class Engine: 'total cost': f'${total_cost:.2f}' } + network_stats = get_network_stats() + stats.update(network_stats) + if self.net_util_history: mean_net_util = sum(self.net_util_history) / len(self.net_util_history) else: mean_net_util = 0.0 - stats["avg network util"] = f"{mean_net_util*100:.2f}%" - if self.slowdown_history: - avg_job_slow = sum(self.slowdown_history) / len(self.slowdown_history) + if self.avg_slowdown_history: + avg_job_slow = sum(self.avg_slowdown_history) / len(self.avg_slowdown_history) else: avg_job_slow = 1.0 stats["avg per-job slowdown"] = f"{avg_job_slow:.2f}x" + if self.max_slowdown_history: + max_job_slow = max(self.max_slowdown_history) + else: + max_job_slow = 1.0 + stats["max per-job slowdown"] = f"{max_job_slow:.2f}x" + return stats diff --git a/raps/stats.py b/raps/stats.py index 3e04e1c..2727f81 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -73,12 +73,18 @@ def get_network_stats(engine: Engine): stats["avg network util"] = f"{mean_net_util * 100:.2f}%" - if engine.slowdown_history: - avg_job_slow = sum(engine.slowdown_history) / len(engine.slowdown_history) + if engine.avg_slowdown_history: + avg_job_slow = sum(engine.avg_slowdown_history) / len(engine.avg_slowdown_history) else: avg_job_slow = 1.0 stats["avg per-job slowdown"] = f"{avg_job_slow:.2f}x" + if engine.max_slowdown_history: + max_job_slow = max(engine.max_slowdown_history) + else: + max_job_slow = 1.0 + stats["max per-job slowdown"] = f"{max_job_slow:.2f}x" + return stats -- GitLab From bdea3a8609830c9e88bd640489d51267b65b0f12 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 9 Jun 2025 17:47:54 -0400 Subject: [PATCH 118/388] Changed file loader to be part of telemetry. This is now used from within main telemetry and workload, even when using them standalone. 
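
Minimal usage of the consolidated loader (signature as in the telemetry.py
hunk below):

    td = Telemetry(**args_dict)
    jobs, timestep_start, timestep_end, file_args = \
        td.load_jobs_times_args_from_files(files=args.replay, args=args)

This replaces the npz-vs-dataloader branching previously done inline in
main.py.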
--- main.py | 56 +++------------------- raps/engine.py | 2 +- raps/telemetry.py | 118 ++++++++++++++++++++++++++++++++++++---------- raps/workload.py | 13 +++-- 4 files changed, 110 insertions(+), 79 deletions(-) diff --git a/main.py b/main.py index fda5c91..7ffedf5 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ import json import numpy as np import random import pandas as pd +import sys import os import re import time @@ -74,54 +75,8 @@ if args.fastforward: if args.replay: td = Telemetry(**args_dict) - - # Try to extract date from given name to use as case directory - matched_date = re.search(r"\d{4}-\d{2}-\d{2}", args.replay[0]) - if matched_date: - extracted_date = matched_date.group(0) - DIR_NAME = "sim=" + extracted_date - else: - extracted_date = "Date not found" - DIR_NAME = create_casename() - - # Read telemetry data (either npz file or via custom data loader) - if args.replay[0].endswith(".npz"): # Replay .npz file - print(f"Loading {args.replay[0]}...") - jobs, timestep_start_from_file, timestep_end_from_file, args_from_file = td.load_snapshot(args.replay[0]) - if args_from_file.fastforward is None: - args_from_file.fastforward = 0 - print("File was generated with:" +\ - f"\n--system {args_from_file.system} " +\ - f"-ff {args_from_file.fastforward} " +\ - f"-t {args_from_file.time}\n" +\ - f"All Args:\n{args_from_file}" - ) - timestep_end = timestep_end_from_file - - if args.scale: - for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): - job['nodes_required'] = random.randint(1, args.scale) - job['requested_nodes'] = None # Setting to None triggers scheduler to assign nodes - - if args.policy == 'poisson': - print("available nodes:", config['AVAILABLE_NODES']) - for job in tqdm(jobs, desc="Rescheduling jobs"): - job['requested_nodes'] = None - job['submit_time'] = next_arrival_byconfargs(config,args) - - else: # custom data loader - print(*args.replay) - jobs, timestep_start_from_data, timestep_end = td.load_data(args.replay) - timestep_start += timestep_start_from_data - td.save_snapshot((jobs, timestep_start, timestep_end, args), filename=DIR_NAME) - - # Set number of timesteps based on the last job running which we assume - # is the maximum value of submit_time + wall_time of all the jobs - if args.time: - timestep_end = timestep_start + convert_to_seconds(args.time) - elif not timestep_end: - timestep_end = int(max(job['wall_time'] + job['start_time'] for job in jobs)) + 1 - + jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files(files=args.replay, args=args) + # TODO: Merge args and args_from_files? 
see telemetry.py:97 else: # Synthetic jobs wl = Workload(config) @@ -138,7 +93,9 @@ else: # Synthetic jobs else: timestep_end = 88200 # 24 hours - DIR_NAME = create_casename() + td = Telemetry(**args_dict) + td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname) + sc = Engine( power_manager=power_manager, @@ -148,6 +105,7 @@ sc = Engine( **args_dict, ) +DIR_NAME = td.dirname OPATH = OUTPUT_PATH / DIR_NAME print("Output directory is: ", OPATH) sc.opath = OPATH diff --git a/raps/engine.py b/raps/engine.py index b836271..1514e70 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -195,7 +195,7 @@ class Engine: if time_quanta_index < len(job.cpu_trace): cpu_util = get_utilization(job.cpu_trace, time_quanta_index) else: - cpu_util = get_utilization(job.cpu_trace, len(job.cpu_trace) - 1) + cpu_util = get_utilization(job.cpu_trace, max(0,len(job.cpu_trace) - 1)) elif isinstance(job.cpu_trace,float) or isinstance(job.cpu_trace,int): cpu_util = job.cpu_trace else: diff --git a/raps/telemetry.py b/raps/telemetry.py index 774464c..1457e43 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -6,7 +6,10 @@ parsing parquet files, and generating job state information. The module defines a `Telemetry` class for managing telemetry data and several helper functions for data encryption and conversion between node name and index formats. """ - +import json +import re +import sys +import random import argparse if __name__ == "__main__": @@ -27,11 +30,11 @@ import importlib import numpy as np from tqdm import tqdm -from .config import ConfigManager -from .job import Job -from .account import Accounts -from .plotting import plot_submit_times, plot_nodes_histogram, plot_job_gantt -from .utils import next_arrival_byconfargs +from raps.config import ConfigManager +from raps.job import Job +#from raps.account import Accounts +from raps.plotting import plot_submit_times, plot_nodes_histogram, plot_job_gantt +from raps.utils import next_arrival_byconfargs, create_casename, convert_to_seconds class Telemetry: @@ -41,8 +44,9 @@ class Telemetry: self.kwargs = kwargs self.system = kwargs.get('system') self.config = kwargs.get('config') + self.dirname = create_casename() try: - self.dataloader = importlib.import_module(f".dataloaders.{self.system}", package=__package__) + self.dataloader = importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__) except: print("WARNING: Failed to load dataloader") @@ -51,13 +55,21 @@ class Telemetry: np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) def load_snapshot(self, snapshot: str) -> list: - """Reads a snapshot from a compressed file and returns the jobs.""" + """Reads a snapshot from a compressed file and return 4 values: joblist, timestep_start, timestep_end and args. 
+ + :param str snapshot: Filename + :returns: + - job list + - timestep_start + - timestep_end + - args, which were used to generate the loaded snapshot + """ data = np.load(snapshot, allow_pickle=True, mmap_mode='r') - return (data['jobs'].tolist(), - data['timestep_start'], - data['timestep_end'], - data['args'].tolist() - ) + return data['jobs'].tolist(), \ + int(data['timestep_start']), \ + int(data['timestep_end']), \ + data['args'].tolist() + def load_data(self, files): """Load telemetry data using custom data loaders.""" @@ -79,6 +91,73 @@ class Telemetry: """ Return (row, col) tuple for a cdu index """ return self.dataloader.cdu_pos(index, config=self.config) + def load_jobs_times_args_from_files(self,*,files, args): + """ Load all files as combined jobs """ + # Read telemetry data (either npz file or via custom data loader) + # TODO: Merge args? See main.py:79 + timestep_end = 0 + timestep_start = sys.maxsize + jobs = [] + trigger_custom_dataloader = False + for i,file in enumerate(files): + if file.endswith(".npz"): # Replay .npz file + print(f"Loading {file}...") + jobs_from_file, timestep_start_from_file, timestep_end_from_file, args_from_file = self.load_snapshot(file) + if not hasattr(args_from_file,'fastforward') or args_from_file.fastforward is None: + args_from_file.fastforward = 0 + print("File was generated with:" +\ + f"\n--system {args_from_file.system} " +\ + f"-ff {args_from_file.fastforward} " +\ + f"-t {args_from_file.time}\n" +\ + f"All Args:\n{args_from_file}" +\ + "To use these set them from the commandline!" + ) + jobs.extend(jobs_from_file) + timestep_start = min(timestep_start,timestep_start_from_file) + timestep_end = max(timestep_end, timestep_end_from_file) + + if hasattr(args,'scale') and args.scale: + for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): + job['nodes_required'] = random.randint(1, args.scale) + job['requested_nodes'] = None # Setting to None triggers scheduler to assign nodes + + if hasattr(args,'policy') and args.policy == 'poisson': + print("available nodes:", config['AVAILABLE_NODES']) + for job in tqdm(jobs, desc="Rescheduling jobs"): + job['requested_nodes'] = None + job['submit_time'] = next_arrival_byconfargs(config,args) + elif i == 0: + trigger_custom_dataloader = True + break + else: + print("Multiple files given as input.") + break + + if trigger_custom_dataloader: # custom data loader + # Try to extract date from given name to use as case directory + matched_date = re.search(r"\d{4}-\d{2}-\d{2}", args.replay[0]) + if matched_date: + extracted_date = matched_date.group(0) + self.dirname = "sim=" + extracted_date + else: + extracted_date = "Date not found" + self.dirname = create_casename() + + print(*args.replay) + jobs, timestep_start_from_data, timestep_end_from_data = self.load_data(args.replay) + timestep_start = min(timestep_start, timestep_start_from_data) + timestep_end = max(timestep_end, timestep_end_from_data) + self.save_snapshot(jobs=jobs, + timestep_start=timestep_start, + timestep_end=timestep_end, + args=args, filename=self.dirname) + if args.time: + timestep_end = timestep_start + convert_to_seconds(args.time) + elif not timestep_end: + timestep_end = int(max(job['wall_time'] + job['start_time'] for job in jobs)) + 1 + + return jobs, timestep_start, timestep_end, args + if __name__ == "__main__": @@ -86,18 +165,9 @@ if __name__ == "__main__": config = ConfigManager(system_name=args.system).get_config() args_dict['config'] = config td = Telemetry(**args_dict) + jobs, timestep_start, timestep_end, _ 
= td.load_jobs_times_args_from_files(files=args.replay,args=args) - if args.replay[0].endswith(".npz"): - print(f"Loading {args.replay[0]}...") - jobs,_,_ = td.load_snapshot(args.replay[0]) - if args.arrival == "poisson": - for job in tqdm(jobs, desc="Updating requested_nodes"): - job['requested_nodes'] = None - job['submit_time'] = next_arrival_byconfargs(config,args) - else: - jobs,_,_ = td.load_data(args.replay) - - timesteps = int(max(job['wall_time'] + job['submit_time'] for job in jobs)) + timesteps = timestep_end - timestep_start dt_list = [] wt_list = [] diff --git a/raps/workload.py b/raps/workload.py index bdb6257..0148271 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -28,7 +28,7 @@ import math import random import numpy as np import matplotlib.pyplot as plt - +from raps.telemetry import Telemetry from raps.job import job_dict from raps.utils import create_file_indexed, create_dir_indexed @@ -678,12 +678,15 @@ def add_workload_to_parser(parser): if __name__ == "__main__": - from args import args + from args import args, args_dict from raps.config import ConfigManager config = ConfigManager(system_name=args.system).get_config() - - workload = Workload(config) - jobs = getattr(workload, args.workload)(args=args) + if args.replay: + td = Telemetry(**args_dict) + jobs,_,_,_ = td.load_jobs_times_args_from_files(files=args.replay,args=args) + else: + workload = Workload(config) + jobs = getattr(workload, args.workload)(args=args) plot_job_hist(jobs, config=config, dist_split=args.multimodal) if args.output: filename = create_file_indexed('wl',create=False) -- GitLab From 1be86fe04dafad02d4f11499aac1bb5807bcc8b1 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 10 Jun 2025 10:54:28 -0400 Subject: [PATCH 119/388] Added Experimental Flag to map the number of nodes selected to the nearest power of N Indicated by this Flag: parser.add_argument("--jobsize-nearest-power-of", default=1, type=int,required=False,help="Map random samples to the nearest power of N your choice. (Experimental: This changes the shape of the distribution, as density of powers change on the numberline!)") This is not ideal, as the distribution changes. Improved solutions are a welcome contribution. --- raps/utils.py | 16 ++++++++++++++-- raps/workload.py | 13 +++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index d381ae2..c8cec10 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -107,6 +107,18 @@ def truncated_weibull(scale, shape, min, max): return int(number) +def return_nearest_power_of(*,number,base): + if base == 1: + return number + else: + next_num = base ** math.ceil(math.log(number,base)) + prev_num = base ** math.floor(math.log(number,base)) + if next_num - number < number - prev_num: + return next_num + else: + return prev_num + + def linear_to_3d_index(linear_index, shape): """ Convert linear index to 3D index. 
@@ -409,10 +421,10 @@ def next_arrival_byconfkwargs(config,kwargs,reset=False): return next_arrival(arrival_rate / arrival_time, reset) -def next_arrival(lambda_rate,reset=False): +def next_arrival(lambda_rate,reset=False, start_time=0): if not hasattr(next_arrival, 'next_time') or reset is True: # Initialize the first time it's called - next_arrival.next_time = 0 + next_arrival.next_time = start_time else: next_arrival.next_time += \ -math.log(1.0 - random.random()) / lambda_rate diff --git a/raps/workload.py b/raps/workload.py index 0148271..dca2f0e 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -30,7 +30,7 @@ import numpy as np import matplotlib.pyplot as plt from raps.telemetry import Telemetry from raps.job import job_dict -from raps.utils import create_file_indexed, create_dir_indexed +from raps.utils import create_file_indexed, create_dir_indexed, return_nearest_power_of JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",\ @@ -63,13 +63,16 @@ class Workload: return next_arrival_byconfargs(config,args) def job_size_distribution_draw_uniform(self,args,config): - return random.randint(1, config['MAX_NODES_PER_JOB']) + number = random.randint(1, config['MAX_NODES_PER_JOB']) + return return_nearest_power_of(number=number, base=args.jobsize_nearest_power_of) def job_size_distribution_draw_weibull(self,args,config): - return truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, 1, config['MAX_NODES_PER_JOB']) + number = truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, 1, config['MAX_NODES_PER_JOB']) + return return_nearest_power_of(number=number, base=args.jobsize_nearest_power_of) def job_size_distribution_draw_normal(self,args,config): - return truncated_normalvariate_int(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) + number = truncated_normalvariate_int(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) + return return_nearest_power_of(number=number, base=args.jobsize_nearest_power_of) def cpu_utilization_distribution_draw_uniform(self,args,config): return random.uniform(0.0, config['CPUS_PER_NODE']) @@ -645,6 +648,8 @@ def add_workload_to_parser(parser): parser.add_argument("--jobsize-weibull-shape", type=float, required=False, help="Jobsize shape of weibull") parser.add_argument("--jobsize-weibull-scale", type=float, required=False, help="Jobsize scale of weibull") + parser.add_argument("--jobsize-nearest-power-of", default=1, type=int,required=False,help="Map random samples to the nearest power of N your choice. (Experimental: This changes the shape of the distribution, as density of powers change on the numberline!)") + # Walltime: parser.add_argument("--walltime-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type') -- GitLab From 53bfbd19143b3c486f5a176ecf8b60d74d0a41f6 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 11 Jun 2025 18:23:35 -0400 Subject: [PATCH 120/388] fixes identified by smoketest. 
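Among them: --fastforward and --time are now converted to seconds once at parse time in args.py; main.py derives timestep_start/timestep_end after the workload is known; the Lassen loader writes np.nan instead of np.NaN and calls the renamed generate_network_sequences(); scheduler stats guard against empty queue/running histories; engine trace lookups clamp to the last sample; and convert_to_seconds() passes through values that are already plain integers, since post-processed args can be fed back in. Illustrative example of the last point (assuming the usual unit suffixes):

    from raps.utils import convert_to_seconds
    assert convert_to_seconds("2m") == 120
    assert convert_to_seconds(120) == 120  # second pass is now a no-op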
--- args.py | 9 +++++++++ main.py | 25 +++++++++++-------------- multi-part-sim.py | 2 +- raps/dataloaders/adastraMI250.py | 1 - raps/dataloaders/lassen.py | 17 +++++++++-------- raps/dataloaders/marconi100.py | 3 +-- raps/engine.py | 2 +- raps/stats.py | 11 +++++++++-- raps/telemetry.py | 5 +---- raps/utils.py | 9 +++++++-- 10 files changed, 49 insertions(+), 35 deletions(-) diff --git a/args.py b/args.py index b8854c1..3e1e3cb 100644 --- a/args.py +++ b/args.py @@ -2,6 +2,7 @@ import argparse from raps.schedulers.default import PolicyType, BackfillType from raps.workload import add_workload_to_parser +from raps.utils import convert_to_seconds parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)') @@ -70,4 +71,12 @@ parser.add_argument('--accounts-json', type=str, help='Json of account stats gen # ### At the end get args and an args_dict. import this if needed. args = parser.parse_args() +# Do conversions here if needed +if args.fastforward: + args.fastforward = convert_to_seconds(args.fastforward) +if args.time: + args.time = convert_to_seconds(args.time) +# generate the dictionary args_dict = vars(args) +# #import args and args_dict directly if needed.: +# from args import args,args_dict diff --git a/main.py b/main.py index 7ffedf5..9f4835d 100644 --- a/main.py +++ b/main.py @@ -4,17 +4,12 @@ import json import numpy as np import random import pandas as pd -import sys import os -import re import time -from tqdm import tqdm - from raps.helpers import check_python_version check_python_version() - from raps.config import ConfigManager from raps.constants import OUTPUT_PATH, SEED from raps.cooling import ThermoFluidsModel @@ -66,12 +61,6 @@ args_dict['config'] = config flops_manager = FLOPSManager(**args_dict) -timestep_start = 0 -if args.fastforward: - args.fastforward = convert_to_seconds(args.fastforward) - timestep_start = args.fastforward - - if args.replay: td = Telemetry(**args_dict) @@ -88,14 +77,22 @@ else: # Synthetic jobs print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace), '\twall_time(s):', job.wall_time) time.sleep(2) - if args.time: - timestep_end = convert_to_seconds(args.time) + timestep_start = 0 + if hasattr(jobs[0],'end_time'): + timestep_end = max([job.end_time for job in jobs]) else: timestep_end = 88200 # 24 hours td = Telemetry(**args_dict) td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname) +if args.fastforward: + args.fastforward = convert_to_seconds(args.fastforward) + timestep_start = args.fastforward + +if args.time: + timestep_end = convert_to_seconds(args.time) + sc = Engine( power_manager=power_manager, @@ -129,7 +126,7 @@ if args.verbose: print(jobs) total_timesteps = timestep_end - timestep_start -print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds') +print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds from {timestep_start} to {timestep_end}.') layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config) layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end) diff --git a/multi-part-sim.py b/multi-part-sim.py index b9d3856..6d88252 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -63,7 +63,7 @@ else: # Synthetic workload wl = Workload(*configs) # Generate jobs based on workload type - jobs = getattr(wl, args.workload)(num_jobs=args.numjobs) + jobs = getattr(wl, args.workload)(args=args) # Group jobs by partition 
jobs_by_partition = {partition: [] for partition in partition_names} diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 5809b07..00d8d9f 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -145,7 +145,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): end_state = jobs_df.loc[jidx, 'job_state'] - priority = int(jobs_df.loc[jidx, 'priority']) if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index de0f716..bf8d635 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -131,7 +131,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # TODO: Jobs could have a time-series per node! gpu_node_energy = node_data['gpu_energy'].copy() gpu_node_energy[gpu_node_energy < 0] = 0.0 - gpu_node_energy[gpu_node_energy == np.NaN] = 0.0 + gpu_node_energy[gpu_node_energy == np.nan] = 0.0 if len(gpu_node_energy) < 1: gpu_power = gpu_node_idle_power # Setting to idle as other parts of the sim make this assumption else: @@ -156,7 +156,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): # Same cpu_node_usage = node_data['cpu_usage'].copy() cpu_node_usage[cpu_node_usage < 0] = 0.0 - cpu_node_usage[cpu_node_usage == np.NaN] = 0.0 + cpu_node_usage[cpu_node_usage == np.nan] = 0.0 if wall_time > 0: threads_per_core = config['THREADS_PER_CORE'] cpu_util = cpu_node_usage.sum() / 10e9 / nodes_required / wall_time / threads_per_core @@ -192,12 +192,13 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): submit_time = compute_time_offset(row['job_submit_timestamp'], telemetry_start_timestamp) start_time = compute_time_offset(row['begin_timestamp'], telemetry_start_timestamp) end_time = compute_time_offset(row['end_timestamp'], telemetry_start_timestamp) - time_limit = row['time_limit'] - trace_time = wall_time - trace_start_time = start_time - trace_end_time = end_time - trace_missing_values = False + time_limit = row['time_limit'] + + trace_time = wall_time + trace_start_time = start_time + trace_end_time = end_time + trace_missing_values = False if verbose: print('ib_tx, ib_rx, samples:', ib_tx, ib_rx, samples) @@ -314,5 +315,5 @@ if __name__ == "__main__": intervals = 20 # number of 20-second intervals lambda_poisson = 0.3 # control sporadicity - tx_sequence, rx_sequence = generate_ib_tx_rx_sequences(total_ib_tx, total_ib_rx, intervals, lambda_poisson) + tx_sequence, rx_sequence = generate_network_sequences(total_ib_tx, total_ib_rx, intervals, lambda_poisson) print(tx_sequence, rx_sequence) diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 95eeccb..8ab139b 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -177,8 +177,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): diff = submit_timestamp - telemetry_start_timestamp submit_time = int(diff.total_seconds()) - - trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds + trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds trace_start_time = 0 trace_end_time = trace_time if wall_time > trace_time: diff --git a/raps/engine.py b/raps/engine.py index 1514e70..0fdfcf6 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -205,7 +205,7 @@ class Engine: if time_quanta_index < len(job.gpu_trace): gpu_util = get_utilization(job.gpu_trace, time_quanta_index) else: - gpu_util = get_utilization(job.gpu_trace, 
len(job.gpu_trace) - 1) + gpu_util = get_utilization(job.gpu_trace, max(0,len(job.gpu_trace) - 1)) elif isinstance(job.gpu_trace,float) or isinstance(job.gpu_trace,int): gpu_util = job.gpu_trace else: diff --git a/raps/stats.py b/raps/stats.py index 98b97bb..358f172 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -54,8 +54,15 @@ def min_max_sum(value,min,max,sum): def get_scheduler_stats(engine: Engine): - average_queue = sum(engine.scheduler_queue_history) / len(engine.scheduler_queue_history) - average_running = sum(engine.scheduler_running_history) / len(engine.scheduler_running_history) + if len(engine.scheduler_queue_history) != 0: + average_queue = sum(engine.scheduler_queue_history) / len(engine.scheduler_queue_history) + else: + average_queue = 0 + if len(engine.scheduler_running_history) != 0: + average_running = sum(engine.scheduler_running_history) / len(engine.scheduler_running_history) + else: + average_running = 0 + stats = { 'average_queue': average_queue, 'average_running': average_running, diff --git a/raps/telemetry.py b/raps/telemetry.py index 1457e43..4288cf4 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -6,7 +6,6 @@ parsing parquet files, and generating job state information. The module defines a `Telemetry` class for managing telemetry data and several helper functions for data encryption and conversion between node name and index formats. """ -import json import re import sys import random @@ -25,6 +24,7 @@ if __name__ == "__main__": parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') args = parser.parse_args() + args_dict = vars(args) import importlib import numpy as np @@ -70,7 +70,6 @@ class Telemetry: int(data['timestep_end']), \ data['args'].tolist() - def load_data(self, files): """Load telemetry data using custom data loaders.""" return self.dataloader.load_data(files, **self.kwargs) @@ -160,8 +159,6 @@ class Telemetry: if __name__ == "__main__": - - args_dict = vars(args) config = ConfigManager(system_name=args.system).get_config() args_dict['config'] = config td = Telemetry(**args_dict) diff --git a/raps/utils.py b/raps/utils.py index c8cec10..d4dea2f 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -20,7 +20,6 @@ import uuid import json - def sum_values(values): return sum(x[1] for x in values) if values else 0 @@ -310,6 +309,7 @@ def create_binary_array_numpy(max_time, trace_quanta, util): traces[i, :int(util * num_quanta / 100)] = 1 return traces + def extract_data_csv(fileName, skiprows, header): """ Read passed csv file path @ In, filename, dataframe, facility telemetry data @@ -322,16 +322,18 @@ def extract_data_csv(fileName, skiprows, header): df = df.dropna() return df + def resampledf(df, time_resampled): """ Match key and return idx @ In, None @ Out, CDU_names, list, list of CDU names """ - df.set_index('time',inplace =True) + df.set_index('time',inplace=True) df = df.reindex(df.index.union(time_resampled)).interpolate('values').loc[time_resampled] df = df.reset_index() return df + def output_dict(d, title='', output_file=sys.stdout): """ Write dictionary contents to a file. @@ -350,6 +352,7 @@ def output_dict(d, title='', output_file=sys.stdout): for key, value in d.items(): file.write(f"{key}: {value}\n") + def create_casename(prefix=''): """ Generate a unique case name. 
@@ -432,6 +435,8 @@ def next_arrival(lambda_rate,reset=False, start_time=0): def convert_to_seconds(time_str): + if isinstance(time_str, int): + return time_str # this happens.... # Define the conversion factors time_factors = { 'd': 86400, # 1 day = 86400 seconds -- GitLab From a6fc91c2215d26bfa1c5e724e76a9116e1644b58 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 16 Jun 2025 13:35:50 -0400 Subject: [PATCH 121/388] Better version of distribution is power of N and is of nth degree. --- raps/utils.py | 2 +- raps/workload.py | 69 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index d4dea2f..e0ac521 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -64,7 +64,7 @@ def truncated_normalvariate_int(mu, sigma, lower, upper): while i < CUTOFF: number = random.normalvariate(mu, sigma) if lower < number < upper: - return int(number) + return round(number) i += 1 raise Exception(f"mu:{mu} sigma:{sigma}, not a single hit in {CUTOFF} tries.") diff --git a/raps/workload.py b/raps/workload.py index dca2f0e..22bade7 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -30,7 +30,7 @@ import numpy as np import matplotlib.pyplot as plt from raps.telemetry import Telemetry from raps.job import job_dict -from raps.utils import create_file_indexed, create_dir_indexed, return_nearest_power_of +from raps.utils import create_file_indexed, create_dir_indexed JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",\ @@ -63,16 +63,63 @@ class Workload: return next_arrival_byconfargs(config,args) def job_size_distribution_draw_uniform(self,args,config): - number = random.randint(1, config['MAX_NODES_PER_JOB']) - return return_nearest_power_of(number=number, base=args.jobsize_nearest_power_of) + min_v = 1 + max_v = config['MAX_NODES_PER_JOB'] + if (args.jobsize_is_power_of is not None): + base = args.jobsize_is_power_of + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v,base))))] + selection = random.randint(0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + elif (args.jobsize_is_of_degree is not None): + exp = args.jobsize_is_of_degree + possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] + selection = random.randint(0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + else: + number = random.randint(1, config['MAX_NODES_PER_JOB']) + return number def job_size_distribution_draw_weibull(self,args,config): - number = truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, 1, config['MAX_NODES_PER_JOB']) - return return_nearest_power_of(number=number, base=args.jobsize_nearest_power_of) + min_v = 1 + max_v = config['MAX_NODES_PER_JOB'] + if (args.jobsize_is_power_of is not None): + base = args.jobsize_is_power_of + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v,base))))] + scale = math.log(args.jobsize_weibull_scale,base) + shape = math.log(args.jobsize_weibull_shape,base) + selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + elif (args.jobsize_is_of_degree is not None): + exp = args.jobsize_is_of_degree + possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] + scale = math.pow(args.jobsize_weibull_scale, 1 / exp) + shape = 
math.pow(args.jobsize_weibull_shape, 1 / exp) + selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + else: + number = truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, 1, config['MAX_NODES_PER_JOB']) + return number def job_size_distribution_draw_normal(self,args,config): - number = truncated_normalvariate_int(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) - return return_nearest_power_of(number=number, base=args.jobsize_nearest_power_of) + min_v = 1 + max_v = config['MAX_NODES_PER_JOB'] + if (args.jobsize_is_power_of is not None): + base = args.jobsize_is_power_of + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v,base))))] + mean = math.log(args.jobsize_normal_mean,base) + stddev = math.log(args.jobsize_normal_stddev,base) # (len(possible_jobsizes) / (max_v - min_v)) + selection = truncated_normalvariate_int(mean, stddev, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection - 1] + elif (args.jobsize_is_of_degree is not None): + exp = args.jobsize_is_of_degree + possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] + mean = math.pow(args.jobsize_normal_mean, 1 / exp) + stddev = math.pow(args.jobsize_normal_stddev, 1 / exp) + selection = truncated_weibull(mean, stddev, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + else: + number = truncated_normalvariate_int(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) + return number def cpu_utilization_distribution_draw_uniform(self,args,config): return random.uniform(0.0, config['CPUS_PER_NODE']) @@ -552,7 +599,6 @@ def plot_job_hist(jobs,config=None,dist_split=None): axs[0][0].hist(x2,bins=max(1,math.ceil(min(100,(max(x2) - min(x))))), orientation='vertical',color='lightblue') axs[0][0].hist(x,bins=max(1,math.ceil(min(100,(max(x2) - min(x))))), orientation='vertical') axs[1][0].sharex(axs[0][0]) - axs[1][1].hist(y,bins=max(1,min(100,(max(y) - min(y)))), orientation='horizontal') axs[1][0].sharey(axs[1][1]) @@ -648,7 +694,8 @@ def add_workload_to_parser(parser): parser.add_argument("--jobsize-weibull-shape", type=float, required=False, help="Jobsize shape of weibull") parser.add_argument("--jobsize-weibull-scale", type=float, required=False, help="Jobsize scale of weibull") - parser.add_argument("--jobsize-nearest-power-of", default=1, type=int,required=False,help="Map random samples to the nearest power of N your choice. 
(Experimental: This changes the shape of the distribution, as density of powers change on the numberline!)")
+    parser.add_argument("--jobsize-is-of-degree", default=None, type=int,required=False,help="Draw jobsizes from distribution of degree N (squared,cubed).")
+    parser.add_argument("--jobsize-is-power-of", default=None, type=int,required=False,help="Draw jobsizes from distribution of power of N (2=2^x,3=3^x).")

     # Walltime:
     parser.add_argument("--walltime-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type')
@@ -678,6 +725,10 @@ def add_workload_to_parser(parser):
     parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False,
                         help="Print Gannt with nodes required as line thickness (default false)")

+    args = parser.parse_args()
+    if (args.jobsize_is_power_of is not None and args.jobsize_is_of_degree is not None):
+        print("Choose either --jobsize-is-power-of or --jobsize-is-of-degree! Not both.")
+        exit(1)
     return parser

-- GitLab

From 11d6f8a680d1480596af70152b02f022c532ffe4 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 16 Jun 2025 14:04:29 -0400
Subject: [PATCH 122/388] Fixed counting npz output files.

---
 raps/workload.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/raps/workload.py b/raps/workload.py
index 22bade7..10218cd 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -745,7 +745,9 @@ if __name__ == "__main__":
         jobs = getattr(workload, args.workload)(args=args)
     plot_job_hist(jobs, config=config, dist_split=args.multimodal)
     if args.output:
-        filename = create_file_indexed('wl',create=False)
         timestep_start = min([x['submit_time'] for x in jobs])
         timestep_end = math.ceil(max([x['submit_time'] for x in jobs]) + max([x['wall_time'] for x in jobs]))
+        filename = create_file_indexed('wl',create=False,ending="npz").split(".npz")[0]
+        # savez_compressed adds the .npz suffix itself, but create_file_indexed needs to check for .npz to find existing files
         np.savez_compressed(filename,jobs=jobs,timestep_start=timestep_start, timestep_end=timestep_end, args=args)
+        print(filename + ".npz")  # Write to stdout to show which npz was created.

-- GitLab

From f7f2101dd5aadae473c04f8c5cbba3bd2d69c3d7 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 16 Jun 2025 16:41:21 -0400
Subject: [PATCH 123/388] Added time-delta parameter for RAPS simulation

---
 args.py | 1 +
 main.py | 8 +++++++-
 multi-part-sim.py | 7 ++++++-
 raps/engine.py | 28 +++++++++++++++++-----------
 raps/ui.py | 16 ++++++++--------
 raps/workload.py | 2 +-
 6 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/args.py b/args.py
index 3e1e3cb..93ffb37 100644
--- a/args.py
+++ b/args.py
@@ -14,6 +14,7 @@ parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU co
 # Simulation runtime options
 parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
 parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
+parser.add_argument("--time-delta", type=str, default=None, help='Time delta for simulation steps, e.g. 15, 15s, 1m, 1h, 3d. (Default unit in seconds.
If not set "TRACE_QUANTA" is used.)') parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout') parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') diff --git a/main.py b/main.py index 9f4835d..3b03e9e 100644 --- a/main.py +++ b/main.py @@ -126,9 +126,15 @@ if args.verbose: print(jobs) total_timesteps = timestep_end - timestep_start +if args.time_delta: + time_delta = convert_to_seconds(args.time_delta) +else: + time_delta = config['TRACE_QUANTA'] +print(time_delta) + print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds from {timestep_start} to {timestep_end}.') layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config) -layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end) +layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) engine_stats = get_engine_stats(sc) job_stats = get_job_stats(sc) diff --git a/multi-part-sim.py b/multi-part-sim.py index 6d88252..2d55ffb 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -91,8 +91,13 @@ else: timestep_start = fastforward timestep_end = timestep_start + timesteps +if args.time_delta: + time_delta = convert_to_seconds(args.time_delta) +else: + time_delta = config['TRACE_QUANTA'] + # Create generators for each layout manager -generators = {name: lm.run_stepwise(jobs_by_partition[name], timestep_start=timestep_start, timestep_end=timestep_end) +generators = {name: lm.run_stepwise(jobs_by_partition[name], timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) for name, lm in layout_managers.items()} # Step through all generators in lockstep diff --git a/raps/engine.py b/raps/engine.py index 0fdfcf6..30909b5 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -151,8 +151,7 @@ class Engine: return completed_jobs, newly_downed_nodes - - def tick(self): + def tick(self,time_delta=1): """Simulate a timestep.""" # Update running time for all running jobs @@ -192,7 +191,9 @@ class Engine: # in the past and no trace if there, read index 0 until values # are available. 
if isinstance(job.cpu_trace,list) or isinstance(job.cpu_trace,np.ndarray): - if time_quanta_index < len(job.cpu_trace): + if (isinstance(job.cpu_trace,list) and len(job.cpu_trace)) or (isinstance(job.cpu_trace,np.ndarray) and job.cpu_trace.size == 0): + cpu_util = 0 + elif time_quanta_index < len(job.cpu_trace): cpu_util = get_utilization(job.cpu_trace, time_quanta_index) else: cpu_util = get_utilization(job.cpu_trace, max(0,len(job.cpu_trace) - 1)) @@ -202,7 +203,9 @@ class Engine: raise NotImplementedError() if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace,np.ndarray): - if time_quanta_index < len(job.gpu_trace): + if (isinstance(job.gpu_trace,list) and len(job.gpu_trace)) or (isinstance(job.gpu_trace,np.ndarray) and job.gpu_trace.size == 0): + gpu_util = 0 + elif time_quanta_index < len(job.gpu_trace): gpu_util = get_utilization(job.gpu_trace, time_quanta_index) else: gpu_util = get_utilization(job.gpu_trace, max(0,len(job.gpu_trace) - 1)) @@ -302,7 +305,7 @@ class Engine: num_free_nodes=self.num_free_nodes, ) - self.current_time += 1 + self.current_time += time_delta return tick_data def prepare_system_state(self, all_jobs:List, timestep_start, timestep_end, replay:bool): @@ -331,7 +334,7 @@ class Engine: self.scheduler.policy = target_policy self.scheduler.bfpolicy = target_bfpolicy - def run_simulation(self, jobs, timestep_start, timestep_end, autoshutdown=False): + def run_simulation(self, jobs, timestep_start, timestep_end, time_delta=1, autoshutdown=False): """Generator that yields after each simulation tick.""" self.timesteps = timestep_end - timestep_start # Where is this used? @@ -346,8 +349,8 @@ class Engine: # Process jobs in batches for better performance of timestep loop all_jobs = jobs.copy() jobs = [] - # Batch Jobs into 6h windows based on submit_time - batch_window = 60 * 60 * 6 # 6h + # Batch Jobs into 6h windows based on submit_time or twice the time_delta if larger + batch_window = max(60 * 60 * 6, 2 * time_delta) # 6h for timestep in range(timestep_start,timestep_end): @@ -373,9 +376,12 @@ class Engine: if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0: print(".", end="", flush=True) - tick_data = self.tick() - tick_data.completed = completed_jobs - yield tick_data + if 0 == timestep % time_delta: + tick_data = self.tick(time_delta) + tick_data.completed = completed_jobs + yield tick_data + else: + yield None def get_job_history_dict(self): return self.job_history_dict diff --git a/raps/ui.py b/raps/ui.py index a1cd0f1..c88aa3a 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -400,7 +400,7 @@ class LayoutManager: self.progress.update(self.progress_task, description=f"{timestamp}",advance=timestamp,transient=True) self.layout["progress"].update(self.progress.get_renderable()) - def update(self, data: TickData): + def update(self, data: TickData, time_delta=1): uncertainties = self.engine.power_manager.uncertainties if data.current_time % self.config['UI_UPDATE_FREQ'] == 0: @@ -422,20 +422,20 @@ class LayoutManager: ) if False: self.render() - self.update_progress(1) - + self.update_progress(time_delta) def render(self): if not self.debug: self.console.clear() self.console.print(self.layout) - def run(self, jobs, timestep_start, timestep_end): + def run(self, jobs, timestep_start, timestep_end, time_delta): """ Runs the UI, blocking until the simulation is complete """ with Live(self.layout, refresh_per_second=5): - for data in self.engine.run_simulation(jobs, timestep_start, timestep_end): - self.update(data) + for data in 
self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta):
+            if data:
+                self.update(data,time_delta)

-    def run_stepwise(self, jobs, timestep_start, timestep_end):
+    def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta):
         """ Prepares the UI and returns a generator for the simulation """
-        return self.engine.run_simulation(jobs, timestep_start, timestep_end)
+        return self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)
diff --git a/raps/workload.py b/raps/workload.py
index 10218cd..57690b5 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -695,7 +695,7 @@ def add_workload_to_parser(parser):
     parser.add_argument("--jobsize-weibull-scale", type=float, required=False, help="Jobsize scale of weibull")

     parser.add_argument("--jobsize-is-of-degree", default=None, type=int,required=False,help="Draw jobsizes from distribution of degree N (squared,cubed).")
-    parser.add_argument("--jobsize-is-power-of", default=None, type=int,required=False,help="Draw jobsizes from distribution of power of N (2=2^x,3=3^x).")
+    parser.add_argument("--jobsize-is-power-of", default=None, type=int,required=False,help="Draw jobsizes from distribution of power of N (2->2^x,3->3^x).")

     # Walltime:
     parser.add_argument("--walltime-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type')
-- GitLab

From 3a53403ce90812c3dfe0ea8fef09c264552faba8 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 16 Jun 2025 17:11:26 -0400
Subject: [PATCH 124/388] Fixed argument parsing and timestep_end calculation.

The workload argument checks needed additional postprocessing, so that
postprocessing now lives in a function in args.py as well.

---
 args.py | 20 +++++++++++++-------
 main.py | 2 +-
 raps/workload.py | 6 ++++--
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/args.py b/args.py
index 93ffb37..34e0459 100644
--- a/args.py
+++ b/args.py
@@ -1,7 +1,7 @@
 import argparse

 from raps.schedulers.default import PolicyType, BackfillType
-from raps.workload import add_workload_to_parser
+from raps.workload import add_workload_to_parser, check_workload_args
 from raps.utils import convert_to_seconds

 parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)')
@@ -70,14 +70,20 @@ parser.add_argument('--accounts', action='store_true', help='Flag indicating if
 parser.add_argument('--accounts-json', type=str, help='Json of account stats generated in previous run. see raps/accounts.py')


+def post_process_args(args):
+    if args.fastforward:
+        args.fastforward = convert_to_seconds(args.fastforward)
+    if args.time:
+        args.time = convert_to_seconds(args.time)
+    return args
+
+
 # ### At the end get args and an args_dict. import this if needed.
args = parser.parse_args() -# Do conversions here if needed -if args.fastforward: - args.fastforward = convert_to_seconds(args.fastforward) -if args.time: - args.time = convert_to_seconds(args.time) +# Do conversions and checks here if needed +check_workload_args(args) +args = post_process_args(args) # generate the dictionary args_dict = vars(args) -# #import args and args_dict directly if needed.: +# #Now import args and args_dict directly if needed.: # from args import args,args_dict diff --git a/main.py b/main.py index 3b03e9e..94f6ce4 100644 --- a/main.py +++ b/main.py @@ -91,7 +91,7 @@ if args.fastforward: timestep_start = args.fastforward if args.time: - timestep_end = convert_to_seconds(args.time) + timestep_end = timestep_start + convert_to_seconds(args.time) sc = Engine( diff --git a/raps/workload.py b/raps/workload.py index 57690b5..b4e2fdd 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -725,11 +725,13 @@ def add_workload_to_parser(parser): parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gannt with nodes required as line thickness (default false)") - args = parser.parse_args() + return parser + + +def check_workload_args(args): if (args.jobsize_is_power_of is not None and args.jobsize_is_of_degree is not None): print("Choose either --jobsize-is-power-of or --jobsize-is-of-degree! Not both.") exit(1) - return parser if __name__ == "__main__": -- GitLab From ca9400867fddfb6375fd9ecf90ed052c6781cd46 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 17 Jun 2025 20:29:07 -0400 Subject: [PATCH 125/388] Initial commit of Google Cluster Traces v2 --- README.md | 5 + config/gcloudv2/power.json | 18 ++ config/gcloudv2/scheduler.json | 17 ++ config/gcloudv2/system.json | 20 ++ pyproject.toml | 2 + raps/dataloaders/gcloudv2.md | 122 ++++++++ raps/dataloaders/gcloudv2.py | 464 +++++++++++++++++++++++++++++++ scripts/get_cluster_v2_traces.sh | 38 +++ 8 files changed, 686 insertions(+) create mode 100644 config/gcloudv2/power.json create mode 100644 config/gcloudv2/scheduler.json create mode 100644 config/gcloudv2/system.json create mode 100644 raps/dataloaders/gcloudv2.md create mode 100644 raps/dataloaders/gcloudv2.py create mode 100755 scripts/get_cluster_v2_traces.sh diff --git a/README.md b/README.md index 1272f8f..a627317 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,11 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from # Adastra MI250 python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet +For Google cluster trace v2 + + # gcloudv2 + python main.py --system gcloudv2 -f ~/data/gcloud/v2 + ## Perform Network Simulation Lassen is one of the few datasets that has networking data. 
See `raps/dataloaders/lassen.py` for how to diff --git a/config/gcloudv2/power.json b/config/gcloudv2/power.json new file mode 100644 index 0000000..d6ec29e --- /dev/null +++ b/config/gcloudv2/power.json @@ -0,0 +1,18 @@ +{ + "POWER_GPU_IDLE": 88, + "POWER_GPU_MAX": 560, + "POWER_CPU_IDLE": 90, + "POWER_CPU_MAX": 280, + "POWER_MEM": 74.26, + "POWER_NIC": 20, + "POWER_NVME": 30, + "POWER_SWITCH": 250, + "POWER_CDU": 8473.47, + "POWER_UPDATE_FREQ": 15, + "RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 +} diff --git a/config/gcloudv2/scheduler.json b/config/gcloudv2/scheduler.json new file mode 100644 index 0000000..3cc1744 --- /dev/null +++ b/config/gcloudv2/scheduler.json @@ -0,0 +1,17 @@ +{ + "SEED": 42, + "JOB_ARRIVAL_TIME": 100, + "MTBF": 11, + "TRACE_QUANTA": 15, + "MIN_WALL_TIME": 3600, + "MAX_WALL_TIME": 43200, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 3000, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/gcloudv2/system.json b/config/gcloudv2/system.json new file mode 100644 index 0000000..525ab8a --- /dev/null +++ b/config/gcloudv2/system.json @@ -0,0 +1,20 @@ +{ + "NUM_CDUS": 125, + "RACKS_PER_CDU": 1, + "NODES_PER_RACK": 100, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 2, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [], + "DOWN_NODES": [], + "CPUS_PER_NODE": 1, + "GPUS_PER_NODE": 0, + "CPU_PEAK_FLOPS": 2048E9, + "GPU_PEAK_FLOPS": 0, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0.667 +} diff --git a/pyproject.toml b/pyproject.toml index 1b3f2e0..fc510e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,8 @@ dependencies = [ "numpy==1.23.5", "rich==13.6.0", "fmpy==0.3.19", + "fsspec==2025.5.1", + "gcsfs==2025.5.1", "pandas==2.0.3", "scipy==1.10.1", "pyarrow==15.0.1", diff --git a/raps/dataloaders/gcloudv2.md b/raps/dataloaders/gcloudv2.md new file mode 100644 index 0000000..b1f95cb --- /dev/null +++ b/raps/dataloaders/gcloudv2.md @@ -0,0 +1,122 @@ +# **Google Cluster Trace V2 (2011) Dataset Overview** + +This document provides a summary of the Google Cluster Trace V2 dataset, released in 2011\. This dataset offers insights into the operation of a large-scale production data center and its workload. It's crucial for research in areas like cluster scheduling, resource management, and workload characterization. + +## **1\. Dataset Overview** + +* **Scale:** The 2011 traces cover a **single production Borg cell (cluster)**. +* **Machines:** This cluster consisted of approximately **12,500 machines**. +* **Time Period:** The dataset spans **29 days** of workload data, collected during **May 2011**. +* **Total Size:** The total compressed size of the full dataset is around **41 GB**. +* **Format:** All files are provided in **gzipped CSV (.csv.gz)** format. +* **Anonymization:** The data is heavily anonymized to protect proprietary information. This means specific hardware details (like CPU models or exact core counts) are not provided, and resource values are normalized. User and job identifiers are opaque hashes. + +## **2\. Data Sources and File Contents** + +The V2 dataset is organized into subdirectories based on event types. Each subdirectory contains multiple gzipped CSV files (part-NNNNN-of-MMMMM.csv.gz). 
Each of these CSV files **does NOT have a header row**; the first row contains data. + +Here's a detailed look at the contents of the core files you've sampled: + +### **2.1. job\_events/part-NNNNN-of-MMMMM.csv.gz** + +This file contains records for job events. Each row represents an event in the lifecycle of a job. + +**Sample head output:** + +0,,3418309,0,70s3v5qRyCO/1PCdI6fVXnrW8FU/w+5CKRSa72xgcIo=,3,IHgtoxEBuUTHNbUeVs4hzptMY4n8rZKLbZg+Jh5fNG4=,wAmgn2H74cdoMuSFwJF3NaUEaudVBTZ0/HaNZBwIpEQ= +0,,3418314,0,70s3v5qRyCO/1PCdI6fVXnrW8FU/w+5CKRSa72xgcIo=,3,L52XDyhi9x9ChmVBZ1qavOFmnzPeVsvQ2QyGmBZcV4s=,ShNjeaoUeqGV2i9WMKEX9HTeuc9K2Fdfovibt7Mp6qI= +0,,3418319,0,70s3v5qRyCO/1PCdI6fVXnrW8FU/w+5CKRSa72xgcIo=,3,vq0IN3BWEbkDjYgYvkrVyH6OWoUoDwFFf3j/syEZzLA=,1A2GM17AzHRcKJcJet/oIF7FOORyFcAOcUSpR9Fqou8= + +**Schema Description:** + +| Column Index | Field Name | Description | Data Type (in CSV) | Notes | +| :---- | :---- | :---- | :---- | :---- | +| **0** | time | Time of event (microseconds) | Integer | | +| **1** | *(missing value)* | Often an empty string. | String | | +| **2** | job\_ID | Unique ID of the job | Integer | | +| **3** | event\_type | Type of event: 0=submit, 1=schedule, 2=evict, 3=fail, 4=finish, 5=kill, 6=lost, 7=update, 8=noop. | Integer | Sample shows 0 (submit). | +| **4** | user\_ID | Opaque ID of the user submitting the job | String | Hashed value. | +| **5** | scheduling\_class | 0=non-production, 1=production, 2=free. Values outside this range (like 3 in sample) might indicate an unlisted class or a specific trace artifact. | Integer | | +| **6** | job\_name | Opaque ID of the job's name | String | Hashed value. | +| **7** | logical\_job\_name | Opaque ID of the logical job name (for grouping related jobs) | String | Hashed value. | +| **8** | number\_of\_tasks | Number of tasks in the job (typically present only on submit events). | Integer | Can be empty if not applicable or derived for specific event types. | +| **9** | CPU\_request | (Normalized) CPU cores requested per task. | Float | | +| **10** | memory\_request | (Normalized) memory (RAM) requested per task. | Float | | + +### **2.2. machine\_events/part-NNNNN-of-MMMMM.csv.gz** + +This file describes events related to machines in the cluster. + +**Sample head output:** + +0,5,0,HofLGzk1Or/8Ildj2+Lqv0UGGvY82NLoni8+J/Yy0RU=,0.5,0.2493 +0,6,0,HofLGzk1Or/8Ildj2+Lqv0UGGvY82NLoni8+J/Yy0RU=,0.5,0.2493 +0,7,0,HofLGzk1Or/8Ildj2+Lqv0UGGvY82NLoni8+J/Yy0RU=,0.5,0.2493 + +**Schema Description:** + +| Column Index | Field Name | Description | Data Type (in CSV) | Notes | +| :---- | :---- | :---- | :---- | :---- | +| **0** | time | Time of event (microseconds) | Integer | | +| **1** | machine\_ID | Unique ID of the machine | Integer | IDs are simple integers, but map to opaque IDs in task\_events / task\_usage. | +| **2** | event\_type | Type of event: 0=add, 1=remove, 2=update | Integer | Sample shows 0 (add). | +| **3** | platform\_ID | Opaque string representing the machine's microarchitecture and chipset version | String | Hashed value. Provides insight into hardware heterogeneity without specifics. | +| **4** | CPU\_capacity | (Normalized) Total CPU cores on the machine (e.g., 0.5, 1.0). | Float | Normalized value relative to the largest CPU capacity in the trace (1.0). | +| **5** | memory\_capacity | (Normalized) Total memory (RAM) on the machine. | Float | Normalized value. | + +### **2.3. task\_events/part-NNNNN-of-MMMMM.csv.gz** + +This file details events related to individual tasks, which are components of jobs. 
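Since these files ship without header rows, column names must be supplied when reading them. A minimal sketch for loading all parts of task\_events with pandas (the column names simply mirror the schema table below, and the glob path assumes the local layout produced by the download script):

```python
import glob
import pandas as pd

# task_events columns, following the schema table in this section (names are illustrative)
TASK_EVENT_COLS = [
    "time", "missing_info", "job_ID", "task_index", "machine_ID",
    "event_type", "user_ID", "scheduling_class", "priority",
    "CPU_request", "memory_request", "disk_space_request", "constraints",
]

# Read every gzipped, headerless part file and concatenate into one frame
frames = [
    pd.read_csv(path, compression="gzip", header=None, names=TASK_EVENT_COLS)
    for path in sorted(glob.glob("task_events/part-*-of-*.csv.gz"))
]
task_events = pd.concat(frames, ignore_index=True)
print(task_events.shape)
```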
+ +**Sample head output:** + +0,2,3418309,0,4155527081,0,70s3v5qRyCO/1PCdI6fVXnrW8FU/w+5CKRSa72xgcIo=,3,9,,,, +0,2,3418309,1,329150663,0,70s3v5qRyCO/1PCdI6fVXnrW8FU/w+5CKRSa72xgcIo=,3,9,,,, +0,,3418314,0,3938719206,0,70s3v5qRyCO/1PCdI6fVXnrW8FU/w+5CKRSa72xgcIo=,3,9,0.125,0.07446,0.0004244,0 + +**Schema Description:** + +| Column Index | Field Name | Description | Data Type (in CSV) | Notes | +| :---- | :---- | :---- | :---- | :---- | +| **0** | time | Time of event (microseconds) | Integer | | +| **1** | *(missing value)* | Often an empty string. | String | | +| **2** | job\_ID | Unique ID of the job this task belongs to | Integer | | +| **3** | task\_index | The index of the task within the job (0-indexed). Uniquely identifies a task when combined with job\_ID. | Integer | | +| **4** | machine\_ID | ID of the machine where the event occurred (if applicable). This is typically populated when tasks are scheduled or run. Missing implies task not yet assigned to machine. | Integer | This is the opaque machine ID (hashed), distinct from the simple integer machine\_ID in machine\_events but maps to them. | +| **5** | event\_type | Type of task event: 0=submit, 1=schedule, 2=evict, 3=fail, 4=finish, 5=kill, 6=lost, 7=update, 8=noop, 9=assign. | Integer | Sample shows 0 (submit), 2 (evict). | +| **6** | user\_ID | Opaque ID of the user submitting the job. | String | Hashed value. | +| **7** | scheduling\_class | 0=non-production, 1=production, 2=free. Values like 3, 9 might be other classes. | Integer | | +| **8** | priority | Integer priority from 0 (lowest) to 11 (highest). | Integer | | +| **9** | CPU\_request | (Normalized) CPU cores requested by this task. | Float | Empty if not applicable for event type. | +| **10** | memory\_request | (Normalized) memory requested by this task. | Float | Empty if not applicable for event type. | +| **11** | disk\_space\_request | (Normalized) disk space requested by this task. | Float | Empty if not applicable for event type. | +| **12** | constraints | (Binary) 0=no constraints, 1=has constraints. | Integer | Empty if not applicable. | + +### **2.4. task\_usage/part-NNNNN-of-MMMMM.csv.gz** + +This is typically the largest file, containing periodic snapshots of resource usage for running tasks. + +**Sample head output:** + +600000000,900000000,3418309,0,4155527081,0.001562,0.06787,0.07568,0.001156,0.001503,0.06787,2.861e-06,0.0001869,0.03967,0.0003567,2.445,0.007243,0,1,0 +600000000,900000000,3418309,1,329150663,0.001568,0.06787,0.07556,0.0003195,0.0007,0.06787,5.722e-06,0.0001879,0.03302,0.0009289,2.1,0.005791,0,1,0 + +**Schema Description:** + +| Column Index | Field Name | Description | Data Type (in CSV) | Notes | +| :---- | :---- | :---- | :---- | :---- | +| **0** | start\_time | Start time of the data sample (microseconds) | Integer | | +| **1** | end\_time | End time of the data sample (microseconds) | Integer | Typically start\_time \+ 300,000,000 (300 seconds or 5 minutes). | +| **2** | job\_ID | Unique ID of the job | Integer | | +| **3** | task\_index | Index of the task within the job | Integer | | +| **4** | machine\_ID | ID of the machine where this task ran during the sample period | Integer | Opaque machine ID (hashed). | +| **5** | CPU\_usage\_rate | Normalized average CPU usage rate (cores per second) during the sample. | Float | | +| **6** | memory\_usage\_avg | Normalized average memory usage. | Float | | +| **7** | memory\_usage\_max | Normalized maximum memory usage. 
| Float | | +| **8** | disk\_I/O\_time\_avg | Normalized average disk I/O time. | Float | | +| **9** | disk\_I/O\_time\_max | Normalized maximum disk I/O time. | Float | | +| **10** | CPUs\_allocated | Normalized CPU cores allocated to the task during this sample. | Float | | +| **11** | memory\_allocated | Normalized amount of memory allocated. | Float | | +| **12** | sample\_duration | Duration of the sample period (microseconds). | Float | Usually around 300,000,000 (300 seconds). | +| **13-19** | *(unnamed/unknown)* | Additional columns not explicitly documented. | Mixed | These are usually other system metrics or internal flags. You can name them generically if needed. | + diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py new file mode 100644 index 0000000..9e76280 --- /dev/null +++ b/raps/dataloaders/gcloudv2.py @@ -0,0 +1,464 @@ +import pandas as pd +import fsspec +import os +import re +import math # For math.inf and -math.inf +import numpy as np # Needed for empty arrays for traces +from typing import List, Dict, Optional, Union, Generator, Any + +""" To download cluster traces into ~/data/gcloud/v2 + + 1. Install Google cloud SDK + + https://cloud.google.com/sdk/docs/install + + 2. gcloud auth login + + 3. See https://github.com/google/cluster-data - we are using v2 traces b/c the v3 traces are too large for practical study + + 4. See download script in ../../get_cluster_v2_traces.sh + +""" + +# Assuming this script is located in raps/dataloaders/ +# Adjust the path if your raps/job.py is located differently +try: + from ..job import job_dict +except ImportError: + # Fallback for direct script execution/testing outside RAPS structure + print("Warning: Could not import 'job_dict' directly. Using a dummy job_dict for testing.") + class job_dict: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + def __repr__(self): + return f"DummyJobDict({self.id})" + +class GoogleClusterV2DataLoader: + """ + A custom dataloader for Google Cluster Traces V2 (2011 dataset), + designed to read locally downloaded .csv.gz files in an ExaDigiT/RAPS style. + """ + + # --- Configuration for your local V2 files --- + # This is a default fallback path for direct script execution (when not called by RAPS). + # It MUST point directly to the directory *containing* machine_events, job_events, etc. + BASE_LOCAL_PATH = "~/data/gcloud/v2/google_cluster_data_2011_sample/" + + SUPPORTED_EVENT_TYPES = [ + "machine_events", "job_events", "task_events", "task_usage", + ] + SUPPORTED_FORMATS = ["csv"] + + def __init__(self, + event_types: Optional[Union[str, List[str]]] = None, + file_indices: Optional[Union[int, List[int]]] = None, + read_options: Optional[Dict[str, Any]] = None, + concatenate_files: bool = True, + base_data_path: Optional[str] = None): + """ + Initializes the GoogleClusterV2DataLoader to read from local V2 trace files. + + Args: + event_types (Optional[Union[str, List[str]]]): + Specific event types to load. If None, all supported event types will be considered. + file_indices (Optional[Union[int, List[int]]]): + Specific numerical indices of parts to load. If None, all available files for the selected types. + read_options (Optional[Dict[str, Any]]): + Additional options passed directly to pandas.read_csv(). + concatenate_files (bool): + If True, all loaded files will be concatenated into a single pandas DataFrame + when the iterator finishes. If False, `__iter__` will yield individual DataFrames. 
+ base_data_path (Optional[str]): The base path provided by the external calling script (e.g., RAPS). + This path will be adjusted to point to the actual data root. + """ + self.event_types = [event_types] if isinstance(event_types, str) else event_types + self.file_indices = [file_indices] if isinstance(file_indices, int) else file_indices + self.concatenate_files = concatenate_files + + # --- CRITICAL FIX START: Ensure 'time' column is read as int64 --- + self.read_options = read_options.copy() if read_options is not None else {} + if 'dtype' not in self.read_options: + self.read_options['dtype'] = {} + self.read_options['dtype']['time'] = 'int64' # Force 'time' to be read as integer + # --- CRITICAL FIX END --- + + # --- Determine the correct base path for this DataLoader instance --- + if base_data_path is not None: + clean_base_path = base_data_path + if not clean_base_path.endswith(os.sep): + clean_base_path += os.sep + self._current_base_path = os.path.join(clean_base_path, "google_cluster_data_2011_sample") + os.sep + else: + self._current_base_path = self.BASE_LOCAL_PATH + # --- End of base path determination --- + + self._fs = fsspec.AbstractFileSystem() + self._all_file_paths = [] + self._discover_files() + + def _discover_files(self): + """ + Discovers local V2 trace files based on specified event types and indices. + Populates self._all_file_paths with absolute file paths. + """ + event_types_to_consider = self.event_types if self.event_types else self.SUPPORTED_EVENT_TYPES + + self._all_file_paths = [] + + for event_type in event_types_to_consider: + event_type_dir = os.path.join(self._current_base_path, event_type) + + if not os.path.isdir(event_type_dir): + print(f"Warning: Local directory for '{event_type}' not found: '{event_type_dir}'. Skipping this type.") + continue + + if self.file_indices: + for idx in self.file_indices: + filename_pattern_re = rf"part-{idx:05d}-of-\d{{5}}\.csv\.gz" + + found_indexed_file = False + for filename in os.listdir(event_type_dir): + if re.fullmatch(filename_pattern_re, filename): + self._all_file_paths.append(os.path.join(event_type_dir, filename)) + found_indexed_file = True + break + + if not found_indexed_file: + print(f"Warning: Specific file '{event_type}/part-{idx:05d}-of-*.csv.gz' not found in '{event_type_dir}'.") + else: + for filename in os.listdir(event_type_dir): + if filename.startswith("part-") and filename.endswith(".csv.gz"): + self._all_file_paths.append(os.path.join(event_type_dir, filename)) + + self._all_file_paths = sorted(list(set(self._all_file_paths))) + + if not self._all_file_paths: + print(f"Warning: No local V2 trace files found in '{self._current_base_path}' matching the criteria.") + + def __len__(self) -> int: + return len(self._all_file_paths) + + def __iter__(self) -> Generator[pd.DataFrame, None, None]: + if not self._all_file_paths: + return + + all_data_frames = [] + total_files = len(self._all_file_paths) + + print(f"\nStarting to load {total_files} selected V2 trace files from '{self._current_base_path}'...") + + for i, file_path in enumerate(self._all_file_paths): + file_name = os.path.basename(file_path) + + file_size_bytes = os.path.getsize(file_path) + file_size_mb = file_size_bytes / (1024 * 1024) + + print(f"[{i + 1}/{total_files}] Loading '{file_name}' ({file_size_mb:.2f} MB)...", end='', flush=True) + + df = None + try: + df = pd.read_csv(file_path, compression='gzip', **self.read_options) + print(f" -> OK. Shape: {df.shape}") + except Exception as e: + print(f" -> FAILED. 
Error: {e}") + print(f" Failed to read CSV file '{file_name}'. Double-check CSV format (e.g., separator, header) or file integrity.") + continue + + if df is not None: + if self.concatenate_files: + all_data_frames.append(df) + else: + yield df + + if self.concatenate_files and all_data_frames: + final_df = pd.concat(all_data_frames, ignore_index=True) + print(f"\nAll selected V2 files concatenated. Final DataFrame shape: {final_df.shape}") + yield final_df + elif self.concatenate_files and not all_data_frames: + print("\nNo DataFrames were loaded to concatenate from the selected V2 files.") + + + def get_data_for_type(self, event_type: str, limit: Optional[int] = None) -> pd.DataFrame: + if event_type not in self.SUPPORTED_EVENT_TYPES: + raise ValueError(f"Unsupported event type: '{event_type}'. Choose from {self.SUPPORTED_EVENT_TYPES}") + + original_event_types = self.event_types + original_file_indices = self.file_indices + original_concatenate_files = self.concatenate_files + original_current_base_path = self._current_base_path + + self.event_types = [event_type] + self.concatenate_files = True + + temp_file_indices = None + if limit is not None: + temp_file_indices = list(range(limit)) + self.file_indices = temp_file_indices + + self._discover_files() + + combined_df = pd.DataFrame() + for df_chunk in self: + combined_df = df_chunk + + self.event_types = original_event_types + self.file_indices = original_file_indices + self.concatenate_files = original_concatenate_files + self._current_base_path = original_current_base_path + self._discover_files() + + return combined_df + +# --- MANDATORY RAPS `load_data` FUNCTION --- +def load_data( + data_path: Union[str, List[str]], + event_types: Optional[Union[str, List[str]]] = None, + file_indices: Optional[Union[int, List[int]]] = None, + read_options: Optional[Dict[str, Any]] = None, # User-provided read_options + **kwargs # Catch-all for additional arguments like 'system' +) -> tuple[List[Any], float, float]: # Updated return type hint for RAPS job_dict instances + """ + RAPS data loading entry point for Google Cluster Trace V2 (2011) data. + + Loads data from a specified local path, organizing it by event type. + It returns a list of primary 'job/task' records (as job_dict instances), + along with the global start and end timestamps of the loaded data. + + Args: + data_path (Union[str, List[str]]): The base path to the local V2 data directory. + This is the path provided by the RAPS main script. + Can be a string or a list containing a single string. + event_types (Optional[Union[str, List[str]]]): + Specific event types to load. If None, all supported types. + file_indices (Optional[Union[int, List[int]]]): + Specific numerical indices of parts to load. If None, all available parts. + read_options (Optional[Dict[str, Any]]): + Additional options for pandas.read_csv(). + **kwargs: Catch-all for any additional keyword arguments passed by RAPS + (e.g., 'system', 'config', 'cooling', 'fastforward', etc.). + + Returns: + tuple[List[Any], float, float]: + - A list of job_dict instances, where each represents a job/task record + and includes 'start_time' and 'wall_time' (even if derived/dummy). + - The global minimum timestamp found across all loaded data. + - The global maximum timestamp found across all loaded data. + Returns ([], 0.0, 0.0) if no data or time info found. 
+ """ + + # --- FIX 1: Handle data_path potentially being a list (from argparse) --- + if isinstance(data_path, list): + if len(data_path) == 1: + data_path_str = data_path[0] + else: + raise ValueError( + f"load_data expected a single base data path, but received a list of multiple paths: {data_path}. " + f"Please ensure RAPS passes a single path." + ) + else: + data_path_str = data_path + # --- END FIX 1 --- + + # Expand the user home directory if '~' is used in data_path_str + expanded_data_path = os.path.expanduser(data_path_str) + # Ensure it ends with a slash for consistency with os.path.join later + if not expanded_data_path.endswith(os.sep): + expanded_data_path += os.sep + + # This dictionary will store DataFrames for all event types loaded by this function + loaded_dfs: Dict[str, pd.DataFrame] = {} + + types_to_load_for_rap = event_types if event_types else GoogleClusterV2DataLoader.SUPPORTED_EVENT_TYPES + if isinstance(types_to_load_for_rap, str): + types_to_load_for_rap = [types_to_load_for_rap] + + # Initialize global min/max timestamps for the entire dataset + global_min_time = float(math.inf) + global_max_time = float(-math.inf) + + for event_type_key in types_to_load_for_rap: + # Create a new DataLoader instance for each event_type to get its concatenated DF. + dataloader = GoogleClusterV2DataLoader( + event_types=event_type_key, # Load only this specific type + file_indices=file_indices, # Apply file index filter + read_options=read_options, # Apply any custom read options (will be merged with default dtype for time) + concatenate_files=True, # Ensure a single DataFrame is yielded for this type + base_data_path=expanded_data_path # Pass the RAPS provided path + ) + + for df_current_type in dataloader: + if not df_current_type.empty: + loaded_dfs[event_type_key] = df_current_type + # DEBUG: Check if 'time' column is being correctly read. + if 'time' in df_current_type.columns: + print(f"DEBUG: '{event_type_key}' time min/max in DF: {df_current_type['time'].min()}/{df_current_type['time'].max()}") + else: + print(f"DEBUG: 'time' column NOT FOUND in {event_type_key}.") + else: + print(f"RAPS: No data loaded for event type '{event_type_key}'.") + + print("\n--- RAPS: Data loading complete for individual types ---") + + # --- FIX 2: Select and prepare the primary 'jobs' DataFrame for RAPS --- + # RAPS main.py is iterating over `jobs`, expecting `job['wall_time']` and `job['start_time']`. + # This means `jobs` must be a list of dictionaries (or similar objects). + jobs_list_for_rap: List[Any] = [] # Initialize as empty list + + # Prioritize task_events for job records due to granularity, otherwise use job_events. + raw_primary_records_df = pd.DataFrame() + if 'task_events' in loaded_dfs and not loaded_dfs['task_events'].empty: + raw_primary_records_df = loaded_dfs['task_events'].copy() + print(f"RAPS: Selected 'task_events' as the primary source for job records.") + elif 'job_events' in loaded_dfs and not loaded_dfs['job_events'].empty: + raw_primary_records_df = loaded_dfs['job_events'].copy() + print(f"RAPS: Selected 'job_events' as the primary source for job records (task_events not available/empty).") + else: + print("RAPS: Warning: Neither 'task_events' nor 'job_events' found/loaded. 
Cannot create job records.") + + if not raw_primary_records_df.empty: + # --- FIX 3: Prepare raw_primary_records_df with RAPS-expected columns --- + # Map V2 'time' column to RAPS 'submit_time' and 'start_time' + if 'time' in raw_primary_records_df.columns: + raw_primary_records_df['submit_time'] = raw_primary_records_df['time'] + raw_primary_records_df['start_time'] = raw_primary_records_df['time'] # Simplistic for first pass + else: + raw_primary_records_df['submit_time'] = 0 + raw_primary_records_df['start_time'] = 0 + print("Warning: 'time' column not found in primary records DataFrame. Using 0 for submit/start_time.") + + # Derive 'end_time' and 'wall_time'. This is a major simplification for V2 data. + # For a more accurate 'end_time' and 'wall_time', you'd need to: + # 1. Join with 'task_usage' or other event types. + # 2. Aggregate events by job/task ID to find actual lifecycle timestamps. + # For now, setting a dummy wall_time to satisfy RAPS's requirement for `job['wall_time']` + # and to allow its `int(max(job['wall_time'] + job['start_time']...` calculation. + raw_primary_records_df['wall_time'] = 1 # Dummy: 1 microsecond duration + raw_primary_records_df['end_time'] = raw_primary_records_df['start_time'] + raw_primary_records_df['wall_time'] + + # --- FIX 4: Create job_dict instances and populate list --- + # Get the jid (job ID filter) from kwargs, defaulting to '*' + jid_filter = kwargs.get('jid', '*') + + # Loop through each record (row) to create a job_dict instance + # It's usually best to filter for submit events to ensure unique job instances + # and to map them to the proper RAPS 'job' concept. + submit_records_df = raw_primary_records_df[ + raw_primary_records_df.get('event_type') == 'submit' # Use .get() for robustness + ].copy() if 'event_type' in raw_primary_records_df.columns else raw_primary_records_df.copy() # If no event_type, use all + + if 'job_ID' not in submit_records_df.columns: # Fallback if job_ID not present (e.g., for task_events direct) + submit_records_df['job_ID'] = submit_records_df['task_ID'] if 'task_ID' in submit_records_df.columns else range(len(submit_records_df)) + print("Warning: 'job_ID' not found in selected primary records. Using 'task_ID' or row index.") + + for index, row in submit_records_df.iterrows(): + job_id = row['job_ID'] + + # Apply RAPS's jid filter (from main.py example) + if jid_filter != '*' and str(job_id) != str(jid_filter): # Convert to string for comparison + continue + + # --- Map V2 Data to job_dict arguments --- + # Most of these are simplifications or dummies for V2 given the limited data. + nodes_required = 1 # Dummy + name = f"job_{job_id}" + account = f"user_{row['user_ID']}" if 'user_ID' in row else "unknown_user" + priority = row['priority'] if 'priority' in row else 0 + + # Trace data fields (cpu_trace, gpu_trace etc.) are arrays, initially empty. + # V2 has no GPUs. + cpu_trace = np.array([]) + gpu_trace = np.array([]) + nrx_trace = np.array([]) + ntx_trace = np.array([]) + + end_state = "UNKNOWN" # V2 job_events has event_type, but not direct final state field. 
+ scheduled_nodes = [] # Requires complex task scheduling analysis + + # Get trace-wide times (global min/max) + trace_start_time = float(global_min_time) if global_min_time != float(math.inf) else 0.0 + trace_end_time = float(global_max_time) if global_max_time != float(-math.inf) else 0.0 + + # This specific record's time (from its 'time' column) + trace_time_for_record = row['time'] if 'time' in row else 0 + + job_info = job_dict( + nodes_required=nodes_required, + name=name, + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + nrx_trace=nrx_trace, + ntx_trace=ntx_trace, + end_state=end_state, + scheduled_nodes=scheduled_nodes, + id=job_id, + priority=priority, + submit_time=row['submit_time'], + time_limit=0, # V2 doesn't have explicit time_limit + start_time=row['start_time'], + end_time=row['end_time'], + wall_time=row['wall_time'], + trace_time=trace_time_for_record, # The time for this specific event record + trace_start_time=trace_start_time, # Global trace start + trace_end_time=trace_end_time # Global trace end + ) + jobs_list_for_rap.append(job_info) + print(f"RAPS: Created {len(jobs_list_for_rap)} job_dict instances.") + else: + print("RAPS: No primary records DataFrame available to create job_dict instances.") + + # Convert global min/max times to float + final_timestep_start = 0 #float(global_min_time) if global_min_time != float(math.inf) else 0.0 + final_timestep_end = 10000 #float(global_max_time) if global_max_time != float(-math.inf) else 0.0 + + print(f"RAPS: Final global time range determined: Start={final_timestep_start}, End={final_timestep_end}") + + # Return the three values RAPS expects + return jobs_list_for_rap, final_timestep_start, final_timestep_end + + +# --- Example Usage (for direct script execution/testing the load_data function) --- +if __name__ == "__main__": + # IMPORTANT: Adjust this path to match your local setup precisely. + # This path should be the DIRECTORY THAT RAPS's `-f` ARGUMENT POINTS TO. + # e.g., if you run `main.py -f /Users/w1b/data/gcloud/v2`, then this variable is '/Users/w1b/data/gcloud/v2/'. + # And inside THAT directory, you should have `google_cluster_data_2011_sample/` + #RAPS_SIMULATED_BASE_DIR = "/Users/w1b/data/gcloud/v2/" + + print("--- Running direct tests of the load_data function ---") + + print("\n--- Test 1: Loading all event types (default behavior for a RAPS integration) ---") + # Simulate RAPS passing a list with one element for the data_path + jobs_list_test1, start_time_test1, end_time_test1 = load_data([RAPS_SIMULATED_BASE_DIR], system="dummy_system_name") + + if jobs_list_test1: # Check if the list of jobs is not empty + print(f"\nSummary of Test 1 (Primary jobs list loaded):") + print(f"- Number of job/task records: {len(jobs_list_test1)}") + # Check if individual records have the expected keys + if jobs_list_test1 and hasattr(jobs_list_test1[0], 'start_time') and hasattr(jobs_list_test1[0], 'wall_time'): + print(f"- First record (id={jobs_list_test1[0].id}): start_time={jobs_list_test1[0].start_time}, wall_time={jobs_list_test1[0].wall_time}") + print(f"- Full first record details: {jobs_list_test1[0].__dict__}") # Show all attributes + print(f"- Global Start time: {start_time_test1}, Global End time: {end_time_test1}") + else: + print("\nTest 1: No primary jobs list loaded. 
Check specified paths and downloaded files.") + + print("\n--- Test 2: Loading specific event types and file indices ---") + jobs_list_test2, start_time_test2, end_time_test2 = load_data( + [RAPS_SIMULATED_BASE_DIR], + event_types=["job_events"], # Only request job_events explicitly for this test + file_indices=[0], # Load only the 'part-00000' file for job_events + read_options={'header': 0}, # Example: assuming first row is header + another_rap_param=123 # Example of passing an extra kwarg + ) + + if jobs_list_test2: # Check if the list of jobs is not empty + print(f"\nSummary of Test 2 (Primary jobs list loaded):") + print(f"- Number of job/task records: {len(jobs_list_test2)}") + if jobs_list_test2 and hasattr(jobs_list_test2[0], 'start_time') and hasattr(jobs_list_test2[0], 'wall_time'): + print(f"- First record (id={jobs_list_test2[0].id}): start_time={jobs_list_test2[0].start_time}, wall_time={jobs_list_test2[0].wall_time}") + print(f"- Full first record details: {jobs_list_test2[0].__dict__}") # Show all attributes + print(f"- Global Start time: {start_time_test2}, Global End time: {end_time_test2}") + else: + print("\nTest 2: No primary jobs list loaded. Check path, types, and indices.") + + print("\n--- RAPS Dataloader (V2) script demonstration complete ---") diff --git a/scripts/get_cluster_v2_traces.sh b/scripts/get_cluster_v2_traces.sh new file mode 100755 index 0000000..5cff607 --- /dev/null +++ b/scripts/get_cluster_v2_traces.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Define the base GCS path for the 2011 (V2) dataset +GCS_BASE_PATH="gs://clusterdata-2011-2" +LOCAL_DIR="./google_cluster_data_2011_sample" # Local directory to save files +NUM_FILES_PER_TYPE=1 # Adjust this number: 1 is very small, 5-10 is a decent sample + +# Create the local base directory if it doesn't exist +mkdir -p "$LOCAL_DIR" + +# Define the event types present in the 2011 (V2) dataset +EVENT_TYPES=( + "machine_events" + "job_events" + "task_events" + "task_usage" +) + +echo "Starting download of Google Cluster Data V2 (2011) sample..." + +# Loop through each event type +for event_type in "${EVENT_TYPES[@]}"; do + echo "Processing event type: ${event_type}" + + # Create a local subdirectory for each event type + mkdir -p "${LOCAL_DIR}/${event_type}" + + # List files in the current event type's GCS directory, take the first N, and download them + # Added '2>/dev/null' to suppress BrokenPipeError messages from gsutil ls + gsutil ls "${GCS_BASE_PATH}/${event_type}/part-*.csv.gz" 2>/dev/null | head -n "${NUM_FILES_PER_TYPE}" | while read -r gcs_path; do + echo " Downloading $(basename "$gcs_path")..." + gsutil cp "$gcs_path" "${LOCAL_DIR}/${event_type}/" + done +done + +echo "---" +echo "Download complete. Files are in: $LOCAL_DIR" +echo "You've downloaded a sample of the 2011 (V2) Google Cluster Traces." -- GitLab From 5710f2bd7d5a8f29951a74ef2c7943e399ff4446 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 18 Jun 2025 10:45:16 -0400 Subject: [PATCH 126/388] Removed requested_nodes in favour of using PolicyType to decide if a job is replayed or rescheduled. Removed requested_nodes for jobs. These now only have scheduled_nodes. requested_nodes was used to decide if a job should be rescheduled or replayed. As this is not the decision of the job but of the scheduling policy, this is not adjusted. The updated mechanisms use PolicyType to make the decision and if scheduled_nodes is set, then use these nodes for the replay. If scheduled nodes are not set, place the job on free nodes. 
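A minimal sketch of the intended decision flow (illustrative only, not
part of the diff below; `available_nodes` stands in for the resource
manager's free-node list):

    if policy == PolicyType.REPLAY and job.scheduled_nodes:
        # Replay: pin the job to the exact nodes recorded in the telemetry.
        nodes = job.scheduled_nodes
    else:
        # Any other policy, or no recorded placement: take free nodes.
        nodes = available_nodes[:job.nodes_required]
    job.scheduled_nodes = nodes  # afterwards this records the actual placement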
---
 multi-part-sim.py               |  4 +---
 raps/job.py                     | 18 +++++++++++------
 raps/resmgr.py                  |  9 ++++-----
 raps/schedulers/default.py      | 36 +++++++++++++++------------------
 raps/schedulers/experimental.py | 31 +++++++++++++---------------
 raps/schedulers/replay.py       | 24 ++++++++++++----------
 6 files changed, 60 insertions(+), 62 deletions(-)

diff --git a/multi-part-sim.py b/multi-part-sim.py
index 2d55ffb..d0ccafa 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -47,13 +47,11 @@ if args.replay:
     if args.scale:
         for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"):
             job['nodes_required'] = random.randint(1, args.scale)
-            job['requested_nodes'] = None  # Setting to None triggers scheduler to assign nodes

     if args.arrival == 'poisson':
-        for job in tqdm(jobs, desc="Rescheduling jobs"):
+        for job in tqdm(jobs, desc="Adjusting job submission time"):
             partition = job['partition']
             partition_config = configs[partition_names.index(partition)]
-            job['requested_nodes'] = None
             job['submit_time'] = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME'])

     elif args.arrival == 'prescribed':
diff --git a/raps/job.py b/raps/job.py
index 09eaed4..1c41ddf 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -13,7 +13,8 @@ Implementing such using something like:

 def job_dict(*,nodes_required, name, account, \
              cpu_trace, gpu_trace, ntx_trace, nrx_trace, \
-             end_state, scheduled_nodes=None, id, priority=0, partition=0,
+             end_state, scheduled_nodes=None,
+             id, priority=0, partition=0,
              submit_time=0, time_limit=0, start_time=0, end_time=0, wall_time=0,
              trace_time=0, trace_start_time=0,trace_end_time=0, trace_missing_values=False):
     """ Return job info dictionary """
@@ -26,7 +27,7 @@ def job_dict(*,nodes_required, name, account, \
         'ntx_trace': ntx_trace,
         'nrx_trace': nrx_trace,
         'end_state': end_state,
-        'requested_nodes': scheduled_nodes,
+        'scheduled_nodes': scheduled_nodes,
         'id': id,
         'priority': priority,
         'partition': partition,
@@ -91,15 +92,19 @@ class Job:
         if self.id is None:  # This is wrong
             self.id = Job._get_next_id()

-        if self.scheduled_nodes and self.nodes_required == 0:
+        if self.nodes_required == 0 and self.scheduled_nodes != []:
             self.nodes_required = len(self.scheduled_nodes)
+        elif self.nodes_required != 0:
+            pass
+        else:
+            raise ValueError(f"Job has neither nodes_required nor scheduled_nodes set: {self.nodes_required} {self.scheduled_nodes}")

     def __repr__(self):
         """Return a string representation of the job."""
         return (f"Job(id={self.id}, name={self.name}, account={self.account}, "
-                f"nodes_required={self.nodes_required}, "
+                f"nodes_required={self.nodes_required}, scheduled_nodes={self.scheduled_nodes}, "
                 f"cpu_trace={self.cpu_trace}, gpu_trace={self.gpu_trace}, "
-                f"end_state={self.end_state}, requested_nodes={self.requested_nodes}, "
+                f"end_state={self.end_state}, "
                 f"submit_time={self.submit_time}, time_limit={self.time_limit}, "
                 f"start_time={self.start_time}, end_time={self.end_time}, "
                 f"wall_time={self.wall_time}, "
                 f"trace_time={self.trace_time}, "
                 f"trace_start_time={self.trace_start_time}, "
                 f"trace_end_time={self.trace_end_time}, "
                 f"running_time={self.running_time}, state={self._state}, "
-                f"scheduled_nodes={self.scheduled_nodes}, power={self.power}, "
+                f"power={self.power}, "
                 f"power_history={self.power_history})")

     @property
@@ -155,6 +160,7 @@ class JobStatistics:
         self.name = job.name
         self.account = job.account
         self.num_nodes = len(job.scheduled_nodes)
+        self.scheduled_nodes = job.scheduled_nodes
         self.run_time = job.running_time
         self.submit_time = job.submit_time
         self.start_time = job.start_time
diff --git a/raps/resmgr.py b/raps/resmgr.py
index 6a3ffda..f73a1e6 100644
--- a/raps/resmgr.py
+++ b/raps/resmgr.py
@@ -1,5 +1,6 @@
 import numpy as np
-from .job import JobState
+from raps.job import JobState
+from raps.policy import PolicyType
 from scipy.stats import weibull_min

@@ -13,13 +14,11 @@ class ResourceManager:
         # You can track system utilization history here
         self.sys_util_history = []  # list of (time, utilization) tuples

-    def assign_nodes_to_job(self, job, current_time):
+    def assign_nodes_to_job(self, job, current_time, policy):
         """Assigns nodes to a job and updates the available nodes."""
         if len(self.available_nodes) < job.nodes_required:
             raise ValueError(f"Not enough available nodes to schedule job {job.id}")
-
-        if job.requested_nodes:  # Telemetry replay case
-            job.scheduled_nodes = job.requested_nodes
+        if policy == PolicyType.REPLAY and job.scheduled_nodes:
             self.available_nodes = [n for n in self.available_nodes if n not in job.scheduled_nodes]
         else:  # Synthetic or case using modified/poisson arrival times
             job.scheduled_nodes = self.available_nodes[:job.nodes_required]
diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py
index 9953087..2b2dd09 100644
--- a/raps/schedulers/default.py
+++ b/raps/schedulers/default.py
@@ -57,7 +57,6 @@ class Scheduler:
                 # After backfill, decide to continue processing the queue or wait; continuing may result in fairness issues.
                 if self.policy in [PolicyType.REPLAY]:
-                    # print(f"Nodes available {nodes_available} - Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}")
                     continue  # Regardless if the job at the front of the queue doesn't fit, try placing all of them.
                 elif self.policy in [PolicyType.FCFS, PolicyType.PRIORITY, PolicyType.LJF, PolicyType.SJF]:
@@ -96,31 +95,28 @@ class Scheduler:
         return jobs_to_submit

     def place_job_and_manage_queues(self, job, queue, running, current_time):
-        self.resource_manager.assign_nodes_to_job(job, current_time)
+        self.resource_manager.assign_nodes_to_job(job, current_time, self.policy)
         running.append(job)
         queue.remove(job)
         if self.debug:
             scheduled_nodes = summarize_ranges(job.scheduled_nodes)
             print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}")

-    def check_available_nodes(self,job):
+    def check_available_nodes(self, job):
         nodes_available = False
-        if job.requested_nodes:  # nodes specified, i.e., telemetry replay
-            if len(job.requested_nodes) <= len(self.resource_manager.available_nodes):
-                if self.policy == PolicyType.REPLAY:  # Check if exact set is available:
-                    nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
-                else:
-                    # Sufficiently large number of nodes available
-                    # but no exact set is required!
-                    nodes_available = True
-                    # remove the request for specific nodes and ask for n nodes
-                    job.nodes_required = len(job.requested_nodes)
-                    job.requested_nodes = []
+        if job.nodes_required <= len(self.resource_manager.available_nodes):
+            if self.policy == PolicyType.REPLAY and job.scheduled_nodes:  # Check if we need the exact set
+                # is the exact set available:
+                nodes_available = set(job.scheduled_nodes).issubset(set(self.resource_manager.available_nodes))
             else:
-                pass
-        else:  # Exact nodes not specified (e.g. synthetic jobs don't have nodes assigned)
-            nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required
-
+                # we don't need the exact set:
+                nodes_available = True  # Checked above
+                if job.nodes_required == 0:
+                    raise ValueError(f"Job requested zero nodes: {job}")
+                # clear scheduled nodes
+                job.scheduled_nodes = []
+        else:
+            pass  # not enough nodes available
         return nodes_available

     def backfill(self,queue:List, running:List, current_time):
@@ -144,8 +140,8 @@ class Scheduler:
         # Identify when the next job in the queue could run as a time limit:
         first_job = queue[0]
         nodes_required = 0
-        if first_job.requested_nodes:
-            nodes_required = len(first_job.requested_nodes)
+        if self.policy == PolicyType.REPLAY and first_job.scheduled_nodes:  # This needs to be done properly!
+            nodes_required = len(first_job.scheduled_nodes)
         else:
             nodes_required = first_job.nodes_required
diff --git a/raps/schedulers/experimental.py b/raps/schedulers/experimental.py
index feade7c..0c49ffd 100644
--- a/raps/schedulers/experimental.py
+++ b/raps/schedulers/experimental.py
@@ -107,22 +107,19 @@ class Scheduler:

     def check_available_nodes(self,job):
         nodes_available = False
-        if job.requested_nodes:  # nodes specified, i.e., telemetry replay
-            if len(job.requested_nodes) <= len(self.resource_manager.available_nodes):
-                if self.policy == PolicyType.REPLAY:  # Check if exact set is available:
-                    nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
-                else:
-                    # Sufficiently large number of nodes available
-                    # but no exact set is required!
-                    nodes_available = True
-                    # remove the request for specific nodes and ask for n nodes
-                    job.nodes_required = len(job.requested_nodes)
-                    job.requested_nodes = []
+        if job.nodes_required <= len(self.resource_manager.available_nodes):
+            if self.policy == PolicyType.REPLAY and job.scheduled_nodes:  # Check if we need the exact set
+                # is the exact set available:
+                nodes_available = set(job.scheduled_nodes).issubset(set(self.resource_manager.available_nodes))
             else:
-                pass
-        else:  # Exact nodes not specified (e.g. synthetic jobs don't have nodes assigned)
-            nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required
-
+                # we don't need the exact set:
+                nodes_available = True  # Checked above
+                if job.nodes_required == 0:
+                    raise ValueError(f"Job requested zero nodes: {job}")
+                # clear scheduled nodes
+                job.scheduled_nodes = []
+        else:
+            pass  # not enough nodes available
         return nodes_available

     def backfill(self,queue:List, running:List, current_time):
@@ -146,8 +143,8 @@ class Scheduler:
         # Identify when the next job in the queue could run as a time limit:
         first_job = queue[0]
         nodes_required = 0
-        if first_job.requested_nodes:
-            nodes_required = len(first_job.requested_nodes)
+        if self.policy == PolicyType.REPLAY and first_job.scheduled_nodes:
+            nodes_required = len(first_job.scheduled_nodes)
         else:
             nodes_required = first_job.nodes_required
diff --git a/raps/schedulers/replay.py b/raps/schedulers/replay.py
index 00b1283..4b32809 100644
--- a/raps/schedulers/replay.py
+++ b/raps/schedulers/replay.py
@@ -35,17 +35,19 @@ class Scheduler:
                 continue

             nodes_available = False
-            if job.requested_nodes:  # nodes specified, i.e., telemetry replay
-                if len(job.requested_nodes) <= len(self.resource_manager.available_nodes):
-                    nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
+            if job.nodes_required <= len(self.resource_manager.available_nodes):
+                if self.policy == PolicyType.REPLAY and job.scheduled_nodes:  # Check if we need the exact set
+                    # is the exact set available:
+                    nodes_available = set(job.scheduled_nodes).issubset(set(self.resource_manager.available_nodes))
                 else:
-                    continue  # continue instead of break, as a later job with specific nodes may still be placed!
-            else:  # synthetic
-                if job.nodes_required:
-                    pass
-                else:
-                    raise ValueError("No number of nodes specified.")
-
+                    # we don't need the exact set:
+                    nodes_available = True  # Checked above
+                    if job.nodes_required == 0:
+                        raise ValueError(f"Job requested zero nodes: {job}")
+                    # clear scheduled nodes
+                    job.scheduled_nodes = []
+            else:
+                pass  # not enough nodes available

             if nodes_available:
                 self.resource_manager.assign_nodes_to_job(job, current_time)
@@ -53,4 +55,4 @@ class Scheduler:
                 queue.remove(job)
             else:
                 # This is a replay so this should not happen
-                raise ValueError(f"Nodes not available!\nRequested:{job.requested_nodes}\nAvailable:{self.resource_manager.available_nodes}\n{job.__dict__}")
+                raise ValueError(f"Nodes not available!\nRequested:{job.scheduled_nodes}\nAvailable:{self.resource_manager.available_nodes}\n{job.__dict__}; Policy: {self.policy}")
--
GitLab


From e27bc554a36aab633ead0bf5c1d874a48427aeb3 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 18 Jun 2025 11:07:47 -0400
Subject: [PATCH 127/388] Several fixes to make sure it's reading the files
 correctly

---
 README.md                    |   2 +-
 raps/dataloaders/gcloudv2.py | 299 +++++++++++++++++++----------------
 2 files changed, 165 insertions(+), 136 deletions(-)

diff --git a/README.md b/README.md
index a627317..dc69283 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from
 For Google cluster trace v2

     # gcloudv2
-    python main.py --system gcloudv2 -f ~/data/gcloud/v2
+    python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample

 ## Perform Network Simulation

diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py
index 9e76280..60f0e2a 100644
--- a/raps/dataloaders/gcloudv2.py
+++ b/raps/dataloaders/gcloudv2.py
@@ -2,24 +2,10 @@
 import pandas as pd
 import fsspec
 import os
 import re
-import math  # For math.inf and -math.inf
-import numpy as np  # Needed for empty arrays for traces
+import math
+import numpy as np
 from typing import List, Dict, Optional, Union, Generator, Any

-""" To download cluster traces into ~/data/gcloud/v2
-
-    1. Install Google cloud SDK
-
-       https://cloud.google.com/sdk/docs/install
-
-    2. gcloud auth login
-
-    3. See https://github.com/google/cluster-data
-       we are using v2 traces b/c the v3 traces are too large for practical study
-
-    4. See download script in ../../get_cluster_v2_traces.sh
-
-"""
-
 # Assuming this script is located in raps/dataloaders/
 # Adjust the path if your raps/job.py is located differently
 try:
@@ -31,24 +17,54 @@ except ImportError:
         def __init__(self, **kwargs):
             self.__dict__.update(kwargs)
         def __repr__(self):
-            return f"DummyJobDict({self.id})"
+            return f"DummyJobDict(id={getattr(self, 'id', 'N/A')})"
+

 class GoogleClusterV2DataLoader:
     """
     A custom dataloader for Google Cluster Traces V2 (2011 dataset),
     designed to read locally downloaded .csv.gz files in an ExaDigiT/RAPS style.
+
+    This dataloader supports loading data from local subdirectories for different
+    event types and can handle gzipped CSV files. It allows for selective loading
+    of specific event types and file indices.
     """

-    # --- Configuration for your local V2 files ---
-    # This is a default fallback path for direct script execution (when not called by RAPS).
-    # It MUST point directly to the directory *containing* machine_events, job_events, etc.
-    BASE_LOCAL_PATH = "~/data/gcloud/v2/google_cluster_data_2011_sample/"
+    # This BASE_LOCAL_PATH will now effectively be managed by the `load_data` function
+    # which passes it via the `base_data_path` argument to __init__.
+    # It remains here as a default for direct instantiation/testing of the class.
+    BASE_LOCAL_PATH = "/Users/w1b/data/gcloud/v2/google_cluster_data_2011_sample/"

     SUPPORTED_EVENT_TYPES = [
         "machine_events", "job_events", "task_events", "task_usage",
     ]
     SUPPORTED_FORMATS = ["csv"]

+    V2_COLUMN_NAMES = {
+        "job_events": [
+            "time", "missing_col_1", "job_ID", "event_type", "user_ID",
+            "scheduling_class", "job_name", "logical_job_name",
+            "number_of_tasks", "CPU_request", "memory_request"
+        ],
+        "machine_events": [
+            "time", "machine_ID", "event_type", "platform_ID",
+            "CPU_capacity", "memory_capacity"
+        ],
+        "task_events": [
+            "time", "missing_col_1", "job_ID", "task_index", "machine_ID",
+            "event_type", "user_ID", "scheduling_class", "priority",
+            "CPU_request", "memory_request", "disk_space_request", "constraints"
+        ],
+        "task_usage": [
+            "start_time", "end_time", "job_ID", "task_index", "machine_ID",
+            "CPU_usage_rate", "memory_usage_avg", "memory_usage_max",
+            "disk_IO_time_avg", "disk_IO_time_max", "CPUs_allocated",
+            "memory_allocated", "sample_duration", "missing_col_13",
+            "missing_col_14", "missing_col_15", "missing_col_16",
+            "missing_col_17", "missing_col_18", "missing_col_19"  # Up to 20 columns observed
+        ]
+    }
+
     def __init__(self,
                  event_types: Optional[Union[str, List[str]]] = None,
                  file_indices: Optional[Union[int, List[int]]] = None,
@@ -66,31 +82,29 @@ class GoogleClusterV2DataLoader:
             read_options (Optional[Dict[str, Any]]):
                 Additional options passed directly to pandas.read_csv().
             concatenate_files (bool):
-                If True, all loaded files will be concatenated into a single pandas DataFrame
-                when the iterator finishes. If False, `__iter__` will yield individual DataFrames.
-            base_data_path (Optional[str]): The base path provided by the external calling script (e.g., RAPS).
-                This path will be adjusted to point to the actual data root.
+                If True, all loaded files will be concatenated into a single pandas DataFrame.
+                If False, `__iter__` will yield individual DataFrames.
+            base_data_path (Optional[str]): The base path to the local data directory.
+                This is the root that contains subdirectories like 'job_events'.
         """
         self.event_types = [event_types] if isinstance(event_types, str) else event_types
         self.file_indices = [file_indices] if isinstance(file_indices, int) else file_indices
         self.concatenate_files = concatenate_files

-        # --- CRITICAL FIX START: Ensure 'time' column is read as int64 ---
+        # Set default read options specific to V2 CSVs
         self.read_options = read_options.copy() if read_options is not None else {}
+        if 'header' not in self.read_options:
+            self.read_options['header'] = None  # V2 CSVs do not have a header row
         if 'dtype' not in self.read_options:
             self.read_options['dtype'] = {}
         self.read_options['dtype']['time'] = 'int64'  # Force 'time' to be read as integer
-        # --- CRITICAL FIX END ---
-
-        # --- Determine the correct base path for this DataLoader instance ---
-        if base_data_path is not None:
-            clean_base_path = base_data_path
-            if not clean_base_path.endswith(os.sep):
-                clean_base_path += os.sep
-            self._current_base_path = os.path.join(clean_base_path, "google_cluster_data_2011_sample") + os.sep
-        else:
-            self._current_base_path = self.BASE_LOCAL_PATH
-        # --- End of base path determination ---
+        self.read_options['dtype']['start_time'] = 'int64'  # For task_usage
+        self.read_options['dtype']['end_time'] = 'int64'    # For task_usage
+
+        # The effective base path for this DataLoader instance will be where the event_type_dirs are.
+        # This is the key path that load_data will correctly provide.
+        self._current_base_path = base_data_path if base_data_path is not None else self.BASE_LOCAL_PATH

         self._fs = fsspec.AbstractFileSystem()
         self._all_file_paths = []
@@ -107,6 +121,13 @@ class GoogleClusterV2DataLoader:

         for event_type in event_types_to_consider:
             event_type_dir = os.path.join(self._current_base_path, event_type)
+
+            if event_type in self.V2_COLUMN_NAMES:
+                # Add names to read_options for this specific type loading instance
+                self.read_options['names'] = self.V2_COLUMN_NAMES[event_type]
+            else:
+                self.read_options.pop('names', None)  # Remove names if not defined for this type
+                print(f"Warning: No explicit column names defined for '{event_type}'. Pandas will infer names.")

             if not os.path.isdir(event_type_dir):
                 print(f"Warning: Local directory for '{event_type}' not found: '{event_type_dir}'. Skipping this type.")
@@ -177,8 +198,11 @@ class GoogleClusterV2DataLoader:
         elif self.concatenate_files and not all_data_frames:
             print("\nNo DataFrames were loaded to concatenate from the selected V2 files.")

-
     def get_data_for_type(self, event_type: str, limit: Optional[int] = None) -> pd.DataFrame:
+        """
+        A convenience method to load data for a single event type from the V2 dataset,
+        up to a specified number of files. (Format is fixed to CSV for V2).
+        """
         if event_type not in self.SUPPORTED_EVENT_TYPES:
             raise ValueError(f"Unsupported event type: '{event_type}'. Choose from {self.SUPPORTED_EVENT_TYPES}")

@@ -210,39 +234,29 @@ class GoogleClusterV2DataLoader:
         return combined_df

 # --- MANDATORY RAPS `load_data` FUNCTION ---
+# This function is the entry point that RAPS's main.py will call.
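+# A hypothetical invocation, for orientation only (the path and the 'jid'
+# keyword are illustrative; 'jid' is read from **kwargs below):
+#     jobs, t_min, t_max = load_data(["~/data/gcloud/v2/"], jid='*')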
 def load_data(
-    data_path: Union[str, List[str]],
-    event_types: Optional[Union[str, List[str]]] = None,
-    file_indices: Optional[Union[int, List[int]]] = None,
-    read_options: Optional[Dict[str, Any]] = None,  # User-provided read_options
-    **kwargs  # Catch-all for additional arguments like 'system'
-) -> tuple[List[Any], float, float]:  # Updated return type hint for RAPS job_dict instances
+        data_path: Union[str, List[str]], **kwargs) -> tuple[List[Any], float, float]:  # RAPS expects a list of job_dict instances, start_time, end_time
     """
     RAPS data loading entry point for Google Cluster Trace V2 (2011) data.

-    Loads data from a specified local path, organizing it by event type.
-    It returns a list of primary 'job/task' records (as job_dict instances),
-    along with the global start and end timestamps of the loaded data.
+    Loads data from a specified local path, assuming it contains subdirectories
+    like 'job_events', 'task_events', etc., filled with .csv.gz files.
+    It returns a list of RAPS job_dict instances, along with the global start
+    and end timestamps of the loaded data.

     Args:
         data_path (Union[str, List[str]]): The base path to the local V2 data directory.
-            This is the path provided by the RAPS main script.
+            Expected to be the directory that *contains* the
+            'google_cluster_data_2011_sample' subdirectory.
             Can be a string or a list containing a single string.
-        event_types (Optional[Union[str, List[str]]]):
-            Specific event types to load. If None, all supported types.
-        file_indices (Optional[Union[int, List[int]]]):
-            Specific numerical indices of parts to load. If None, all available parts.
-        read_options (Optional[Dict[str, Any]]):
-            Additional options for pandas.read_csv().
-        **kwargs: Catch-all for any additional keyword arguments passed by RAPS
-            (e.g., 'system', 'config', 'cooling', 'fastforward', etc.).
+            Example: `~/data/gcloud/v2/`

     Returns:
         tuple[List[Any], float, float]:
-            - A list of job_dict instances, where each represents a job/task record
-              and includes 'start_time' and 'wall_time' (even if derived/dummy).
-            - The global minimum timestamp found across all loaded data.
-            - The global maximum timestamp found across all loaded data.
+            - A list of RAPS `job_dict` instances.
+            - The global minimum timestamp (float) found across all loaded data.
+            - The global maximum timestamp (float) found across all loaded data.
             Returns ([], 0.0, 0.0) if no data or time info found.
     """
@@ -268,43 +282,58 @@ def load_data(
     # This dictionary will store DataFrames for all event types loaded by this function
     loaded_dfs: Dict[str, pd.DataFrame] = {}

-    types_to_load_for_rap = event_types if event_types else GoogleClusterV2DataLoader.SUPPORTED_EVENT_TYPES
-    if isinstance(types_to_load_for_rap, str):
-        types_to_load_for_rap = [types_to_load_for_rap]
-
+    # Load all supported event types (job_events, task_events, etc.)
+    # We set event_types=None and file_indices=None to load all available files for each type
+    # from the automatically detected subdirectories.
+    dataloader = GoogleClusterV2DataLoader(
+        event_types=None,                   # Load all supported types
+        file_indices=None,                  # Load all files found for each type
+        read_options=None,                  # Use default read_options defined in DataLoader
+        concatenate_files=True,             # Get one concatenated DF per type
+        base_data_path=expanded_data_path   # This is the RAPS-provided path to the directory *above* the data folder
+    )
+
     # Initialize global min/max timestamps for the entire dataset
     global_min_time = float(math.inf)
     global_max_time = float(-math.inf)

-    for event_type_key in types_to_load_for_rap:
-        # Create a new DataLoader instance for each event_type to get its concatenated DF.
-        dataloader = GoogleClusterV2DataLoader(
-            event_types=event_type_key,        # Load only this specific type
-            file_indices=file_indices,         # Apply file index filter
-            read_options=read_options,         # Apply any custom read options (will be merged with default dtype for time)
-            concatenate_files=True,            # Ensure a single DataFrame is yielded for this type
-            base_data_path=expanded_data_path  # Pass the RAPS provided path
+    # Loop through the dataloader to get all concatenated DataFrames for each event type
+    for event_type_key in dataloader.SUPPORTED_EVENT_TYPES:  # Iterate through explicitly supported types
+        # Create a temporary DataLoader instance just to load this specific event type
+        # from the correct subpath within expanded_data_path
+        temp_dataloader_for_type = GoogleClusterV2DataLoader(
+            event_types=event_type_key,
+            file_indices=None,        # Load all files for this specific type
+            read_options=None,        # Use default read_options
+            concatenate_files=True,
+            base_data_path=expanded_data_path  # Pass the RAPS base path
         )

-        for df_current_type in dataloader:
+        # This loop will run once for each event type, yielding its concatenated DataFrame
+        for df_current_type in temp_dataloader_for_type:
             if not df_current_type.empty:
                 loaded_dfs[event_type_key] = df_current_type
-                # DEBUG: Check if 'time' column is being correctly read.
+                print(f"RAPS: Successfully loaded '{event_type_key}'. DataFrame shape: {df_current_type.shape}")
+
+                # Update global min/max times if a 'time' column exists
                 if 'time' in df_current_type.columns:
-                    print(f"DEBUG: '{event_type_key}' time min/max in DF: {df_current_type['time'].min()}/{df_current_type['time'].max()}")
-                else:
-                    print(f"DEBUG: 'time' column NOT FOUND in {event_type_key}.")
+                    current_min = df_current_type['time'].min()
+                    current_max = df_current_type['time'].max()
+                    if current_min < global_min_time:
+                        global_min_time = current_min
+                    if current_max > global_max_time:
+                        global_max_time = current_max
             else:
                 print(f"RAPS: No data loaded for event type '{event_type_key}'.")

     print("\n--- RAPS: Data loading complete for individual types ---")

-    # --- FIX 2: Select and prepare the primary 'jobs' DataFrame for RAPS ---
+    # --- FIX 2: Select and prepare the primary 'jobs' list for RAPS ---
     # RAPS main.py is iterating over `jobs`, expecting `job['wall_time']` and `job['start_time']`.
-    # This means `jobs` must be a list of dictionaries (or similar objects).
-    jobs_list_for_rap: List[Any] = []  # Initialize as empty list
+    # This means `jobs` must be a list of dictionaries (or job_dict instances).
+    jobs_list_for_rap: List[Any] = []

-    # Prioritize task_events for job records due to granularity, otherwise use job_events.
+    # Prioritize task_events for primary job records, otherwise use job_events.
     raw_primary_records_df = pd.DataFrame()
     if 'task_events' in loaded_dfs and not loaded_dfs['task_events'].empty:
         raw_primary_records_df = loaded_dfs['task_events'].copy()
@@ -313,68 +342,69 @@ def load_data(
         raw_primary_records_df = loaded_dfs['job_events'].copy()
         print(f"RAPS: Selected 'job_events' as the primary source for job records (task_events not available/empty).")
     else:
-        print("RAPS: Warning: Neither 'task_events' nor 'job_events' found/loaded. Cannot create job records.")
+        print("RAPS: Warning: Neither 'task_events' nor 'job_events' found/loaded for primary 'jobs' data. Cannot create job records.")
+        # Return empty list and 0.0 times if no primary data
+        return [], 0.0, 0.0

     if not raw_primary_records_df.empty:
         # --- FIX 3: Prepare raw_primary_records_df with RAPS-expected columns ---
         # Map V2 'time' column to RAPS 'submit_time' and 'start_time'
         if 'time' in raw_primary_records_df.columns:
             raw_primary_records_df['submit_time'] = raw_primary_records_df['time']
-            raw_primary_records_df['start_time'] = raw_primary_records_df['time']  # Simplistic for first pass
+            raw_primary_records_df['start_time'] = raw_primary_records_df['time']
         else:
             raw_primary_records_df['submit_time'] = 0
             raw_primary_records_df['start_time'] = 0
             print("Warning: 'time' column not found in primary records DataFrame. Using 0 for submit/start_time.")

-        # Derive 'end_time' and 'wall_time'. This is a major simplification for V2 data.
-        # For a more accurate 'end_time' and 'wall_time', you'd need to:
-        #   1. Join with 'task_usage' or other event types.
-        #   2. Aggregate events by job/task ID to find actual lifecycle timestamps.
-        # For now, setting a dummy wall_time to satisfy RAPS's requirement for `job['wall_time']`
-        # and to allow its `int(max(job['wall_time'] + job['start_time']...` calculation.
+        # Add 'wall_time'. V2 trace does not have explicit wall_time per job/task.
+        # This is a dummy value for RAPS's internal calculations.
         raw_primary_records_df['wall_time'] = 1  # Dummy: 1 microsecond duration
+
+        # Add 'end_time' to the DataFrame for internal consistency if needed later
+        # (though RAPS main.py calculates it, having it can be useful)
         raw_primary_records_df['end_time'] = raw_primary_records_df['start_time'] + raw_primary_records_df['wall_time']

-        # --- FIX 4: Create job_dict instances and populate list ---
+        # --- FIX 4: Create job_dict instances and populate jobs_list_for_rap ---
         # Get the jid (job ID filter) from kwargs, defaulting to '*'
         jid_filter = kwargs.get('jid', '*')

-        # Loop through each record (row) to create a job_dict instance.
-        # It's usually best to filter for submit events to ensure unique job instances
-        # and to map them to the proper RAPS 'job' concept.
+        # Filter to 'submit' events to represent distinct job creations
         submit_records_df = raw_primary_records_df[
-            raw_primary_records_df.get('event_type') == 'submit'  # Use .get() for robustness
-        ].copy() if 'event_type' in raw_primary_records_df.columns else raw_primary_records_df.copy()  # If no event_type, use all
+            raw_primary_records_df.get('event_type') == 0  # Event type 0 is 'submit'
+        ].copy() if 'event_type' in raw_primary_records_df.columns else raw_primary_records_df.copy()

-        if 'job_ID' not in submit_records_df.columns:  # Fallback if job_ID not present (e.g., for task_events direct)
-            submit_records_df['job_ID'] = submit_records_df['task_ID'] if 'task_ID' in submit_records_df.columns else range(len(submit_records_df))
-            print("Warning: 'job_ID' not found in selected primary records. Using 'task_ID' or row index.")
+        if 'job_ID' not in submit_records_df.columns:
+            submit_records_df['job_ID'] = submit_records_df['task_index'] if 'task_index' in submit_records_df.columns else range(len(submit_records_df))
+            print("Warning: 'job_ID' not found. Using 'task_index' or row index for job_id.")
+
+        # Make job_ID unique in case 'task_index' was used and job_ID wasn't.
+        # This ensures unique RAPS job_dict IDs.
+        submit_records_df['unique_job_id'] = submit_records_df['job_ID'].astype(str) + "_" + submit_records_df['start_time'].astype(str)

         for index, row in submit_records_df.iterrows():
-            job_id = row['job_ID']
+            job_id_from_trace = row['job_ID']  # The original job_ID from the trace

             # Apply RAPS's jid filter (from main.py example)
-            if jid_filter != '*' and str(job_id) != str(jid_filter):  # Convert to string for comparison
+            if jid_filter != '*' and str(job_id_from_trace) != str(jid_filter):
                 continue

             # --- Map V2 Data to job_dict arguments ---
-            # Most of these are simplifications or dummies for V2 given the limited data.
-            nodes_required = 1  # Dummy
-            name = f"job_{job_id}"
+            nodes_required = 1  # Dummy: V2 doesn't specify nodes_required directly per job event
+            name = f"job_{job_id_from_trace}"
             account = f"user_{row['user_ID']}" if 'user_ID' in row else "unknown_user"
             priority = row['priority'] if 'priority' in row else 0

-            # Trace data fields (cpu_trace, gpu_trace etc.) are arrays, initially empty.
-            # V2 has no GPUs.
+            # Trace data arrays are empty as per V2 characteristics
             cpu_trace = np.array([])
-            gpu_trace = np.array([])
+            gpu_trace = np.array([])  # V2 has no GPUs
             nrx_trace = np.array([])
             ntx_trace = np.array([])

-            end_state = "UNKNOWN"  # V2 job_events has event_type, but no direct final state field.
-            scheduled_nodes = []  # Requires complex task scheduling analysis
+            end_state = "UNKNOWN"  # Final job state requires complex aggregation of task events
+            scheduled_nodes = []   # Requires scheduling logic, not directly in raw event

-            # Get trace-wide times (global min/max)
+            # Global trace times (already calculated above)
             trace_start_time = float(global_min_time) if global_min_time != float(math.inf) else 0.0
             trace_end_time = float(global_max_time) if global_max_time != float(-math.inf) else 0.0
@@ -391,16 +421,16 @@ def load_data(
                 ntx_trace=ntx_trace,
                 end_state=end_state,
                 scheduled_nodes=scheduled_nodes,
-                id=job_id,
+                id=job_id_from_trace,  # Use the original job ID from the trace
                 priority=priority,
                 submit_time=row['submit_time'],
-                time_limit=0,  # V2 doesn't have explicit time_limit
-                start_time=row['start_time'],
-                end_time=row['end_time'],
-                wall_time=row['wall_time'],
-                trace_time=trace_time_for_record,   # The time for this specific event record
-                trace_start_time=trace_start_time,  # Global trace start
-                trace_end_time=trace_end_time       # Global trace end
+                time_limit=0,  # V2 doesn't have explicit time_limit per job_event
+                start_time=row['start_time'],       # RAPS uses this for simulation start
+                end_time=row['end_time'],           # RAPS uses this for simulation end
+                wall_time=row['wall_time'],         # RAPS uses this for duration
+                trace_time=trace_time_for_record,   # The timestamp of *this specific event* record
+                trace_start_time=trace_start_time,  # Global trace start time
+                trace_end_time=trace_end_time       # Global trace end time
             )
             jobs_list_for_rap.append(job_info)
         print(f"RAPS: Created {len(jobs_list_for_rap)} job_dict instances.")
@@ -408,12 +438,13 @@ def load_data(
         print("RAPS: No primary records DataFrame available to create job_dict instances.")

     # Convert global min/max times to float
-    final_timestep_start = 0  # float(global_min_time) if global_min_time != float(math.inf) else 0.0
-    final_timestep_end = 10000  # float(global_max_time) if global_max_time != float(-math.inf) else 0.0
+    final_timestep_start = float(global_min_time) if global_min_time != float(math.inf) else 0.0
+    final_timestep_end = float(global_max_time) if global_max_time != float(-math.inf) else 0.0

     print(f"RAPS: Final global time range determined: Start={final_timestep_start}, End={final_timestep_end}")

-    # Return the three values RAPS expects
+    # Return the three values RAPS expects:
+    # (list of job_dict instances, global min time, global max time)
     return jobs_list_for_rap, final_timestep_start, final_timestep_end

@@ -423,21 +454,19 @@ if __name__ == "__main__":
     # This path should be the DIRECTORY THAT RAPS's `-f` ARGUMENT POINTS TO.
     # e.g., if you run `main.py -f /Users/w1b/data/gcloud/v2`, then this variable is '/Users/w1b/data/gcloud/v2/'.
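+    # Illustrative layout under that directory (file names are examples; the
+    # five-digit part index and total part count vary with the download):
+    #   google_cluster_data_2011_sample/job_events/part-00000-of-00500.csv.gz
+    #   google_cluster_data_2011_sample/task_events/part-00000-of-00500.csv.gz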
     # And inside THAT directory, you should have `google_cluster_data_2011_sample/`
-    #RAPS_SIMULATED_BASE_DIR = "/Users/w1b/data/gcloud/v2/"
+    RAPS_SIMULATED_BASE_DIR = "/Users/w1b/data/gcloud/v2/"

     print("--- Running direct tests of the load_data function ---")

     print("\n--- Test 1: Loading all event types (default behavior for a RAPS integration) ---")
-    # Simulate RAPS passing a list with one element for the data_path
     jobs_list_test1, start_time_test1, end_time_test1 = load_data([RAPS_SIMULATED_BASE_DIR], system="dummy_system_name")

-    if jobs_list_test1:  # Check if the list of jobs is not empty
+    if jobs_list_test1:
         print(f"\nSummary of Test 1 (Primary jobs list loaded):")
         print(f"- Number of job/task records: {len(jobs_list_test1)}")
-        # Check if individual records have the expected keys
         if jobs_list_test1 and hasattr(jobs_list_test1[0], 'start_time') and hasattr(jobs_list_test1[0], 'wall_time'):
-            print(f"- First record (id={jobs_list_test1[0].id}): start_time={jobs_list_test1[0].start_time}, wall_time={jobs_list_test1[0].wall_time}")
-            print(f"- Full first record details: {jobs_list_test1[0].__dict__}")  # Show all attributes
+            print(f"- First record (id={jobs_list_test1[0].id}): submit_time={jobs_list_test1[0].submit_time}, start_time={jobs_list_test1[0].start_time}, wall_time={jobs_list_test1[0].wall_time}")
+            # print(f"- Full first record details: {jobs_list_test1[0].__dict__}")
         print(f"- Global Start time: {start_time_test1}, Global End time: {end_time_test1}")
     else:
         print("\nTest 1: No primary jobs list loaded. Check specified paths and downloaded files.")
@@ -445,18 +474,18 @@ if __name__ == "__main__":
     print("\n--- Test 2: Loading specific event types and file indices ---")
     jobs_list_test2, start_time_test2, end_time_test2 = load_data(
         [RAPS_SIMULATED_BASE_DIR],
-        event_types=["job_events"],  # Only request job_events explicitly for this test
-        file_indices=[0],            # Load only the 'part-00000' file for job_events
-        read_options={'header': 0},  # Example: assuming first row is header
-        another_rap_param=123        # Example of passing an extra kwarg
+        event_types=["job_events"],
+        file_indices=[0],
+        read_options={'header': 0},
+        another_rap_param=123
     )

-    if jobs_list_test2:  # Check if the list of jobs is not empty
+    if jobs_list_test2:
         print(f"\nSummary of Test 2 (Primary jobs list loaded):")
         print(f"- Number of job/task records: {len(jobs_list_test2)}")
         if jobs_list_test2 and hasattr(jobs_list_test2[0], 'start_time') and hasattr(jobs_list_test2[0], 'wall_time'):
-            print(f"- First record (id={jobs_list_test2[0].id}): start_time={jobs_list_test2[0].start_time}, wall_time={jobs_list_test2[0].wall_time}")
-            print(f"- Full first record details: {jobs_list_test2[0].__dict__}")  # Show all attributes
+            print(f"- First record (id={jobs_list_test2[0].id}): submit_time={jobs_list_test2[0].submit_time}, start_time={jobs_list_test2[0].start_time}, wall_time={jobs_list_test2[0].wall_time}")
+            # print(f"- Full first record details: {jobs_list_test2[0].__dict__}")
         print(f"- Global Start time: {start_time_test2}, Global End time: {end_time_test2}")
     else:
         print("\nTest 2: No primary jobs list loaded. Check path, types, and indices.")
--
GitLab


From 8d4c2ece2ee6a4aad73e26bdb15d577442fc5e6f Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 18 Jun 2025 11:50:35 -0400
Subject: [PATCH 128/388] Added plotting to telemetry. Gantt charts for nodes
 and jobs

Nodes: Gantt chart showing nodes and how they were allocated according
to start and end times.
Jobs: Gantt chart showing job IDs at submit time with duration.
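A minimal sketch of the barh-based Gantt idea (illustrative only, not
part of the diff below; assumes jobs carrying 'submit_time' and
'wall_time' entries):

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    for i, job in enumerate(jobs):
        ax.barh(i, job['wall_time'], left=job['submit_time'], height=1.0)
    ax.set_xlabel("time [s]")
    ax.set_ylabel("Job ID")
    plt.show()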
---
 raps/plotting.py  |  18 +++++-
 raps/telemetry.py | 150 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 150 insertions(+), 18 deletions(-)

diff --git a/raps/plotting.py b/raps/plotting.py
index cf85750..95b27ea 100644
--- a/raps/plotting.py
+++ b/raps/plotting.py
@@ -250,7 +250,7 @@ def plot_job_gantt(start_times, end_times, node_counts):
     # We'll plot each job in a different row on the Y-axis
     y_positions = range(len(start_times))  # 0, 1, 2, ...
-    
+
     for s, e, n in zip(start_times, end_times, node_counts):
         # Bar placed at y = n
         plt.barh(
@@ -258,8 +258,8 @@ def plot_job_gantt(start_times, end_times, node_counts):
             y=n,
             width=e - s,   # job duration on the x-axis
             left=s,        # start time
             height=0.8,    # thickness of the bar
-            color='yellow', 
-            edgecolor='black', 
+            color='yellow',
+            edgecolor='black',
             alpha=0.8
         )
@@ -283,6 +283,18 @@ def plot_job_gantt(start_times, end_times, node_counts):
     plt.show()


+def spaced_colors(n, cmap_name='nipy_spectral'):
+    cmap = plt.get_cmap(cmap_name)
+    # Get n points spaced in [0,1]
+    base = np.linspace(0, 1, n, endpoint=False)
+    # Shuffle them to maximize distance between consecutive colors
+    # e.g. take every k-th, wrap around
+    step = int(np.ceil(np.sqrt(n)))
+    indices = (step * np.arange(n)) % n
+    values = base[indices]
+    return [cmap(v) for v in values]
+
+
 if __name__ == "__main__":
     plotter = Plotter()
     #plotter.plot_history([1, 2, 3, 4])
diff --git a/raps/telemetry.py b/raps/telemetry.py
index 4288cf4..a77e636 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -17,7 +17,10 @@ if __name__ == "__main__":
     parser.add_argument('-f', '--replay', nargs='+', type=str,
                         help='Either: path/to/joblive path/to/jobprofile' + \
                         ' -or- filename.npz (overrides --workload option)')
-    parser.add_argument('-p', '--plot', action='store_true', help='Output plots')
+    parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs','nodes'], help='Output plots')
+
+    parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gantt with nodes required as line thickness (default false)")  # duplicate in workload!
+
     parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
     parser.add_argument('--system', type=str, default='frontier', help='System config to use')
     choices = ['prescribed', 'poisson']
@@ -28,12 +31,15 @@ if __name__ == "__main__":

 import importlib
 import numpy as np
+import pandas as pd
 from tqdm import tqdm
+from rich.progress import track

 from raps.config import ConfigManager
-from raps.job import Job
+from raps.job import Job, job_dict
 #from raps.account import Accounts
-from raps.plotting import plot_submit_times, plot_nodes_histogram, plot_job_gantt
+import matplotlib.pyplot as plt
+from raps.plotting import Plotter, plot_submit_times, plot_nodes_histogram, plot_job_gantt, spaced_colors
 from raps.utils import next_arrival_byconfargs, create_casename, convert_to_seconds

@@ -78,6 +84,38 @@ class Telemetry:
         """Load telemetry data using custom data loaders."""
         return self.dataloader.load_data_from_df(*args, **kwargs)

+    def load_data_from_csv(self, file, *args, **kwargs):
+        jobs = []
+        df = pd.read_csv(file, chunksize=1, header='infer')
+        for d in df:
+            #print(d['name'].astype(str))
+            job_info = job_dict(nodes_required=None,
+                                name=d['name'].astype(str).item(),
+                                account=d['account'].astype(str).item(),
+                                cpu_trace=None,
+                                gpu_trace=None,
+                                ntx_trace=None,
+                                nrx_trace=None,
+                                end_state=d['state'].astype(str).item(),
+                                scheduled_nodes=d['scheduled_nodes'].item(),
+                                id=d['id'].astype(int).item(),
+                                priority=None,
+                                partition=None,
+                                submit_time=d['submit_time'].astype(int).item(),
+                                time_limit=None,
+                                start_time=d['start_time'].astype(int).item(),
+                                end_time=d['end_time'].astype(int).item(),
+                                wall_time=d['end_time'].astype(int).item() - d['start_time'].astype(int).item(),
+                                trace_time=None,
+                                trace_start_time=None,
+                                trace_end_time=None,
+                                trace_missing_values=None
+                                )
+            jobs.append(job_info)
+        minstarttime = min([x['start_time'] for x in jobs])
+        maxendtime = max([x['end_time'] for x in jobs])
+        return jobs, minstarttime, maxendtime, None
+
     def node_index_to_name(self, index: int):
         """ Convert node index into a name"""
         return self.dataloader.node_index_to_name(index, config=self.config)
@@ -118,13 +156,16 @@ class Telemetry:
             if hasattr(args,'scale') and args.scale:
                 for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"):
                     job['nodes_required'] = random.randint(1, args.scale)
-                    job['requested_nodes'] = None  # Setting to None triggers scheduler to assign nodes
+                    job['scheduled_nodes'] = None  # Setting to None triggers scheduler to assign nodes

             if hasattr(args,'policy') and args.policy == 'poisson':
                 print("available nodes:", config['AVAILABLE_NODES'])
                 for job in tqdm(jobs, desc="Rescheduling jobs"):
-                    job['requested_nodes'] = None
+                    job['scheduled_nodes'] = None
                     job['submit_time'] = next_arrival_byconfargs(config,args)

+        #elif file.endswith(".csv"):
+        #    jobs_from_file = td.load_data
+        #    jobs.extend(jobs_from_file)
         elif i == 0:
             trigger_custom_dataloader = True
             break
@@ -158,11 +199,81 @@ class Telemetry:

     return jobs, timestep_start, timestep_end, args

+
+def plot_jobs_gantt(*, ax=None, jobs):
+    if ax is None:
+        ax = plt.figure(figsize=(10,4))
+    # Submit_time and Wall_time
+    submit_t = [x['submit_time'] for x in jobs]
+    duration = [x['wall_time'] for x in jobs]
+    nodes_required = [x['nodes_required'] for x in jobs]
+
+    colors = spaced_colors(len(jobs))
+    offset = 0
+    for i in track(range(len(jobs)), description="Collecting information to plot"):
+        if args.gantt_nodes:
+            ax.barh(offset + nodes_required[i] / 2, duration[i], height=nodes_required[i], left=submit_t[i])
+            offset += nodes_required[i]
+        else:
+            ax.barh(i, duration[i], height=1.0, left=submit_t[i], color=colors[i])
+    print("Plotting")
+
+    ax.set_ylabel("Job ID")
+    ##ax_b labels:
+    ax.set_xlabel("time [hh:mm]")
+    minx_s = 0
+    maxx_s = np.ceil(max([x['wall_time'] for x in jobs]) + max([x['submit_time'] for x in jobs]))
+    x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)]
+    x_label_ticks = [n * 60 for n in x_label_mins[0::60]]
+    x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for
+                   (x1,x2) in [(n // 60, n % 60) for
+                               n in x_label_mins[0::60]]]
+
+    ax.set_xticks(x_label_ticks, x_label_str)
+    #ax.yaxis.set_inverted(True)
+    return ax
+
+
+def plot_nodes_gantt(*, ax=None, jobs):
+    if ax is None:
+        ax = plt.figure(figsize=(10,4))
+    # Submit_time and Wall_time
+    duration = [x['wall_time'] for x in jobs]
+    #nodes_required = [x['nodes_required'] for x in jobs]
+    start_t = [x['start_time'] for x in jobs]
+    nodeIDs = [x['scheduled_nodes'] for x in jobs]
+
+    colors = spaced_colors(len(jobs))
+    for i in track(range(len(jobs)), description="Collecting information to plot"):
+        for nodeID in nodeIDs[i]:
+            ax.barh(nodeID, duration[i], height=1.0, left=start_t[i], color=colors[i])
+    print("Plotting")
+
+    ax.set_ylabel("Node ID")
+    ##ax_b labels:
+    ax.set_xlabel("time [hh:mm]")
+    minx_s = 0
+    maxx_s = np.ceil(max([x['wall_time'] for x in jobs]) + max([x['submit_time'] for x in jobs]))
+    x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)]
+    x_label_ticks = [n * 60 for n in x_label_mins[0::60]]
+    x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for
+                   (x1,x2) in [(n // 60, n % 60) for
+                               n in x_label_mins[0::60]]]
+
+    ax.set_xticks(x_label_ticks, x_label_str)
+    #ax.yaxis.set_inverted(True)
+    return ax
+

 if __name__ == "__main__":
     config = ConfigManager(system_name=args.system).get_config()
     args_dict['config'] = config
     td = Telemetry(**args_dict)
-    jobs, timestep_start, timestep_end, _ = td.load_jobs_times_args_from_files(files=args.replay,args=args)
+    if args.replay is None:
+        parser.print_help()
+    if args.replay[0].endswith(".csv"):
+        jobs, timestep_start, timestep_end, _ = td.load_data_from_csv(args.replay[0])
+    else:
+        jobs, timestep_start, timestep_end, _ = td.load_jobs_times_args_from_files(files=args.replay,args=args)

     timesteps = timestep_end - timestep_start

@@ -184,15 +295,24 @@ if __name__ == "__main__":
             last = job.submit_time
         if args.verbose:
             print(job)
-
+    dt_list = [item for item in dt_list if item is not None]
+    nr_list = [item for item in nr_list if item is not None]
+    wt_list = [item for item in wt_list if item is not None]
     print(f'Simulation will run for {timesteps} seconds')
-    print(f'Average job arrival time is: {np.mean(dt_list):.2f}s')
-    print(f'Average wall time is: {np.mean(wt_list):.2f}s')
-    print(f'Nodes required (avg): {np.mean(nr_list):.2f}')
-    print(f'Nodes required (max): {np.max(nr_list)}')
-    print(f'Nodes required (std): {np.std(nr_list):.2f}')
+    if dt_list:
+        print(f'Average job arrival time is: {np.mean(dt_list):.2f}s')
+    if wt_list:
+        print(f'Average wall time is: {np.mean(wt_list):.2f}s')
+    if nr_list:
+        print(f'Nodes required (avg): {np.mean(nr_list):.2f}')
+        print(f'Nodes required (max): {np.max(nr_list)}')
+        print(f'Nodes required (std): {np.std(nr_list):.2f}')

     if args.plot:
-        #plot_nodes_histogram(nr_list)
-        #plot_submit_times(submit_times, nr_list)
-        plot_job_gantt(submit_times, end_times, nr_list)
+        fig, ax = plt.subplots()
+        if args.plot == "jobs":
+            plot_jobs_gantt(ax=ax, jobs=jobs)
+            ax.invert_yaxis()
+        if args.plot == "nodes":
+            plot_nodes_gantt(ax=ax, jobs=jobs)
+        plt.show()
--
GitLab


From 1e69b580f6b3ae227b75fc6771757d948785460f Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 18 Jun 2025 13:05:44 -0400
Subject: [PATCH 129/388] Greatly simplify gcloudv2.py dataloader - sort of
 working now, but need to work on start/end times

---
 config/gcloudv2/system.json  |   2 +-
 raps/dataloaders/gcloudv2.py | 639 +++++++++--------------------------
 raps/engine.py               |  13 +-
 3 files changed, 167 insertions(+), 487 deletions(-)

diff --git a/config/gcloudv2/system.json b/config/gcloudv2/system.json
index 525ab8a..4b6fc7b 100644
--- a/config/gcloudv2/system.json
+++ b/config/gcloudv2/system.json
@@ -3,7 +3,7 @@
   "RACKS_PER_CDU": 1,
   "NODES_PER_RACK": 100,
   "RECTIFIERS_PER_RACK": 32,
-  "CHASSIS_PER_RACK": 8,
+  "CHASSIS_PER_RACK": 1,
   "NODES_PER_BLADE": 2,
   "SWITCHES_PER_CHASSIS": 4,
   "NICS_PER_NODE": 4,
diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py
index 60f0e2a..8dd423d 100644
--- a/raps/dataloaders/gcloudv2.py
+++ b/raps/dataloaders/gcloudv2.py
@@ -1,493 +1,172 @@
-import pandas as pd
-import fsspec
 import os
-import re
-import math
-import numpy as np
-from typing import List, Dict, Optional, Union, Generator, Any
-
-# Assuming this script is located in raps/dataloaders/
-# Adjust the path if your raps/job.py is located differently
-try:
-    from ..job import job_dict
-except ImportError:
-    # Fallback for direct script execution/testing outside RAPS structure
-    print("Warning: Could not import 'job_dict' directly. Using a dummy job_dict for testing.")
-    class job_dict:
-        def __init__(self, **kwargs):
-            self.__dict__.update(kwargs)
-        def __repr__(self):
-            return f"DummyJobDict(id={getattr(self, 'id', 'N/A')})"
+import re
+from typing import List, Optional, Generator, Tuple, Any, Union

+import numpy as np
+import pandas as pd

-class GoogleClusterV2DataLoader:
-    """
-    A custom dataloader for Google Cluster Traces V2 (2011 dataset),
-    designed to read locally downloaded .csv.gz files in an ExaDigiT/RAPS style.
-
-    This dataloader supports loading data from local subdirectories for different
-    event types and can handle gzipped CSV files. It allows for selective loading
-    of specific event types and file indices.
-    """
-
-    # This BASE_LOCAL_PATH will now effectively be managed by the `load_data` function
-    # which passes it via the `base_data_path` argument to __init__.
-    # It remains here as a default for direct instantiation/testing of the class.
-    BASE_LOCAL_PATH = "/Users/w1b/data/gcloud/v2/google_cluster_data_2011_sample/"
-
-    SUPPORTED_EVENT_TYPES = [
-        "machine_events", "job_events", "task_events", "task_usage",
+from raps.job import job_dict  # ensure RAPS is in PYTHONPATH
+
+# Define expected column names for each supported event type
+V2_COLUMN_NAMES = {
+    "job_events": [
+        "timestamp",         # ↔ time
+        "missing_info",      # ↔ missing_col_1
+        "job_ID",
+        "event_type",
+        "user_name",
+        "scheduling_class",
+        "job_name",
+        "logical_job_name"
+    ],
+    "machine_events": [
+        "timestamp",
+        "machine_ID",
+        "event_type",
+        "platform_ID",
+        "CPU_capacity",
+        "memory_capacity"
+    ],
+    "task_events": [
+        "timestamp",
+        "missing_info",
+        "job_ID",
+        "task_index",
+        "machine_ID",
+        "event_type",
+        "user_name",
+        "scheduling_class",
+        "priority",
+        "CPU_request",
+        "memory_request",
+        "disk_space_request",
+        "different_machine_constraint"
+    ],
+    "task_usage": [
+        "start_time",                        # file-col 0
+        "end_time",                          # file-col 1
+        "job_ID",                            # file-col 2
+        "task_index",                        # file-col 3
+        "machine_ID",                        # file-col 4
+        "CPU_usage_rate",                    # file-col 5
+        "memory_usage_avg",                  # file-col 6
+        "memory_usage_max",                  # file-col 7
+        "assigned_memory",                   # file-col 8
+        "unmapped_page_cache_memory",        # file-col 9
+        "page_cache_memory",                 # file-col 10
+        "maximum_memory_usage",              # file-col 11
+        "disk_IO_time_avg",                  # file-col 12
+        "disk_IO_time_max",                  # file-col 13
+        "local_disk_space_used",             # file-col 14
+        "cycles_per_instruction",            # file-col 15
+        "memory_accesses_per_instruction",   # file-col 16
+        "sampling_rate",                     # file-col 17
+        "aggregation_type",                  # file-col 18
+        "missing_col_19"                     # file-col 19
     ]
-    SUPPORTED_FORMATS = ["csv"]
-
-    V2_COLUMN_NAMES = {
-        "job_events": [
-            "time", "missing_col_1", "job_ID", "event_type", "user_ID",
-            "scheduling_class", "job_name", "logical_job_name",
-            "number_of_tasks", "CPU_request", "memory_request"
-        ],
-        "machine_events": [
-            "time", "machine_ID", "event_type", "platform_ID",
-            "CPU_capacity", "memory_capacity"
-        ],
-        "task_events": [
-            "time", "missing_col_1", "job_ID", "task_index", "machine_ID",
-            "event_type", "user_ID", "scheduling_class", "priority",
-            "CPU_request", "memory_request", "disk_space_request", "constraints"
-        ],
-        "task_usage": [
-            "start_time", "end_time", "job_ID", "task_index", "machine_ID",
-            "CPU_usage_rate", "memory_usage_avg", "memory_usage_max",
-            "disk_IO_time_avg", "disk_IO_time_max", "CPUs_allocated",
-            "memory_allocated", "sample_duration", "missing_col_13",
-            "missing_col_14", "missing_col_15", "missing_col_16",
-            "missing_col_17", "missing_col_18", "missing_col_19"  # Up to 20 columns observed
-        ]
-    }
-
-    def __init__(self,
-                 event_types: Optional[Union[str, List[str]]] = None,
-                 file_indices: Optional[Union[int, List[int]]] = None,
-                 read_options: Optional[Dict[str, Any]] = None,
-                 concatenate_files: bool = True,
-                 base_data_path: Optional[str] = None):
-        """
-        Initializes the GoogleClusterV2DataLoader to read from local V2 trace files.
-
-        Args:
-            event_types (Optional[Union[str, List[str]]]):
-                Specific event types to load. If None, all supported event types will be considered.
-            file_indices (Optional[Union[int, List[int]]]):
-                Specific numerical indices of parts to load. If None, all available files for the selected types.
-            read_options (Optional[Dict[str, Any]]):
-                Additional options passed directly to pandas.read_csv().
-            concatenate_files (bool):
-                If True, all loaded files will be concatenated into a single pandas DataFrame
-                when the iterator finishes. If False, `__iter__` will yield individual DataFrames.
- base_data_path (Optional[str]): The base path to the local data directory. - This is the root that contains subdirectories like 'job_events'. - """ - self.event_types = [event_types] if isinstance(event_types, str) else event_types - self.file_indices = [file_indices] if isinstance(file_indices, int) else file_indices - self.concatenate_files = concatenate_files - - # Set default read options specific to V2 CSVs - self.read_options = read_options.copy() if read_options is not None else {} - if 'header' not in self.read_options: - self.read_options['header'] = None # V2 CSVs do not have a header row - if 'dtype' not in self.read_options: - self.read_options['dtype'] = {} - self.read_options['dtype']['time'] = 'int64' # Force 'time' to be read as integer - self.read_options['dtype']['start_time'] = 'int64' # For task_usage - self.read_options['dtype']['end_time'] = 'int64' # For task_usage - - - # The effective base path for this DataLoader instance will be where the event_type_dirs are. - # This is the key path that load_data will correctly provide. - self._current_base_path = base_data_path if base_data_path is not None else self.BASE_LOCAL_PATH - - self._fs = fsspec.AbstractFileSystem() - self._all_file_paths = [] - self._discover_files() - - def _discover_files(self): - """ - Discovers local V2 trace files based on specified event types and indices. - Populates self._all_file_paths with absolute file paths. - """ - event_types_to_consider = self.event_types if self.event_types else self.SUPPORTED_EVENT_TYPES - - self._all_file_paths = [] - - for event_type in event_types_to_consider: - event_type_dir = os.path.join(self._current_base_path, event_type) - - if event_type in self.V2_COLUMN_NAMES: - # Add names to read_options for this specific type loading instance - self.read_options['names'] = self.V2_COLUMN_NAMES[event_type] - else: - self.read_options.pop('names', None) # Remove names if not defined for this type - print(f"Warning: No explicit column names defined for '{event_type}'. Pandas will infer names.") - - if not os.path.isdir(event_type_dir): - print(f"Warning: Local directory for '{event_type}' not found: '{event_type_dir}'. 
Skipping this type.") - continue - - if self.file_indices: - for idx in self.file_indices: - filename_pattern_re = rf"part-{idx:05d}-of-\d{{5}}\.csv\.gz" - - found_indexed_file = False - for filename in os.listdir(event_type_dir): - if re.fullmatch(filename_pattern_re, filename): - self._all_file_paths.append(os.path.join(event_type_dir, filename)) - found_indexed_file = True - break - - if not found_indexed_file: - print(f"Warning: Specific file '{event_type}/part-{idx:05d}-of-*.csv.gz' not found in '{event_type_dir}'.") - else: - for filename in os.listdir(event_type_dir): - if filename.startswith("part-") and filename.endswith(".csv.gz"): - self._all_file_paths.append(os.path.join(event_type_dir, filename)) - - self._all_file_paths = sorted(list(set(self._all_file_paths))) - - if not self._all_file_paths: - print(f"Warning: No local V2 trace files found in '{self._current_base_path}' matching the criteria.") - - def __len__(self) -> int: - return len(self._all_file_paths) - - def __iter__(self) -> Generator[pd.DataFrame, None, None]: - if not self._all_file_paths: - return - - all_data_frames = [] - total_files = len(self._all_file_paths) - - print(f"\nStarting to load {total_files} selected V2 trace files from '{self._current_base_path}'...") - - for i, file_path in enumerate(self._all_file_paths): - file_name = os.path.basename(file_path) - - file_size_bytes = os.path.getsize(file_path) - file_size_mb = file_size_bytes / (1024 * 1024) - - print(f"[{i + 1}/{total_files}] Loading '{file_name}' ({file_size_mb:.2f} MB)...", end='', flush=True) - - df = None - try: - df = pd.read_csv(file_path, compression='gzip', **self.read_options) - print(f" -> OK. Shape: {df.shape}") - except Exception as e: - print(f" -> FAILED. Error: {e}") - print(f" Failed to read CSV file '{file_name}'. Double-check CSV format (e.g., separator, header) or file integrity.") - continue - - if df is not None: - if self.concatenate_files: - all_data_frames.append(df) - else: - yield df - - if self.concatenate_files and all_data_frames: - final_df = pd.concat(all_data_frames, ignore_index=True) - print(f"\nAll selected V2 files concatenated. Final DataFrame shape: {final_df.shape}") - yield final_df - elif self.concatenate_files and not all_data_frames: - print("\nNo DataFrames were loaded to concatenate from the selected V2 files.") - - def get_data_for_type(self, event_type: str, limit: Optional[int] = None) -> pd.DataFrame: - """ - A convenience method to load data for a single event type from the V2 dataset, - up to a specified number of files. (Format is fixed to CSV for V2). - """ - if event_type not in self.SUPPORTED_EVENT_TYPES: - raise ValueError(f"Unsupported event type: '{event_type}'. 
Choose from {self.SUPPORTED_EVENT_TYPES}") - - original_event_types = self.event_types - original_file_indices = self.file_indices - original_concatenate_files = self.concatenate_files - original_current_base_path = self._current_base_path - - self.event_types = [event_type] - self.concatenate_files = True +} +SUPPORTED_EVENT_TYPES = list(V2_COLUMN_NAMES.keys()) - temp_file_indices = None - if limit is not None: - temp_file_indices = list(range(limit)) - self.file_indices = temp_file_indices - - self._discover_files() - - combined_df = pd.DataFrame() - for df_chunk in self: - combined_df = df_chunk - - self.event_types = original_event_types - self.file_indices = original_file_indices - self.concatenate_files = original_concatenate_files - self._current_base_path = original_current_base_path - self._discover_files() - - return combined_df - -# --- MANDATORY RAPS `load_data` FUNCTION --- -# This function is the entry point that RAPS's main.py will call. -def load_data( - data_path: Union[str, List[str]], **kwargs) -> tuple[List[Any], float, float]: # RAPS expects a list of job_dict instances, start_time, end_time +class GoogleClusterV2DataLoader: """ - RAPS data loading entry point for Google Cluster Trace V2 (2011) data. - - Loads data from a specified local path, assuming it contains subdirectories - like 'job_events', 'task_events', etc., filled with .csv.gz files. - It returns a list of RAPS job_dict instances, along with the global start - and end timestamps of the loaded data. - - Args: - data_path (Union[str, List[str]]): The base path to the local V2 data directory. - Expected to be the directory that *contains* the - 'google_cluster_data_2011_sample' subdirectory. - Can be a string or a list containing a single string. - Example: `~/data/gcloud/v2/` - - Returns: - tuple[List[Any], float, float]: - - A list of RAPS `job_dict` instances. - - The global minimum timestamp (float) found across all loaded data. - - The global maximum timestamp (float) found across all loaded data. - Returns ([], 0.0, 0.0) if no data or time info found. + Loader for Google Cluster V2 CSV.GZ files. """ - - # --- FIX 1: Handle data_path potentially being a list (from argparse) --- - if isinstance(data_path, list): - if len(data_path) == 1: - data_path_str = data_path[0] + def __init__(self, base_path: str, event_type: str="job_events", + file_indices: Optional[List[int]]=None, concatenate: bool=True): + self.base_path = os.path.expanduser(base_path) + if event_type not in SUPPORTED_EVENT_TYPES: + raise ValueError(f"Unsupported event type: '{event_type}'") + self.event_type = event_type + self.file_indices = file_indices + self.concatenate = concatenate + self.file_paths = self._find_files() + + def _find_files(self) -> List[str]: + dir_path = os.path.join(self.base_path, self.event_type) + if not os.path.isdir(dir_path): + raise FileNotFoundError(f"Directory not found: {dir_path}") + files = os.listdir(dir_path) + matches = [] + if self.file_indices: + for idx in self.file_indices: + pattern = re.compile(rf"part-{idx:05d}-of-\d{{5}}\.csv\.gz$") + found = [f for f in files if pattern.match(f)] + if not found: + raise FileNotFoundError(f"File index {idx} missing in {dir_path}") + matches.extend(found) else: - raise ValueError( - f"load_data expected a single base data path, but received a list of multiple paths: {data_path}. " - f"Please ensure RAPS passes a single path." 
- ) - else: - data_path_str = data_path - # --- END FIX 1 --- - - # Expand the user home directory if '~' is used in data_path_str - expanded_data_path = os.path.expanduser(data_path_str) - # Ensure it ends with a slash for consistency with os.path.join later - if not expanded_data_path.endswith(os.sep): - expanded_data_path += os.sep - - # This dictionary will store DataFrames for all event types loaded by this function - loaded_dfs: Dict[str, pd.DataFrame] = {} - - # Load all supported event types (job_events, task_events, etc.) - # We set event_types=None and file_indices=None to load all available files for each type - # from the automatically detected subdirectories. - dataloader = GoogleClusterV2DataLoader( - event_types=None, # Load all supported types - file_indices=None, # Load all files found for each type - read_options=None, # Use default read_options defined in DataLoader - concatenate_files=True, # Get one concatenated DF per type - base_data_path=expanded_data_path # This is the RAPS-provided path to the directory *above* the data folder - ) - - # Initialize global min/max timestamps for the entire dataset - global_min_time = float(math.inf) - global_max_time = float(-math.inf) - - # Loop through the dataloader to get all concatenated DataFrames for each event type - for event_type_key in dataloader.SUPPORTED_EVENT_TYPES: # Iterate through explicitly supported types - # Create a temporary DataLoader instance just to load this specific event type - # from the correct subpath within expanded_data_path - temp_dataloader_for_type = GoogleClusterV2DataLoader( - event_types=event_type_key, - file_indices=None, # Load all files for this specific type - read_options=None, # Use default read_options - concatenate_files=True, - base_data_path=expanded_data_path # Pass the RAPS base path - ) - - # This loop will run once for each event type, yielding its concatenated DataFrame - for df_current_type in temp_dataloader_for_type: - if not df_current_type.empty: - loaded_dfs[event_type_key] = df_current_type - print(f"RAPS: Successfully loaded '{event_type_key}'. DataFrame shape: {df_current_type.shape}") + matches = [f for f in files if f.startswith("part-") and f.endswith(".csv.gz")] + if not matches: + raise FileNotFoundError(f"No files in {dir_path}") + return [os.path.join(dir_path, f) for f in sorted(matches)] - # Update global min/max times if a 'time' column exists - if 'time' in df_current_type.columns: - current_min = df_current_type['time'].min() - current_max = df_current_type['time'].max() - if current_min < global_min_time: - global_min_time = current_min - if current_max > global_max_time: - global_max_time = current_max + def __iter__(self) -> Generator[pd.DataFrame, None, None]: + dfs = [] + names = V2_COLUMN_NAMES[self.event_type] + ts_col = names[0] + for path in self.file_paths: + df = pd.read_csv(path, compression='gzip', header=None, + names=names, dtype={ts_col: int}) + if not self.concatenate: + yield df else: - print(f"RAPS: No data loaded for event type '{event_type_key}'.") - - print("\n--- RAPS: Data loading complete for individual types ---") - - # --- FIX 2: Select and prepare the primary 'jobs' list for RAPS --- - # RAPS main.py is iterating over `jobs`, expecting `job['wall_time']` and `job['start_time']`. - # This means `jobs` must be a list of dictionaries (or job_dict instances). 
- jobs_list_for_rap: List[Any] = [] + dfs.append(df) + if self.concatenate and dfs: + yield pd.concat(dfs, ignore_index=True) - # Prioritize task_events for primary job records, otherwise use job_events. - raw_primary_records_df = pd.DataFrame() - if 'task_events' in loaded_dfs and not loaded_dfs['task_events'].empty: - raw_primary_records_df = loaded_dfs['task_events'].copy() - print(f"RAPS: Selected 'task_events' as the primary source for job records.") - elif 'job_events' in loaded_dfs and not loaded_dfs['job_events'].empty: - raw_primary_records_df = loaded_dfs['job_events'].copy() - print(f"RAPS: Selected 'job_events' as the primary source for job records (task_events not available/empty).") - else: - print("RAPS: Warning: Neither 'task_events' nor 'job_events' found/loaded for primary 'jobs' data. Cannot create job records.") - # Return empty list and 0.0 times if no primary data - return [], 0.0, 0.0 - if not raw_primary_records_df.empty: - # --- FIX 3: Prepare raw_primary_records_df with RAPS-expected columns --- - # Map V2 'time' column to RAPS 'submit_time' and 'start_time' - if 'time' in raw_primary_records_df.columns: - raw_primary_records_df['submit_time'] = raw_primary_records_df['time'] - raw_primary_records_df['start_time'] = raw_primary_records_df['time'] +def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any], float, float]: + # Unpack list + if isinstance(data_path, list): + if len(data_path)==1: + data_path=data_path[0] else: - raw_primary_records_df['submit_time'] = 0 - raw_primary_records_df['start_time'] = 0 - print("Warning: 'time' column not found in primary records DataFrame. Using 0 for submit/start_time.") - - # Add 'wall_time'. V2 trace does not have explicit wall_time per job/task. - # This is a dummy value for RAPS's internal calculations. - raw_primary_records_df['wall_time'] = 1 # Dummy: 1 microsecond duration - - # Add 'end_time' to the DataFrame for internal consistency if needed later - # (though RAPS main.py calculates it, having it can be useful) - raw_primary_records_df['end_time'] = raw_primary_records_df['start_time'] + raw_primary_records_df['wall_time'] - - # --- FIX 4: Create job_dict instances and populate jobs_list_for_rap --- - # Get the jid (job ID filter) from kwargs, defaulting to '*' - jid_filter = kwargs.get('jid', '*') - - # Filter to 'submit' events to represent distinct job creations - submit_records_df = raw_primary_records_df[ - raw_primary_records_df.get('event_type') == 0 # Event type 0 is 'submit' - ].copy() if 'event_type' in raw_primary_records_df.columns else raw_primary_records_df.copy() - - if 'job_ID' not in submit_records_df.columns: - submit_records_df['job_ID'] = submit_records_df['task_index'] if 'task_index' in submit_records_df.columns else range(len(submit_records_df)) - print("Warning: 'job_ID' not found. Using 'task_index' or row index for job_id.") - - # Make job_ID unique in case 'task_index' was used and job_ID wasn't. - # This ensures unique RAPS job_dict IDs. 
- submit_records_df['unique_job_id'] = submit_records_df['job_ID'].astype(str) + "_" + submit_records_df['start_time'].astype(str) - - for index, row in submit_records_df.iterrows(): - job_id_from_trace = row['job_ID'] # The original job_ID from the trace - - # Apply RAPS's jid filter (from main.py example) - if jid_filter != '*' and str(job_id_from_trace) != str(jid_filter): - continue - - # --- Map V2 Data to job_dict arguments --- - nodes_required = 1 # Dummy: V2 doesn't specify nodes_required directly per job event - name = f"job_{job_id_from_trace}" - account = f"user_{row['user_ID']}" if 'user_ID' in row else "unknown_user" - priority = row['priority'] if 'priority' in row else 0 - - # Trace data arrays are empty as per V2 characteristics - cpu_trace = np.array([]) - gpu_trace = np.array([]) # V2 has no GPUs - nrx_trace = np.array([]) - ntx_trace = np.array([]) - - end_state = "UNKNOWN" # Final job state requires complex aggregation of task events - scheduled_nodes = [] # Requires scheduling logic, not directly in raw event - - # Global trace times (already calculated above) - trace_start_time = float(global_min_time) if global_min_time != float(math.inf) else 0.0 - trace_end_time = float(global_max_time) if global_max_time != float(-math.inf) else 0.0 - - # This specific record's time (from its 'time' column) - trace_time_for_record = row['time'] if 'time' in row else 0 - - job_info = job_dict( - nodes_required=nodes_required, - name=name, - account=account, - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - nrx_trace=nrx_trace, - ntx_trace=ntx_trace, - end_state=end_state, - scheduled_nodes=scheduled_nodes, - id=job_id_from_trace, # Use the original job ID from the trace - priority=priority, - submit_time=row['submit_time'], - time_limit=0, # V2 doesn't have explicit time_limit per job_event - start_time=row['start_time'], # RAPS uses this for simulation start - end_time=row['end_time'], # RAPS uses this for simulation end - wall_time=row['wall_time'], # RAPS uses this for duration - trace_time=trace_time_for_record, # The timestamp of *this specific event* record - trace_start_time=trace_start_time, # Global trace start time - trace_end_time=trace_end_time # Global trace end time - ) - jobs_list_for_rap.append(job_info) - print(f"RAPS: Created {len(jobs_list_for_rap)} job_dict instances.") - else: - print("RAPS: No primary records DataFrame available to create job_dict instances.") - - # Convert global min/max times to float - final_timestep_start = float(global_min_time) if global_min_time != float(math.inf) else 0.0 - final_timestep_end = float(global_max_time) if global_max_time != float(-math.inf) else 0.0 - - print(f"RAPS: Final global time range determined: Start={final_timestep_start}, End={final_timestep_end}") - - # Return the three values RAPS expects: - # (list of job_dict instances, global min time, global max time) - return jobs_list_for_rap, final_timestep_start, final_timestep_end - - -# --- Example Usage (for direct script execution/testing the load_data function) --- -if __name__ == "__main__": - # IMPORTANT: Adjust this path to match your local setup precisely. - # This path should be the DIRECTORY THAT RAPS's `-f` ARGUMENT POINTS TO. - # e.g., if you run `main.py -f /Users/w1b/data/gcloud/v2`, then this variable is '/Users/w1b/data/gcloud/v2/'. 
- # And inside THAT directory, you should have `google_cluster_data_2011_sample/` - RAPS_SIMULATED_BASE_DIR = "/Users/w1b/data/gcloud/v2/" - - print("--- Running direct tests of the load_data function ---") - - print("\n--- Test 1: Loading all event types (default behavior for a RAPS integration) ---") - jobs_list_test1, start_time_test1, end_time_test1 = load_data([RAPS_SIMULATED_BASE_DIR], system="dummy_system_name") - - if jobs_list_test1: - print(f"\nSummary of Test 1 (Primary jobs list loaded):") - print(f"- Number of job/task records: {len(jobs_list_test1)}") - if jobs_list_test1 and hasattr(jobs_list_test1[0], 'start_time') and hasattr(jobs_list_test1[0], 'wall_time'): - print(f"- First record (id={jobs_list_test1[0].id}): submit_time={jobs_list_test1[0].submit_time}, start_time={jobs_list_test1[0].start_time}, wall_time={jobs_list_test1[0].wall_time}") - # print(f"- Full first record details: {jobs_list_test1[0].__dict__}") - print(f"- Global Start time: {start_time_test1}, Global End time: {end_time_test1}") - else: - print("\nTest 1: No primary jobs list loaded. Check specified paths and downloaded files.") - - print("\n--- Test 2: Loading specific event types and file indices ---") - jobs_list_test2, start_time_test2, end_time_test2 = load_data( - [RAPS_SIMULATED_BASE_DIR], - event_types=["job_events"], - file_indices=[0], - read_options={'header': 0}, - another_rap_param=123 - ) - - if jobs_list_test2: - print(f"\nSummary of Test 2 (Primary jobs list loaded):") - print(f"- Number of job/task records: {len(jobs_list_test2)}") - if jobs_list_test2 and hasattr(jobs_list_test2[0], 'start_time') and hasattr(jobs_list_test2[0], 'wall_time'): - print(f"- First record (id={jobs_list_test2[0].id}): submit_time={jobs_list_test2[0].submit_time}, start_time={jobs_list_test2[0].start_time}, wall_time={jobs_list_test2[0].wall_time}") - # print(f"- Full first record details: {jobs_list_test2[0].__dict__}") - print(f"- Global Start time: {start_time_test2}, Global End time: {end_time_test2}") - else: - print("\nTest 2: No primary jobs list loaded. 
Check path, types, and indices.") - - print("\n--- RAPS Dataloader (V2) script demonstration complete ---") + raise ValueError(f"Expected single path, got {data_path}") + base_path = os.path.expanduser(data_path) + + # Load submit events + loader = GoogleClusterV2DataLoader(base_path, event_type="job_events", concatenate=True) + df = next(iter(loader)) + for col in ("timestamp","job_ID","event_type"): + if col not in df.columns: + raise ValueError(f"Missing column {col}") + df = df[df["event_type"]==0] + df["timestamp"] = df["timestamp"].astype(float) + t0, t1 = df["timestamp"].min(), df["timestamp"].max() + + # Load task usage + usage_loader = GoogleClusterV2DataLoader(base_path, event_type="task_usage", concatenate=True) + usage_df = next(iter(usage_loader)) + # rename to avg + if "CPU_usage_rate" in usage_df.columns: + usage_df.rename(columns={"CPU_usage_rate":"CPU_usage_avg"}, inplace=True) + usage_df["job_ID"] = usage_df["job_ID"].astype(int) + usage_df["CPU_usage_avg"] = usage_df["CPU_usage_avg"].astype(float) + usage_map = usage_df.groupby("job_ID")["CPU_usage_avg"].apply(lambda s: s.to_numpy()).to_dict() + + # Filter to jobs with usage data + df = df[df["job_ID"].isin(usage_map)] + + jobs: List[Any] = [] + jid_f = kwargs.get('jid','*') + for _, row in df.iterrows(): + jid = int(row["job_ID"]) + if jid_f!='*' and str(jid)!=str(jid_f): continue + trace = usage_map[jid] + # ensure gpu_trace is same length + gpu_trace = np.zeros_like(trace) + jobs.append(job_dict( + nodes_required=1, + name=f"job_{jid}", + account=f"user_{row.get('user_name','unknown')}", + cpu_trace=trace, + gpu_trace=gpu_trace, + nrx_trace=[], ntx_trace=[], + end_state="UNKNOWN", scheduled_nodes=[], + id=jid, priority=int(row.get('scheduling_class',0)), + submit_time=row["timestamp"], time_limit=0, + start_time=row["timestamp"], end_time=row["timestamp"]+1.0, + wall_time=1.0, trace_time=row["timestamp"], + trace_start_time=float(t0), trace_end_time=float(t1) + )) + return jobs, 0, 10000 diff --git a/raps/engine.py b/raps/engine.py index 7c7ce7c..ae33fdc 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -418,10 +418,10 @@ class Engine: avg_net = sum(net_utils) / n n = len(slowdown_factors) or 1 - avg_slowdown_per_job = sum(slowdown_factors) / n - self.avg_slowdown_history.append(avg_slowdown_per_job) - max_slowdown_per_job = max(slowdown_factors) - self.max_slowdown_history.append(max_slowdown_per_job) + #avg_slowdown_per_job = sum(slowdown_factors) / n + #self.avg_slowdown_history.append(avg_slowdown_per_job) + #max_slowdown_per_job = max(slowdown_factors) + #self.max_slowdown_history.append(max_slowdown_per_job) # Save network history self.avg_net_tx.append(avg_tx) @@ -445,7 +445,7 @@ class Engine: avg_net_tx=avg_tx, avg_net_rx=avg_rx, avg_net_util=avg_net, - slowdown_per_job=avg_slowdown_per_job + slowdown_per_job=0 ) self.current_time += 1 @@ -495,7 +495,8 @@ class Engine: # Batch Jobs into 6h windows based on submit_time batch_window = 60 * 60 * 6 # 6h - for timestep in range(timestep_start,timestep_end): + print(timestep_start, timestep_end) + for timestep in range(timestep_start, timestep_end): if (timestep % batch_window == 0) or (timestep == timestep_start): # Add jobs that are within the batching window and remove them from all jobs -- GitLab From 9be4eebd80e54c677b8fe60481fd5ae2353d1486 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 18 Jun 2025 13:17:11 -0400 Subject: [PATCH 130/388] Add URL to official documentation for Google Cluster V2 traces --- raps/dataloaders/gcloudv2.md | 4 ++++ 
 raps/dataloaders/gcloudv2.py | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/raps/dataloaders/gcloudv2.md b/raps/dataloaders/gcloudv2.md
index b1f95cb..dccbbc1 100644
--- a/raps/dataloaders/gcloudv2.md
+++ b/raps/dataloaders/gcloudv2.md
@@ -1,5 +1,9 @@
 # **Google Cluster Trace V2 (2011) Dataset Overview**

+Some of this info may be incorrect. Look here for the official documentation:
+
+https://drive.google.com/file/d/0B5g07T_gRDg9Z0lsSTEtTWtpOW8/view?resourcekey=0-cozD56gA4fUDdrkHnLJSrQ
+
 This document provides a summary of the Google Cluster Trace V2 dataset, released in 2011\. This dataset offers insights into the operation of a large-scale production data center and its workload. It's crucial for research in areas like cluster scheduling, resource management, and workload characterization.

 ## **1\. Dataset Overview**
diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py
index 8dd423d..71bea8f 100644
--- a/raps/dataloaders/gcloudv2.py
+++ b/raps/dataloaders/gcloudv2.py
@@ -7,6 +7,12 @@ import pandas as pd

 from raps.job import job_dict  # ensure RAPS is in PYTHONPATH

+"""
+Official instructions are here:
+
+https://drive.google.com/file/d/0B5g07T_gRDg9Z0lsSTEtTWtpOW8/view?resourcekey=0-cozD56gA4fUDdrkHnLJSrQ
+"""
+
 # Define expected column names for each supported event type
 V2_COLUMN_NAMES = {
     "job_events": [
--
GitLab

From f0a62c0cdf8f2d3f66eeb8d3cb4bae976329235f Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 18 Jun 2025 14:41:03 -0400
Subject: [PATCH 131/388] Fixed Smoke tests

---
 raps/job.py       |  7 ++++
 raps/telemetry.py | 86 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/raps/job.py b/raps/job.py
index 1c41ddf..c1c1327 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -98,6 +98,13 @@ class Job:
             pass
         else:
             raise ValueError(f"{self.nodes_required} {self.scheduled_nodes}")
+        if self.scheduled_nodes == [] or self.scheduled_nodes is None or \
+           (isinstance(self.scheduled_nodes,list) and isinstance(self.scheduled_nodes[0], int)) or \
+           (isinstance(self.scheduled_nodes,np.ndarray) and isinstance(self.scheduled_nodes[0], int)):
+            pass  # Type is ok
+        else:
+            # Type is not as expected!
+            raise ValueError(f"type: self.scheduled_nodes:{type(self.scheduled_nodes)}, with {type(self.scheduled_nodes[0])}")

     def __repr__(self):
         """Return a string representation of the job."""
diff --git a/raps/telemetry.py b/raps/telemetry.py
index a77e636..e259cbd 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -10,6 +10,8 @@ import re
 import sys
 import random
 import argparse
+import itertools
+import json

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Telemetry data validator')
@@ -18,9 +20,8 @@ if __name__ == "__main__":
                         help='Either: path/to/joblive path/to/jobprofile' + \
                         ' -or- filename.npz (overrides --workload option)')
     parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs','nodes'], help='Output plots')
-
+    parser.add_argument("--is-results-file", action='store_true', default=False, help='Treat the replay CSV as a saved simulation-results file')
     parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gantt with nodes required as line thickness (default false)") # duplicate in workload!
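# A note on the --is-results-file path: load_csv_results() below reads the
# results CSV with pandas' chunksize=1, which yields an iterator of one-row
# DataFrames so each record can be pulled out with .item() without loading
# the whole file at once. A minimal standalone sketch of that pattern; the
# 'results.csv' name and the column set are hypothetical, chosen only to
# mirror the fields the loader touches:

import pandas as pd

def iter_job_rows(path):
    """Yield one job record (as a plain dict) per CSV row."""
    for chunk in pd.read_csv(path, chunksize=1):
        yield {
            'id': chunk['id'].item(),
            'submit_time': chunk['submit_time'].item(),
            'start_time': chunk['start_time'].item(),
            'end_time': chunk['end_time'].item(),
            # wall_time is derived as end - start, as in the loader below
            'wall_time': chunk['end_time'].item() - chunk['start_time'].item(),
        }

# Example usage:
#   for rec in iter_job_rows('results.csv'):
#       print(rec['id'], rec['wall_time'])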
- parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') parser.add_argument('--system', type=str, default='frontier', help='System config to use') choices = ['prescribed', 'poisson'] @@ -76,6 +77,44 @@ class Telemetry: int(data['timestep_end']), \ data['args'].tolist() + def load_csv_results(self, file): + jobs = [] + time_start = 0 + time_end = 0 + args = None + for line in pd.read_csv(file,chunksize=1): + job_info = job_dict(nodes_required=line.get('num_nodes').item(), # Named like this somewhere in the csv history dumper + name=line.get('name').item(), + account=line.get('account').item(), + cpu_trace=None, + gpu_trace=None, + ntx_trace=None, + nrx_trace=None, + #end_state=line.get('end_state').item(), + end_state=None, + scheduled_nodes=json.loads(line.get('scheduled_nodes').item()), + id=line.get('id').item(), + #priority=line.get('priority').item(), + priority=None, + #partition=line.get('partition').item(), + partition=None, + submit_time=line.get('submit_time').item(), + start_time=line.get('start_time').item(), + end_time=line.get('end_time').item(), + #wall_time=line.get('wall_time').item(), + wall_time=line.get('end_time').item() - line.get('start_time').item(), + #trace_time=line.get('trace_time').item(), + trace_time=None, + #trace_start_time=line.get('trace_start_time').item(), + trace_start_time=None, + #trace_end_time=line.get('trace_end_time').item(), + trace_end_time=None, + #trace_missing_values=line.get('trace_missing_values').item(), + trace_missing_values=None + ) + jobs.append(Job(job_info)) + return jobs, time_start, time_end, args + def load_data(self, files): """Load telemetry data using custom data loaders.""" return self.dataloader.load_data(files, **self.kwargs) @@ -137,7 +176,11 @@ class Telemetry: jobs = [] trigger_custom_dataloader = False for i,file in enumerate(files): - if file.endswith(".npz"): # Replay .npz file + if hasattr(args,'is_results_file') and args.is_results_file: + if file.endswith(".csv"): + jobs, timestep_start, timestep, _ = self.load_csv_results(file) + + elif file.endswith(".npz"): # Replay .npz file print(f"Loading {file}...") jobs_from_file, timestep_start_from_file, timestep_end_from_file, args_from_file = self.load_snapshot(file) if not hasattr(args_from_file,'fastforward') or args_from_file.fastforward is None: @@ -149,7 +192,7 @@ class Telemetry: f"All Args:\n{args_from_file}" +\ "To use these set them from the commandline!" 
) - jobs.extend(jobs_from_file) + jobs.extend(Job(jobs_from_file)) timestep_start = min(timestep_start,timestep_start_from_file) timestep_end = max(timestep_end, timestep_end_from_file) @@ -194,18 +237,19 @@ class Telemetry: if args.time: timestep_end = timestep_start + convert_to_seconds(args.time) elif not timestep_end: - timestep_end = int(max(job['wall_time'] + job['start_time'] for job in jobs)) + 1 + timestep_end = int(max(job.wall_time + job.start_time for job in jobs)) + 1 return jobs, timestep_start, timestep_end, args def plot_jobs_gantt(*,ax=None,jobs): + jobs.sort(key=lambda x:x.submit_time) if ax is None: ax = plt.figure(figsize=(10,4)) # Submit_time and Wall_time - submit_t = [x['submit_time'] for x in jobs] - duration = [x['wall_time'] for x in jobs] - nodes_required = [x['nodes_required'] for x in jobs] + submit_t = [x.submit_time for x in jobs] + duration = [x.wall_time for x in jobs] + nodes_required = [x.nodes_required for x in jobs] colors = spaced_colors(len(jobs)) offset = 0 @@ -221,8 +265,8 @@ def plot_jobs_gantt(*,ax=None,jobs): ##ax_b labels: ax.set_xlabel("time [hh:mm]") minx_s = 0 - maxx_s = np.ceil(max([x['wall_time'] for x in jobs]) + max([x['submit_time'] for x in jobs])) - x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] + maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) + x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for (x1,x2) in [(n // 60,n % 60) for @@ -237,10 +281,12 @@ def plot_nodes_gantt(*,ax=None,jobs): if ax is None: ax = plt.figure(figsize=(10,4)) # Submit_time and Wall_time - duration = [x['wall_time'] for x in jobs] + duration = [x.wall_time for x in jobs] #nodes_required = [x['nodes_required'] for x in jobs] - start_t = [x['start_time'] for x in jobs] - nodeIDs = [x['scheduled_nodes'] for x in jobs] + start_t = [x.start_time for x in jobs] + nodeIDs = [x.scheduled_nodes for x in jobs] + print(nodeIDs[0]) + print(type(nodeIDs[0])) colors = spaced_colors(len(jobs)) for i in track(range(len(jobs)), description="Collecting information to plot"): @@ -252,7 +298,7 @@ def plot_nodes_gantt(*,ax=None,jobs): ##ax_b labels: ax.set_xlabel("time [hh:mm]") minx_s = 0 - maxx_s = np.ceil(max([x['wall_time'] for x in jobs]) + max([x['submit_time'] for x in jobs])) + maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for @@ -260,6 +306,7 @@ def plot_nodes_gantt(*,ax=None,jobs): n in x_label_mins[0::60]]] ax.set_xticks(x_label_ticks,x_label_str) + ax.set_ylim(1,max(list(itertools.chain.from_iterable(nodeIDs)))) #ax.yaxis.set_inverted(True) return ax @@ -268,12 +315,10 @@ if __name__ == "__main__": config = ConfigManager(system_name=args.system).get_config() args_dict['config'] = config td = Telemetry(**args_dict) - if args.replay is None: - parser.print_help() - if args.replay[0].endswith(".csv"): - jobs, timestep_start, timestep_end, _ = td.load_data_from_csv(args.replay[0]) - else: + if args.replay: jobs, timestep_start, timestep_end, _ = td.load_jobs_times_args_from_files(files=args.replay,args=args) + else: + parser.print_help() timesteps = timestep_end - timestep_start @@ -283,8 +328,7 @@ if __name__ == "__main__": submit_times = [] end_times = [] last = 0 - 
for job_vector in jobs: - job = Job(job_vector) + for job in jobs: wt_list.append(job.wall_time) nr_list.append(job.nodes_required) submit_times.append(job.submit_time) -- GitLab From 45d20691f7b9374a749f5c9db45cda9e48d00843 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 18 Jun 2025 14:56:21 -0400 Subject: [PATCH 132/388] fix plotting of npz files in raps.telemetry --- raps/telemetry.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index e259cbd..1894b31 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -72,7 +72,13 @@ class Telemetry: - args, which were used to generate the loaded snapshot """ data = np.load(snapshot, allow_pickle=True, mmap_mode='r') - return data['jobs'].tolist(), \ + job_data = data['jobs'].tolist() + jobs = [] + for job_info in job_data: + job = Job(job_info) + jobs.append(job) + + return jobs, \ int(data['timestep_start']), \ int(data['timestep_end']), \ data['args'].tolist() @@ -192,7 +198,7 @@ class Telemetry: f"All Args:\n{args_from_file}" +\ "To use these set them from the commandline!" ) - jobs.extend(Job(jobs_from_file)) + jobs.extend(jobs_from_file) timestep_start = min(timestep_start,timestep_start_from_file) timestep_end = max(timestep_end, timestep_end_from_file) -- GitLab From 3a6ed1fb9e0b543346f9e9d40b5e9804519bd8f4 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 18 Jun 2025 15:31:50 -0400 Subject: [PATCH 133/388] Fixed labels hh:mm --- raps/telemetry.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 1894b31..e8a9ed9 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -291,8 +291,6 @@ def plot_nodes_gantt(*,ax=None,jobs): #nodes_required = [x['nodes_required'] for x in jobs] start_t = [x.start_time for x in jobs] nodeIDs = [x.scheduled_nodes for x in jobs] - print(nodeIDs[0]) - print(type(nodeIDs[0])) colors = spaced_colors(len(jobs)) for i in track(range(len(jobs)), description="Collecting information to plot"): @@ -305,7 +303,7 @@ def plot_nodes_gantt(*,ax=None,jobs): ax.set_xlabel("time [hh:mm]") minx_s = 0 maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) - x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] + x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for (x1,x2) in [(n // 60,n % 60) for -- GitLab From 9b937b3db54b62c90776010e7fc7c21163cafeeb Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 20 Jun 2025 14:22:25 -0400 Subject: [PATCH 134/388] Added trace quanta to jobs and moved jobs object creation to dataloaders --- main.py | 5 +++-- multi-part-sim.py | 10 +++++----- raps/dataloaders/adastraMI250.py | 5 +++-- raps/dataloaders/frontier.py | 23 ++++++++++++++--------- raps/dataloaders/fugaku.py | 5 +++-- raps/dataloaders/lassen.py | 7 ++++--- raps/dataloaders/marconi100.py | 7 ++++--- raps/job.py | 13 ++++++++++--- 8 files changed, 46 insertions(+), 29 deletions(-) diff --git a/main.py b/main.py index 94f6ce4..9effbde 100644 --- a/main.py +++ b/main.py @@ -6,6 +6,7 @@ import random import pandas as pd import os import time +import math from raps.helpers import check_python_version check_python_version() @@ -79,7 +80,7 @@ else: # Synthetic jobs timestep_start = 0 if hasattr(jobs[0],'end_time'): - timestep_end = max([job.end_time for job in jobs]) + timestep_end = 
int(math.ceil(max([job.end_time for job in jobs]))) else: timestep_end = 88200 # 24 hours @@ -130,9 +131,9 @@ if args.time_delta: time_delta = convert_to_seconds(args.time_delta) else: time_delta = config['TRACE_QUANTA'] -print(time_delta) print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds from {timestep_start} to {timestep_end}.') +print(f'Simulation time delta: {time_delta}s, Telemetry trace quanta: {jobs[0].trace_quanta}s.') layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config) layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) diff --git a/multi-part-sim.py b/multi-part-sim.py index d0ccafa..bb91fda 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -42,17 +42,17 @@ if args.replay: # Randomly assign partition for job in jobs: - job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0] + job.partition = random.choices(partition_names, weights=available_nodes, k=1)[0] if args.scale: for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): - job['nodes_required'] = random.randint(1, args.scale) + job.nodes_required = random.randint(1, args.scale) if args.arrival == 'poisson': for job in tqdm(jobs, desc="Adjusting job submission time"): - partition = job['partition'] + partition = job.partition partition_config = configs[partition_names.index(partition)] - job['submit_time'] = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME']) + job.submit_time = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME']) elif args.arrival == 'prescribed': raise NotImplementedError @@ -66,7 +66,7 @@ else: # Synthetic workload # Group jobs by partition jobs_by_partition = {partition: [] for partition in partition_names} for job in jobs: - jobs_by_partition[job['partition']].append(job) + jobs_by_partition[job.partition].append(job) # Initialize layout managers for each partition layout_managers = {} diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 00d8d9f..58df564 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -22,7 +22,7 @@ import numpy as np import pandas as pd from tqdm import tqdm -from ..job import job_dict +from ..job import job_dict, Job from ..utils import power_to_utilization, next_arrival_byconfkwargs @@ -198,7 +198,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): trace_end_time=trace_end_time, trace_missing_values=True ) - jobs.append(job_info) + job = Job(job_info) + jobs.append(job) else: count_jobs_notOK += 1 diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 53693be..4a8bf42 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd from tqdm import tqdm -from ..job import job_dict +from ..job import job_dict, Job from ..utils import power_to_utilization, next_arrival_byconfkwargs, encrypt @@ -117,10 +117,11 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar - trace_time (lenght of each trace in seconds) - trace_start_time (time offset in seconds after which the trace starts) - trace_end_time (time offset in seconds after which the trace ends) + - trace_quanta (job's associated trace quanta, to correctly replay with different trace quanta) has to be set for use within the simulation The values trace_start_time are similar to the telemetry_start and - telemetry_stop but job specific. 
+    telemetry_stop but may differ due to missing data, for each job.

     The returned values are these three:
     - The list of parsed jobs. (as a job_dict)
@@ -231,14 +232,14 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
         if np.isnan(wall_time):
             wall_time = 0

-        trace_time = gpu_trace.size * config['TRACE_QUANTA']  # seconds
-
-
+        trace_quanta = config['TRACE_QUANTA']
+        trace_time = gpu_trace.size * trace_quanta  # seconds
         trace_start_time = 0
         trace_end_time = trace_time
         if wall_time > trace_time:
             missing_trace_time = int(wall_time - trace_time)
+            trace_missing_values = True
             if start_time < 0:
                 trace_start_time = missing_trace_time
                 trace_end_time = wall_time
@@ -247,6 +248,8 @@
                 trace_end_time = trace_time
             else:
                 print(f"Job: {job_id} {end_state} {start_time} - {end_time},Trace: {trace_start_time} - {trace_end_time} Missing: {missing_trace_time}!")
+        else:
+            trace_missing_values = False

         xnames = jobs_df.loc[jidx, 'xnames']
         # Don't replay any job with an empty set of xnames
@@ -288,8 +291,8 @@
                             account=account,
                             cpu_trace=cpu_trace,
                             gpu_trace=gpu_trace,
-                            nrx_trace=[],
-                            ntx_trace=[],
+                            nrx_trace=None,
+                            ntx_trace=None,
                             end_state=end_state,
                             scheduled_nodes=scheduled_nodes,
                             id=job_id,
@@ -297,9 +300,11 @@
                             submit_time=submit_time, time_limit=time_limit,
                             start_time=start_time, end_time=end_time,
                             wall_time=wall_time, trace_time=trace_time,
-                            trace_start_time=trace_start_time, trace_end_time=trace_end_time)
-        jobs.append(job_info)
+                            trace_start_time=trace_start_time, trace_end_time=trace_end_time,
+                            trace_quanta=trace_quanta, trace_missing_values=trace_missing_values)
+        job = Job(job_info)
+        jobs.append(job)

     return jobs, telemetry_start, telemetry_end

diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py
index ff277a9..11bb13d 100644
--- a/raps/dataloaders/fugaku.py
+++ b/raps/dataloaders/fugaku.py
@@ -16,7 +16,7 @@
 """
 import pandas as pd
 from tqdm import tqdm
-from ..job import job_dict
+from ..job import job_dict, Job
 from ..utils import next_arrival
@@ -155,7 +155,8 @@ def load_data_from_df(df, **kwargs):
                             trace_start_time=trace_start_time,
                             trace_end_time=trace_end_time,
                             trace_missing_values=trace_missing_values)
-        job_list.append(job_info)
+        job = Job(job_info)
+        job_list.append(job)

     return job_list, telemetry_start, telemetry_end

diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py
index bf8d635..e3c99eb 100644
--- a/raps/dataloaders/lassen.py
+++ b/raps/dataloaders/lassen.py
@@ -37,7 +37,7 @@
 import pandas as pd
 from tqdm import tqdm
 from datetime import timedelta
-from ..job import job_dict
+from ..job import job_dict, Job
 from ..utils import power_to_utilization, next_arrival_byconfkwargs, convert_to_seconds
@@ -176,7 +176,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
         ib_rx = 4 * node_data['ib_rx'].sum() if node_data['ib_rx'].values.size > 0 else []
         #net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3)
-        net_tx, net_rx = [],[]  # generate_network_sequences generates errors (e.g. -ff 800d -t 1d )
+        net_tx, net_rx = None,None  # generate_network_sequences generates errors (e.g. -ff 800d -t 1d )
         # no priorities defined!
priority = row.get('priority', 0) @@ -228,7 +228,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): trace_start_time=trace_start_time, trace_end_time=trace_end_time, trace_missing_values=trace_missing_values) - job_list.append(job_info) + job = Job(job_info) + job_list.append(job) return job_list, telemetry_start_time, telemetry_end_time diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 8ab139b..d4e7b0f 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -28,7 +28,7 @@ import numpy as np import pandas as pd from tqdm import tqdm -from ..job import job_dict +from ..job import job_dict, Job from ..utils import power_to_utilization, next_arrival_byconfkwargs @@ -220,9 +220,10 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): wall_time=wall_time, trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, + trace_quanta=config["TRACE_QUANTA"], trace_missing_values=trace_missing_values) - - jobs.append(job_info) + job = Job(job_info) + jobs.append(job) return jobs, telemetry_start, telemetry_end diff --git a/raps/job.py b/raps/job.py index c1c1327..1bc0e81 100644 --- a/raps/job.py +++ b/raps/job.py @@ -15,8 +15,9 @@ def job_dict(*,nodes_required, name, account, \ cpu_trace, gpu_trace, ntx_trace, nrx_trace, \ end_state, scheduled_nodes=None, id, priority=0, partition=0, - submit_time=0, time_limit=0, start_time=0, end_time=0, - wall_time=0, trace_time=0, trace_start_time=0,trace_end_time=0, trace_missing_values=False): + submit_time=0, time_limit=0, start_time=0, end_time=0, wall_time=0, + trace_time=0, trace_start_time=0, trace_end_time=0, trace_quanta=None, + trace_missing_values=False): """ Return job info dictionary """ return { 'nodes_required': nodes_required, @@ -40,8 +41,8 @@ def job_dict(*,nodes_required, name, account, \ 'trace_time': trace_time, 'trace_start_time': trace_start_time, 'trace_end_time': trace_end_time, + 'trace_quanta': trace_quanta, 'trace_missing_values': trace_missing_values - } @@ -83,6 +84,7 @@ class Job: self.trace_time = None # Time period for which traces are available self.trace_start_time = None # Relative start time of the trace (to running time) self.trace_end_time = None # Relative end time of the trace + self.trace_quanta = None # Trace quanta associated with the job # None means single value! 
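# An aside on the trace_quanta field introduced above: once a job records the
# spacing (in seconds) between its trace samples, a replay running at a
# different simulation time delta can still look up utilization at any
# simulation time t by integer division. This is only a sketch of the idea
# under that assumption; the engine's actual indexing code may differ.
def trace_value_at(trace, trace_quanta, t):
    """Return the trace sample covering simulation time t (seconds)."""
    idx = min(int(t // trace_quanta), len(trace) - 1)  # clamp to the last sample
    return trace[idx]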
self.running_time = 0 # Current running time updated when simulating # If a job dict was given, override the values from the job_dict: @@ -118,6 +120,7 @@ class Job: f"trace_time={self.trace_time}, " f"trace_start_time={self.trace_start_time}, " f"trace_end_time={self.trace_end_time}, " + f"trace_quanta={self.trace_quanta}, " f"running_time={self.running_time}, state={self._state}, " f"power={self.power}, " f"power_history={self.power_history})") @@ -200,6 +203,8 @@ class JobStatistics: self.avg_ntx_usage = sum(job.ntx_trace) / len(job.ntx_trace) elif isinstance(job.ntx_trace,int) or isinstance(job.ntx_trace,float): self.avg_ntx_usage = job.ntx_trace + else: + self.avg_ntx_usage = 0 if isinstance(job.nrx_trace,list) or isinstance(job.nrx_trace,np.ndarray): if len(job.nrx_trace) == 0: @@ -208,6 +213,8 @@ class JobStatistics: self.avg_nrx_usage = sum(job.nrx_trace) / len(job.nrx_trace) elif isinstance(job.nrx_trace,int) or isinstance(job.nrx_trace,float): self.avg_nrx_usage = job.nrx_trace + else: + self.avg_nrx_usage = 0 if len(job.power_history) == 0: self.avg_node_power = 0 -- GitLab From 3feaf200552924c1e285a3e599d5f69fc2e1c3eb Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 20 Jun 2025 14:33:30 -0400 Subject: [PATCH 135/388] Fixes to jobs and telemetry needed to add from last commit. Following: Fix of engine --- raps/power.py | 8 +-- raps/schedulers/default.py | 2 +- raps/telemetry.py | 20 +++--- raps/ui.py | 8 +-- raps/utils.py | 31 ++++++-- raps/workload.py | 140 +++++++++++++++++++++---------------- 6 files changed, 123 insertions(+), 86 deletions(-) diff --git a/raps/power.py b/raps/power.py index e61010f..a7f3fbb 100644 --- a/raps/power.py +++ b/raps/power.py @@ -54,7 +54,7 @@ def compute_node_power(cpu_util, gpu_util, net_util, config): power_gpu = gpu_util * config['POWER_GPU_MAX'] + \ (config['GPUS_PER_NODE'] - gpu_util) * config['POWER_GPU_IDLE'] - try: + try: power_nic = config['POWER_NIC_IDLE'] + \ (config['POWER_NIC_MAX'] - config['POWER_NIC_IDLE']) * net_util except: @@ -279,7 +279,7 @@ class PowerManager: self.power_state[node_indices] = power_value self.sivoc_loss[node_indices] = sivoc_loss return power_value[np.cumsum(job_lengths) - 1] - + def calculate_rectifiers_needed(self, power_state_summed): """ @@ -386,7 +386,7 @@ class PowerManager: Array containing SIVOC losses for each CDU. """ # Aggregate SIVOC losses - summed_sivoc_losses = np.sum(self.sivoc_loss/1000, axis=2) # kW + summed_sivoc_losses = np.sum(self.sivoc_loss / 1000, axis=2) # kW rows = self.sc_shape[0] # Add CDU numbers to table @@ -398,7 +398,7 @@ class PowerManager: sivoc_loss_with_rows = np.hstack((sivoc_loss_with_rows, rack_sivoc_loss_sum)) return sivoc_loss_with_rows - + def get_power_df(self, rack_power, rack_loss): # Initialize the columns for power_df power_columns = self.config['POWER_DF_HEADER'] diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index 2b2dd09..d1e82ac 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -52,7 +52,7 @@ class Scheduler: if nodes_available: self.place_job_and_manage_queues(job, queue, running, current_time) else: # In case the job was not placed, see how we should continue: - if self.bfpolicy is not None: + if self.bfpolicy.value is not None: self.backfill(queue, running, current_time) # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. 
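A note on the one-line scheduler fix above: the change from `self.bfpolicy is
not None` to `self.bfpolicy.value is not None` suggests the backfill policy is
an Enum with a member whose value is None (a "no backfill" setting). An Enum
member itself is never None, so the old test was always true and backfill could
never be switched off; testing .value checks what the member carries. A minimal
sketch of the distinction, using a hypothetical BackfillType enum rather than
the real one in raps/schedulers:

    from enum import Enum

    class BackfillType(Enum):
        NONE = None            # hypothetical "no backfill" member
        FIRSTFIT = 'firstfit'

    bfpolicy = BackfillType.NONE

    assert bfpolicy is not None      # a member is never None itself
    assert bfpolicy.value is None    # but its value can be

    if bfpolicy.value is not None:   # mirrors the corrected condition
        print("run backfill")
    else:
        print("skip backfill")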
diff --git a/raps/telemetry.py b/raps/telemetry.py index e8a9ed9..cd28416 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -12,6 +12,7 @@ import random import argparse import itertools import json +import os.path if __name__ == "__main__": parser = argparse.ArgumentParser(description='Telemetry data validator') @@ -59,7 +60,10 @@ class Telemetry: def save_snapshot(self,*, jobs: list, timestep_start, timestep_end, args, filename: str): """Saves a snapshot of the jobs to a compressed file. """ - np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) + array_of_job_dicts = [] + for job in jobs: + array_of_job_dicts.append(job.__dict__) + np.savez_compressed(filename, jobs=array_of_job_dicts, timestep_start=timestep_start, timestep_end=timestep_end, args=args) def load_snapshot(self, snapshot: str) -> list: """Reads a snapshot from a compressed file and return 4 values: joblist, timestep_start, timestep_end and args. @@ -117,8 +121,9 @@ class Telemetry: trace_end_time=None, #trace_missing_values=line.get('trace_missing_values').item(), trace_missing_values=None - ) - jobs.append(Job(job_info)) + ) + job = Job(job_info) + jobs.append(job) return jobs, time_start, time_end, args def load_data(self, files): @@ -182,6 +187,7 @@ class Telemetry: jobs = [] trigger_custom_dataloader = False for i,file in enumerate(files): + file = os.path.normpath(file.lstrip('"').rstrip('"')) if hasattr(args,'is_results_file') and args.is_results_file: if file.endswith(".csv"): jobs, timestep_start, timestep, _ = self.load_csv_results(file) @@ -212,14 +218,8 @@ class Telemetry: for job in tqdm(jobs, desc="Rescheduling jobs"): job['scheduled_nodes'] = None job['submit_time'] = next_arrival_byconfargs(config,args) - #elif file.endswith(".csv"): - # jobs_from_file = td.load_data - # jobs.extend(jobs_from_file) - elif i == 0: - trigger_custom_dataloader = True - break else: - print("Multiple files given as input.") + trigger_custom_dataloader = True break if trigger_custom_dataloader: # custom data loader diff --git a/raps/ui.py b/raps/ui.py index c88aa3a..7a86a00 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -9,7 +9,7 @@ from rich.table import Table from rich.live import Live from rich.progress import Progress,TextColumn,BarColumn,TaskProgressColumn,TimeRemainingColumn, track, TimeElapsedColumn, MofNCompleteColumn -from .utils import summarize_ranges, convert_seconds +from .utils import summarize_ranges, convert_seconds_to_hhmmss, convert_seconds_to_hhmm from .constants import ELLIPSES from .engine import TickData, Engine @@ -125,13 +125,13 @@ class LayoutManager: row = [ str(job.id).zfill(5), - convert_seconds(job.wall_time), + convert_seconds_to_hhmm(job.wall_time), str(job.name), str(job.account), job.state.value, str(job.nodes_required), nodes_display, - convert_seconds(job.running_time) + convert_seconds_to_hhmm(job.running_time) ] # Add the row with the 'white' style applied to the whole row table.add_row(*row, style="white") @@ -166,7 +166,7 @@ class LayoutManager: # Add data row with white values row = [ - convert_seconds(time), + convert_seconds_to_hhmmss(time), str(nrun), str(nqueue), str(active_nodes), diff --git a/raps/utils.py b/raps/utils.py index e0ac521..aaceecb 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -32,7 +32,14 @@ def max_value(values): return max(x[1] for x in values) if values else 0 -def convert_seconds(seconds): +def convert_seconds_to_hhmmss(seconds): + """Convert seconds to time format: 3661s -> 01:01""" + td = 
timedelta(seconds=seconds) + h, m, s = str(td).split(':') + return f"{h}:{m}:{s}" + + +def convert_seconds_to_hhmm(seconds): """Convert seconds to time format: 3661s -> 01:01""" td = timedelta(seconds=seconds) h, m, _ = str(td).split(':') @@ -435,23 +442,33 @@ def next_arrival(lambda_rate,reset=False, start_time=0): def convert_to_seconds(time_str): - if isinstance(time_str, int): + if isinstance(time_str, (int,float)): return time_str # this happens.... # Define the conversion factors time_factors = { 'd': 86400, # 1 day = 86400 seconds 'h': 3600, # 1 hour = 3600 seconds 'm': 60, # 1 minute = 60 seconds - 's': 1 # 1 second = 1 second + 's': 1, # 1 second = 1 second + '': 1 # empty string = 1 second } # Check if the input string ends with a unit or is purely numeric + # and extract the numeric part and the time unit if time_str[-1].isdigit(): - return int(time_str) # Directly return the number if it's purely numeric + unit = '' + num_str = time_str[:] + else: + unit = time_str[-1] + num_str = time_str[:-1] + + index = num_str.find(".") # convert int or float string + if index != -1: + num = float(num_str) + raise ValueError(f"Float not supported at this time: {num}{unit}") - # Extract the numeric part and the time unit - num = int(time_str[:-1]) - unit = time_str[-1] + else: + num = int(num_str) # Convert to seconds using the conversion factors if unit in time_factors: diff --git a/raps/workload.py b/raps/workload.py index b4e2fdd..f71df76 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -29,7 +29,7 @@ import random import numpy as np import matplotlib.pyplot as plt from raps.telemetry import Telemetry -from raps.job import job_dict +from raps.job import job_dict, Job from raps.utils import create_file_indexed, create_dir_indexed JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ @@ -174,19 +174,23 @@ class Workload: cpu_trace = cpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) gpu_trace = gpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = [], [] - jobs.append(job_dict(nodes_required=nodes_required, name=name, - account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, ntx_trace=net_tx, - nrx_trace=net_rx, end_state=end_state, - id=job_index, priority=priority, - partition=partition, - submit_time=submit_time, - time_limit=time_limit, - start_time=start_time, - end_time=end_time, - wall_time=wall_time, trace_time=wall_time, - trace_start_time=0, trace_end_time=wall_time)) + net_tx, net_rx = None, None + job_info = job_dict(nodes_required=nodes_required, name=name, + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=submit_time, + time_limit=time_limit, + start_time=start_time, + end_time=end_time, + wall_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time, + trace_quanta=config['TRACE_QUANTA'] + ) + job = Job(job_info) + jobs.append(job) return jobs def synthetic(self, **kwargs): @@ -291,12 +295,12 @@ class Workload: end_state = determine_state(config['JOB_END_PROBS']) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = [], [] + net_tx, net_rx = None, None # Jobs arrive according to Poisson process time_to_next_job = 
next_arrival_byconfargs(config,args) - jobs.append(job_dict(nodes_required=nodes_required, name=name, + job_info = job_dict(nodes_required=nodes_required, name=name, account=account, cpu_trace=cpu_trace, gpu_trace=gpu_trace, ntx_trace=net_tx, nrx_trace=net_rx, end_state=end_state, @@ -307,7 +311,11 @@ class Workload: start_time=time_to_next_job, end_time=time_to_next_job + wall_time, wall_time=wall_time, trace_time=wall_time, - trace_start_time=0, trace_end_time=wall_time)) + trace_start_time=0, trace_end_time=wall_time, + trace_quanta=config['TRACE_QUANTA'] + ) + job = Job(job_info) + jobs.append(job) return jobs def random(self, **kwargs): @@ -328,31 +336,33 @@ class Workload: cpu_util = config['CPUS_PER_NODE'] gpu_util = config['GPUS_PER_NODE'] cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) - net_tx, net_rx = [], [] + net_tx, net_rx = None, None job_time = len(gpu_trace) * config['TRACE_QUANTA'] # Create job info for this partition - job_info = job_dict( - nodes_required=config['AVAILABLE_NODES'], - scheduled_nodes=[], # Down nodes, therefore doesnt work list(range(config['AVAILABLE_NODES'])), - name=f"Max Test {partition}", - account=ACCT_NAMES[0], - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=net_tx, - nrx_trace=net_rx, - end_state='COMPLETED', - id=None, - priority=100, - partition=partition, - time_limit=job_time + 1, - start_time=0, - end_time=job_time, - wall_time=job_time, - trace_time=job_time, - trace_start_time=0, - trace_end_time=job_time) - jobs.append(job_info) # Add job to the list + job_info = job_dict(nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=[], # Down nodes, therefore doesnt work list(range(config['AVAILABLE_NODES'])), + name=f"Max Test {partition}", + account=ACCT_NAMES[0], + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + time_limit=job_time + 1, + start_time=0, + end_time=job_time, + wall_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_quanta=config['TRACE_QUANTA'] + ) + job = Job(job_info) + jobs.append(job) # Add job to the list return jobs @@ -366,7 +376,7 @@ class Workload: # Generate traces based on partition-specific configuration cpu_util, gpu_util = 0, 0 cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) - net_tx, net_rx = [], [] + net_tx, net_rx = None, None job_time = len(gpu_trace) * config['TRACE_QUANTA'] # Create job info for this partition @@ -390,8 +400,10 @@ class Workload: wall_time=job_time, trace_time=job_time, trace_start_time=0, - trace_end_time=job_time) - jobs.append(job_info) # Add job to the list + trace_end_time=job_time, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) # Add job to the list return jobs @@ -405,7 +417,7 @@ class Workload: for partition in self.partitions: # Fetch partition-specific configuration config = self.config_map[partition] - net_tx, net_rx = [], [] + net_tx, net_rx = None, None # Max test cpu_util, gpu_util = 1, 4 @@ -434,8 +446,10 @@ class Workload: trace_time=job_time, trace_start_time=0, trace_end_time=job_time, - trace_missing_values=False) - jobs.append(job_info) + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) # OpenMxP run cpu_util, gpu_util = 0, 4 @@ -463,8 +477,10 @@ class Workload: trace_time=job_time, trace_start_time=0, 
trace_end_time=job_time, - trace_missing_values=False) - jobs.append(job_info) + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) # HPL run cpu_util, gpu_util = 0.33, 0.79 * 4 # based on 24-01-18 run @@ -491,8 +507,10 @@ class Workload: trace_time=job_time, trace_start_time=0, trace_end_time=job_time, - trace_missing_values=False) - jobs.append(job_info) + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) # Idle test cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) @@ -518,8 +536,10 @@ class Workload: trace_time=job_time, trace_start_time=0, trace_end_time=job_time, - trace_missing_values=False) - jobs.append(job_info) + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) return jobs @@ -532,9 +552,9 @@ def plot_job_hist(jobs,config=None,dist_split=None): num_dist = len(dist_split) split = dist_split - y = [y['nodes_required'] for y in jobs] - x = [x['wall_time'] for x in jobs] - x2 = [x['time_limit'] for x in jobs] + y = [y.nodes_required for y in jobs] + x = [x.wall_time for x in jobs] + x2 = [x.time_limit for x in jobs] fig_m = plt.figure() gs = fig_m.add_gridspec(30, 1) gs0 = gs[0:20].subgridspec(500,500,hspace=0,wspace=0) @@ -571,10 +591,10 @@ def plot_job_hist(jobs,config=None,dist_split=None): axs[1][0].scatter(x2, y,marker='.',c='lightblue',zorder=2) axs[1][0].scatter(x, y,zorder=3) - cpu_util = [x['cpu_trace'] for x in jobs] + cpu_util = [x.cpu_trace for x in jobs] if isinstance(cpu_util[0],np.ndarray): cpu_util = np.concatenate(cpu_util).ravel() - gpu_util = [x['gpu_trace'] for x in jobs] + gpu_util = [x.gpu_trace for x in jobs] if isinstance(gpu_util[0],np.ndarray): gpu_util = np.concatenate(gpu_util).ravel() if not all([x == 0 for x in gpu_util]): @@ -628,9 +648,9 @@ def plot_job_hist(jobs,config=None,dist_split=None): axs[1][1].tick_params(axis="y", labelleft=False) # Submit_time and Wall_time - duration = [x['wall_time'] for x in jobs] - nodes_required = [x['nodes_required'] for x in jobs] - submit_t = [x['submit_time'] for x in jobs] + duration = [x.wall_time for x in jobs] + nodes_required = [x.nodes_required for x in jobs] + submit_t = [x.submit_time for x in jobs] offset = 0 split_index = 0 @@ -665,7 +685,7 @@ def plot_job_hist(jobs,config=None,dist_split=None): #ax_b labels: ax_b.set_xlabel("time [hh:mm]") minx_s = 0 - maxx_s = math.ceil(max([x['wall_time'] for x in jobs]) + max([x['submit_time'] for x in jobs])) + maxx_s = math.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for -- GitLab From a80ecc8c4232c7e30a715b7a73364cd389436c83 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 23 Jun 2025 15:15:24 -0400 Subject: [PATCH 136/388] Additions to changes for data loader returns list of Job objects. 
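At call sites, the switch from job dicts to Job objects replaces dict subscripting with attribute access; schematically (an illustrative sketch, with now standing in for the engine's current time):

    # before: dataloaders handed back plain dicts
    eligible = [j for j in jobs if j['submit_time'] <= now]
    # after: dataloaders hand back Job objects
    eligible = [j for j in jobs if j.submit_time <= now]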
--- args.py | 2 +- raps/engine.py | 115 +++++++++++++++++++++---------------- raps/schedulers/default.py | 2 +- raps/telemetry.py | 20 ++++--- raps/ui.py | 38 +++++++----- raps/workload.py | 4 +- 6 files changed, 104 insertions(+), 77 deletions(-) diff --git a/args.py b/args.py index 34e0459..3b5832d 100644 --- a/args.py +++ b/args.py @@ -14,7 +14,7 @@ parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU co # Simulation runtime options parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)') parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') -parser.add_argument("--time-delta", type=str, default=None, help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. If not set "TRACE_QUANTA" is used.)') +parser.add_argument("--time-delta", type=str, default="1s", help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. If not set "TRACE_QUANTA" is used.)') parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout') parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') diff --git a/raps/engine.py b/raps/engine.py index 30909b5..a12254d 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -86,15 +86,12 @@ class Engine: jobs_to_submit still holds the jobs that need be submitted in the future. """ # Build a list of jobs whose start_time is <= current_time. - eligible = [job for job in jobs_to_submit if job['start_time'] < self.current_time] + eligible_jobs = [] + eligible_jobs[:] = [job for job in jobs_to_submit if job.start_time < self.current_time] # Remove those jobs from jobs_to_submit: - jobs_to_submit[:] = [job for job in jobs_to_submit if job['start_time'] >= self.current_time] + jobs_to_submit[:] = [job for job in jobs_to_submit if job.start_time >= self.current_time] # Convert them to Job instances and build list of eligible jobs. - eligible_jobs_list = [] - for job_data in eligible: - job_instance = Job(job_data) - eligible_jobs_list.append(job_instance) - self.queue += eligible_jobs_list + self.queue += eligible_jobs def add_eligible_jobs_to_queue(self, jobs_to_submit: List): """ @@ -103,23 +100,30 @@ class Engine: Adds eligible jobs to the queueu, and removes them from the jobs_to_submit jobs_to_submit still holds the jobs that need be submitted in the future. + returns + - true if new jobs are present + - false if no new jobs are present """ # Build a list of jobs whose submit_time is <= current_time. - eligible = [job for job in jobs_to_submit if job['submit_time'] <= self.current_time] + + eligible_jobs = [] + eligible_jobs[:] = [job for job in jobs_to_submit if job.submit_time <= self.current_time] # Remove those jobs from jobs_to_submit: - jobs_to_submit[:] = [job for job in jobs_to_submit if job['submit_time'] > self.current_time] + jobs_to_submit[:] = [job for job in jobs_to_submit if job.submit_time > self.current_time] # Convert them to Job instances and build list of eligible jobs. 
- eligible_jobs_list = [] - for job_data in eligible: - job_instance = Job(job_data) - eligible_jobs_list.append(job_instance) - self.queue += eligible_jobs_list - if eligible_jobs_list != []: + self.queue += eligible_jobs + if eligible_jobs != []: return True else: return False def prepare_timestep(self, replay:bool = True): + + #update Running time + for job in self.running: + if job.state == JobState.RUNNING: + job.running_time = self.current_time - job.start_time + completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time] for job in completed_jobs: @@ -160,22 +164,24 @@ class Engine: gpu_utils = [] net_utils = [] if self.debug: - print(f"Current Time: {self.current_time}") + print(f"Current Time: {self.current_time}") for job in self.running: if self.debug: print(f"JobID: {job.id}") if job.state == JobState.RUNNING: - job.running_time = self.current_time - job.start_time if job.running_time > job.wall_time: raise Exception(f"Job should have ended already!\n\ - {job.running_time} > {job.wall_time}\n\ - {len(job.cpu_trace)} vs. {job.running_time // self.config['TRACE_QUANTA']}\ + {job.running_time} > {job.wall_time}\ ") - time_quanta_index = int((job.running_time - job.trace_start_time) // self.config['TRACE_QUANTA']) + if job.trace_quanta: + time_quanta_index = int((job.running_time - job.trace_start_time) // job.trace_quanta) + if time_quanta_index < 0: + time_quanta_index = 0 + # If the running time is past the last time step in the # trace, use the last value in the trace. This can # happen if the last valid timesteps is e.g. 17%15, @@ -185,52 +191,62 @@ class Engine: # For every other error condition trace_start_ and # _end_time are used! # #print(type(job.cpu_trace)) - if time_quanta_index < 0: - time_quanta_index = 0 # Similar with the first time_quanta index: If the job started # in the past and no trace if there, read index 0 until values # are available. 
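+                    # Equivalent clamped lookup, as a sketch (assuming
+                    # get_utilization indexes into the trace):
+                    #   idx = min(max(time_quanta_index, 0), len(trace) - 1)
+                    #   util = get_utilization(trace, idx)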
- if isinstance(job.cpu_trace,list) or isinstance(job.cpu_trace,np.ndarray): - if (isinstance(job.cpu_trace,list) and len(job.cpu_trace)) or (isinstance(job.cpu_trace,np.ndarray) and job.cpu_trace.size == 0): - cpu_util = 0 - elif time_quanta_index < len(job.cpu_trace): + + if (isinstance(job.cpu_trace,list) and job.cpu_trace != []) or \ + (isinstance(job.cpu_trace,np.ndarray) and job.cpu_trace.size != 0): + if time_quanta_index < len(job.cpu_trace): cpu_util = get_utilization(job.cpu_trace, time_quanta_index) else: cpu_util = get_utilization(job.cpu_trace, max(0,len(job.cpu_trace) - 1)) elif isinstance(job.cpu_trace,float) or isinstance(job.cpu_trace,int): cpu_util = job.cpu_trace else: - raise NotImplementedError() + cpu_util = 0 - if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace,np.ndarray): - if (isinstance(job.gpu_trace,list) and len(job.gpu_trace)) or (isinstance(job.gpu_trace,np.ndarray) and job.gpu_trace.size == 0): - gpu_util = 0 - elif time_quanta_index < len(job.gpu_trace): + if (isinstance(job.gpu_trace,list) and job.gpu_trace != []) or \ + (isinstance(job.gpu_trace,np.ndarray) and job.gpu_trace.size != 0): + if time_quanta_index < len(job.gpu_trace): gpu_util = get_utilization(job.gpu_trace, time_quanta_index) else: gpu_util = get_utilization(job.gpu_trace, max(0,len(job.gpu_trace) - 1)) elif isinstance(job.gpu_trace,float) or isinstance(job.gpu_trace,int): gpu_util = job.gpu_trace else: - raise NotImplementedError() - - net_util = 0 - - if (isinstance(job.ntx_trace,list) or isinstance(job.ntx_trace,np.ndarray)) and len(job.ntx_trace) and (isinstance(job.nrx_trace,list) or isinstance(job.nrx_trace,list)) and len(job.nrx_trace): - net_tx = get_utilization(job.ntx_trace, time_quanta_index) - net_rx = get_utilization(job.nrx_trace, time_quanta_index) + gpu_util = 0 + + if (((isinstance(job.ntx_trace,list) and job.ntx_trace != []) or \ + (isinstance(job.ntx_trace,np.ndarray) and job.ntx_trace.size != 0)) \ + and \ + ((isinstance(job.nrx_trace,list) and job.nrx_trace != []) or \ + (isinstance(job.nrx_trace,np.ndarray) and job.nrx_trace.size != 0))): + if time_quanta_index < len(job.ntx_trace): + net_tx = get_utilization(job.ntx_trace, time_quanta_index) + else: + net_tx = get_utilization(job.ntx_trace, max(0,len(job.ntx_trace) - 1)) + if time_quanta_index < len(job.nrx_trace): + net_rx = get_utilization(job.nrx_trace, time_quanta_index) + else: + net_rx = get_utilization(job.nrx_trace, max(0,len(job.nrx_trace) - 1)) + net_util = network_utilization(net_tx, net_rx) + elif (isinstance(job.ntx_trace,float) or isinstance(job.ntx_trace,int)) and \ + (isinstance(job.nrx_trace,float) or isinstance(job.nrx_trace,int)): + net_tx = job.ntx_trace + net_rx = job.nrx_trace net_util = network_utilization(net_tx, net_rx) - net_utils.append(net_util) else: - net_utils.append(0) + net_util = 0 scheduled_nodes.append(job.scheduled_nodes) # ? cpu_utils.append(cpu_util) gpu_utils.append(gpu_util) + net_utils.append(net_util) else: raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}") - if len(scheduled_nodes) > 0: + if len(scheduled_nodes) > 0: # When can this not happen? 
self.flops_manager.update_flop_state(scheduled_nodes, cpu_utils, gpu_utils) jobs_power = self.power_manager.update_power_state(scheduled_nodes, cpu_utils, gpu_utils, net_utils) @@ -313,9 +329,9 @@ class Engine: self.current_time = timestep_start # Keep only jobs that have not yet ended and that have a chance to start - all_jobs[:] = [job for job in all_jobs if job['end_time'] >= timestep_start and job['submit_time'] < timestep_end] + all_jobs[:] = [job for job in all_jobs if job.end_time >= timestep_start and job.submit_time < timestep_end] - all_jobs.sort(key=lambda j: j['submit_time']) + all_jobs.sort(key=lambda j: j.submit_time) self.add_running_jobs_to_queue(all_jobs) # Set policy to replay and no backfill to get the original prefilled placement. @@ -350,14 +366,14 @@ class Engine: all_jobs = jobs.copy() jobs = [] # Batch Jobs into 6h windows based on submit_time or twice the time_delta if larger - batch_window = max(60 * 60 * 6, 2 * time_delta) # 6h + batch_window = max(60 * 60 * 6, 2 * time_delta) # at least 6h - for timestep in range(timestep_start,timestep_end): + for timestep in range(timestep_start,timestep_end): # Runs every seconds! if (timestep % batch_window == 0) or (timestep == timestep_start): # Add jobs that are within the batching window and remove them from all jobs - jobs += [job for job in all_jobs if job['submit_time'] <= timestep + batch_window] - all_jobs[:] = [job for job in all_jobs if job['submit_time'] > timestep + batch_window] + jobs += [job for job in all_jobs if job.submit_time <= timestep + batch_window] + all_jobs[:] = [job for job in all_jobs if job.submit_time > timestep + batch_window] # Start Siulation loop: # 1. Cleanup old jobs @@ -366,7 +382,8 @@ class Engine: # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) # 3. Schedule jobs that are now in the queue. - self.scheduler.schedule(self.queue, self.running, self.current_time,accounts=self.accounts, sorted=(not has_new_additions)) + if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions: + self.scheduler.schedule(self.queue, self.running, self.current_time,accounts=self.accounts, sorted=(not has_new_additions)) # Stop the simulation if no more jobs are running or in the queue or in the job list. if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs: @@ -376,7 +393,7 @@ class Engine: if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0: print(".", end="", flush=True) - if 0 == timestep % time_delta: + if 0 == (timestep % time_delta): tick_data = self.tick(time_delta) tick_data.completed = completed_jobs yield tick_data diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index d1e82ac..e77036b 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -52,7 +52,7 @@ class Scheduler: if nodes_available: self.place_job_and_manage_queues(job, queue, running, current_time) else: # In case the job was not placed, see how we should continue: - if self.bfpolicy.value is not None: + if self.bfpolicy is not None or self.bfpolicy is not BackfillType.NONE: self.backfill(queue, running, current_time) # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. 
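Note: the reworked backfill condition above, self.bfpolicy is not None or self.bfpolicy is not BackfillType.NONE, is always true (no value can fail both `is not` tests), so backfill still runs unconditionally. The guard presumably intended here, as a sketch:

    if self.bfpolicy is not None and self.bfpolicy is not BackfillType.NONE:
        self.backfill(queue, running, current_time)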
diff --git a/raps/telemetry.py b/raps/telemetry.py index cd28416..c42da2e 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -76,16 +76,15 @@ class Telemetry: - args, which were used to generate the loaded snapshot """ data = np.load(snapshot, allow_pickle=True, mmap_mode='r') - job_data = data['jobs'].tolist() - jobs = [] - for job_info in job_data: - job = Job(job_info) - jobs.append(job) + jobs = data['jobs'].tolist() + timestep_start = int(data['timestep_start']) + timestep_end = int(data['timestep_end']) + args_from_file = data['args'].tolist() return jobs, \ - int(data['timestep_start']), \ - int(data['timestep_end']), \ - data['args'].tolist() + timestep_start, \ + timestep_end, \ + args_from_file def load_csv_results(self, file): jobs = [] @@ -233,7 +232,10 @@ class Telemetry: self.dirname = create_casename() print(*args.replay) - jobs, timestep_start_from_data, timestep_end_from_data = self.load_data(args.replay) + try: + jobs, timestep_start_from_data, timestep_end_from_data = self.load_data(args.replay) + except AssertionError: + raise ValueError("Forgot --is-results-file ?") timestep_start = min(timestep_start, timestep_start_from_data) timestep_end = max(timestep_end, timestep_end_from_data) self.save_snapshot(jobs=jobs, diff --git a/raps/ui.py b/raps/ui.py index 7a86a00..e9091dc 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -8,6 +8,7 @@ from rich.panel import Panel from rich.table import Table from rich.live import Live from rich.progress import Progress,TextColumn,BarColumn,TaskProgressColumn,TimeRemainingColumn, track, TimeElapsedColumn, MofNCompleteColumn +from contextlib import nullcontext from .utils import summarize_ranges, convert_seconds_to_hhmmss, convert_seconds_to_hhmm from .constants import ELLIPSES @@ -40,19 +41,20 @@ class LayoutManager: self.progress_task = self.progress.add_task("Progress",total=total_timesteps, name="Progress") def setup_layout(self, layout_type): - self.layout.split_column(Layout(name="main"),Layout(name="progress",size=1)) - if layout_type == "layout2": - self.layout["main"].split_row(Layout(name="left", ratio=3), Layout(name="right", ratio=2)) - self.layout["main"]["left"].split_column( - Layout(name="pressflow", ratio=6), - Layout(name="powertemp", ratio=11), - Layout(name="totpower", ratio=3), - ) - self.layout["main"]["right"].split(Layout(name="scheduled", ratio=17), Layout(name="status", ratio=3)) - else: - self.layout["main"].split_row(Layout(name="left", ratio=1), Layout(name="right", ratio=1)) - self.layout["main"]["left"].split_column(Layout(name="upper", ratio=8), Layout(name="lower", ratio=2)) - self.layout["main"]["right"].split_column(Layout(name="scheduled", ratio=8), Layout(name="status", ratio=2)) + if not self.debug: + self.layout.split_column(Layout(name="main"),Layout(name="progress",size=1)) + if layout_type == "layout2": + self.layout["main"].split_row(Layout(name="left", ratio=3), Layout(name="right", ratio=2)) + self.layout["main"]["left"].split_column( + Layout(name="pressflow", ratio=6), + Layout(name="powertemp", ratio=11), + Layout(name="totpower", ratio=3), + ) + self.layout["main"]["right"].split(Layout(name="scheduled", ratio=17), Layout(name="status", ratio=3)) + else: + self.layout["main"].split_row(Layout(name="left", ratio=1), Layout(name="right", ratio=1)) + self.layout["main"]["left"].split_column(Layout(name="upper", ratio=8), Layout(name="lower", ratio=2)) + self.layout["main"]["right"].split_column(Layout(name="scheduled", ratio=8), Layout(name="status", ratio=2)) def create_table(self, title, 
columns, header_style="bold green"): """ @@ -401,6 +403,8 @@ class LayoutManager: self.layout["progress"].update(self.progress.get_renderable()) def update(self, data: TickData, time_delta=1): + if self.debug: + return uncertainties = self.engine.power_manager.uncertainties if data.current_time % self.config['UI_UPDATE_FREQ'] == 0: @@ -431,8 +435,12 @@ class LayoutManager: def run(self, jobs, timestep_start, timestep_end, time_delta): """ Runs the UI, blocking until the simulation is complete """ - with Live(self.layout, refresh_per_second=5): - for data in self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta): + if not self.debug: + context = Live(self.layout, refresh_per_second=5) + else: + context = nullcontext() + with context: + for data in self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta, autoshutdown=True): if data: self.update(data,time_delta) diff --git a/raps/workload.py b/raps/workload.py index f71df76..184bfae 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -767,8 +767,8 @@ if __name__ == "__main__": jobs = getattr(workload, args.workload)(args=args) plot_job_hist(jobs, config=config, dist_split=args.multimodal) if args.output: - timestep_start = min([x['submit_time'] for x in jobs]) - timestep_end = math.ceil(max([x['submit_time'] for x in jobs]) + max([x['wall_time'] for x in jobs])) + timestep_start = min([x.submit_time for x in jobs]) + timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.wall_time for x in jobs])) filename = create_file_indexed('wl',create=False,ending="npz").split(".npz")[0] # savez_compressed add npz itself, but create_file_indexed needs to check for .npz to find existing files np.savez_compressed(filename,jobs=jobs,timestep_start=timestep_start, timestep_end=timestep_end, args=args) -- GitLab From b2c74eddb6d91580f73d0edae14adacf2dca6386 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 24 Jun 2025 12:23:24 -0400 Subject: [PATCH 137/388] Flag --time-delta is working now. With this change: the simulation runs from time start to time end. - in each timestep, - we prepare the timestep - identify eligible jobs to schedule - identify if the scheduler needs to run - run a tick if the timestep is a multiple of the time-delta - complete the timestep. (Update running time of all jobs running and the reference time for the simulation. - tick now only runs at each time-delta: tick runs the 'physical' simulation - the values computed at each computed timestep are exactly the same no matter the time-delta. - Statistics of the power measurements are not equal, as the couarser time-deltas may lead to aliasing --- args.py | 3 ++- raps/engine.py | 66 ++++++++++++++++++++++++++++++----------------- raps/power.py | 10 ++++--- raps/telemetry.py | 11 +++++--- 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/args.py b/args.py index 3b5832d..c8df071 100644 --- a/args.py +++ b/args.py @@ -14,7 +14,8 @@ parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU co # Simulation runtime options parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)') parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') -parser.add_argument("--time-delta", type=str, default="1s", help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. 
If not set "TRACE_QUANTA" is used.)') +#parser.add_argument("--time-delta", type=str, default=None, help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. If not set "TRACE_QUANTA" is used.)') # This seems sensible, but 1s is the previous default before introducing this change! +parser.add_argument("--time-delta", type=str, default="1s", help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. Default value: 1s.)') parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout') parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') diff --git a/raps/engine.py b/raps/engine.py index a12254d..b8de496 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -86,8 +86,7 @@ class Engine: jobs_to_submit still holds the jobs that need be submitted in the future. """ # Build a list of jobs whose start_time is <= current_time. - eligible_jobs = [] - eligible_jobs[:] = [job for job in jobs_to_submit if job.start_time < self.current_time] + eligible_jobs = [job for job in jobs_to_submit if job.start_time < self.current_time] # Remove those jobs from jobs_to_submit: jobs_to_submit[:] = [job for job in jobs_to_submit if job.start_time >= self.current_time] # Convert them to Job instances and build list of eligible jobs. @@ -105,9 +104,7 @@ class Engine: - false if no new jobs are present """ # Build a list of jobs whose submit_time is <= current_time. - - eligible_jobs = [] - eligible_jobs[:] = [job for job in jobs_to_submit if job.submit_time <= self.current_time] + eligible_jobs = [job for job in jobs_to_submit if job.submit_time <= self.current_time] # Remove those jobs from jobs_to_submit: jobs_to_submit[:] = [job for job in jobs_to_submit if job.submit_time > self.current_time] # Convert them to Job instances and build list of eligible jobs. @@ -117,15 +114,16 @@ class Engine: else: return False - def prepare_timestep(self, replay:bool = True): - #update Running time - for job in self.running: - if job.state == JobState.RUNNING: - job.running_time = self.current_time - job.start_time - completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time] + def prepare_timestep(self, replay:bool = True): + # 1 identify completed jobs + # 2 Simulate node failure # Defunct feature! + # 3 Update active and free nodes + # Identify Completed Jobs + completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time] + # Update Completed Jobs, their account and and Free resources. for job in completed_jobs: self.power_manager.set_idle(job.scheduled_nodes) job.state = JobState.COMPLETED @@ -155,6 +153,26 @@ class Engine: return completed_jobs, newly_downed_nodes + def complete_timestep(self, autoshutdown, all_jobs:List, jobs:List): + # 1 update running time of all running jobs + # 2 update the current_time of the engine (this serves as reference for most computations) + # 3 Check if simulation should shutdown + + #update Running time + for job in self.running: + if job.state == JobState.RUNNING: + job.running_time = self.current_time - job.start_time + + self.current_time += 1 # Update the current time every timestep + + # Stop the simulation if no more jobs are running or in the queue or in the job list. 
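+        # (all_jobs holds jobs beyond the current batch window; jobs holds
+        #  batched jobs not yet submitted; see run_simulation below)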
+ if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs: + print(f"[DEBUG] {self.config['system_name']} - Stopping simulation at time {self.current_time}") + simulation_complete = True + else: + simulation_complete = False + return simulation_complete + def tick(self,time_delta=1): """Simulate a timestep.""" @@ -256,7 +274,7 @@ class Engine: for i, job in enumerate(_running_jobs): if job.running_time % self.config['TRACE_QUANTA'] == 0: job.power_history.append(jobs_power[i] * len(job.scheduled_nodes)) - del _running_jobs + #del _running_jobs # Update the power array UI component rack_power, rect_losses = self.power_manager.compute_rack_power() @@ -274,8 +292,8 @@ class Engine: power_df = None cooling_inputs, cooling_outputs = None, None - # Update power history every 15s - if self.current_time % self.config['POWER_UPDATE_FREQ'] == 0: + # If time_delta is 1 update power history every 15s, otherwise whenever tick runs + if (time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1: total_power_kw = sum(row[-1] for row in rack_power) + self.config['NUM_CDUS'] * self.config['POWER_CDU'] / 1000.0 total_loss_kw = sum(row[-1] for row in rack_loss) self.power_manager.history.append((self.current_time, total_power_kw)) @@ -286,7 +304,7 @@ class Engine: else: pflops, gflop_per_watt = None, None - if self.current_time % self.config['POWER_UPDATE_FREQ'] == 0: + if (time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1: if self.cooling_model: # Power for NUM_CDUS (25 for Frontier) cdu_power = rack_power.T[-1] * 1000 @@ -321,7 +339,6 @@ class Engine: num_free_nodes=self.num_free_nodes, ) - self.current_time += time_delta return tick_data def prepare_system_state(self, all_jobs:List, timestep_start, timestep_end, replay:bool): @@ -375,8 +392,7 @@ class Engine: jobs += [job for job in all_jobs if job.submit_time <= timestep + batch_window] all_jobs[:] = [job for job in all_jobs if job.submit_time > timestep + batch_window] - # Start Siulation loop: - # 1. Cleanup old jobs + # 1. Prepare Timestep: completed_jobs, newly_downed_nodes = self.prepare_timestep(replay) # 2. Identify eligible jobs and add them to the queue. @@ -385,20 +401,22 @@ class Engine: if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions: self.scheduler.schedule(self.queue, self.running, self.current_time,accounts=self.accounts, sorted=(not has_new_additions)) - # Stop the simulation if no more jobs are running or in the queue or in the job list. - if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs: - print(f"[DEBUG] {self.config['system_name']} - Stopping simulation at time {self.current_time}") - break if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0: print(".", end="", flush=True) + # 4. Run tick only at specified time_delta if 0 == (timestep % time_delta): tick_data = self.tick(time_delta) tick_data.completed = completed_jobs - yield tick_data else: - yield None + tick_data = None + + # 5. Complete the timestep + simulation_done = self.complete_timestep(autoshutdown, all_jobs, jobs) + if simulation_done: + break + yield tick_data def get_job_history_dict(self): return self.job_history_dict diff --git a/raps/power.py b/raps/power.py index a7f3fbb..fb09828 100644 --- a/raps/power.py +++ b/raps/power.py @@ -164,6 +164,7 @@ class PowerManager: - down_nodes: Nodes that are currently down. 
- down_rack: Rack number of down nodes. """ + def __init__(self, power_func=compute_node_power, **config): """ Initialize the PowerManager object. @@ -189,7 +190,8 @@ class PowerManager: if power_func in [compute_node_power_uncertainties, \ compute_node_power_validate_uncertainties]: self.uncertainties = True - if self.down_nodes: self.apply_down_nodes() + if self.down_nodes: + self.apply_down_nodes() def get_peak_power(self): """Estimate peak power of system for setting max value of gauges in dashboard""" @@ -197,7 +199,7 @@ class PowerManager: blades_per_rectifier = self.config['BLADES_PER_CHASSIS'] / self.config['RECTIFIERS_PER_CHASSIS'] rectifier_load = blades_per_rectifier * self.config['NODES_PER_BLADE'] * node_power rectifier_power = compute_loss(rectifier_load, self.config['RECTIFIER_LOSS_CONSTANT'], \ - self.config['RECTIFIER_EFFICIENCY']) # with AC-DC conversion losses + self.config['RECTIFIER_EFFICIENCY']) # with AC-DC conversion losses chassis_power = self.config['BLADES_PER_CHASSIS'] * rectifier_power / blades_per_rectifier \ + self.config['SWITCHES_PER_CHASSIS'] * self.config['POWER_SWITCH'] rack_power = chassis_power * self.config['CHASSIS_PER_RACK'] @@ -335,8 +337,8 @@ class PowerManager: power_per_rectifier = chassis_power[i, j, k] / num_rectifiers rectifier_power[i, j, k, :num_rectifiers] = power_per_rectifier power_with_losses[i, j, k, :num_rectifiers] = compute_loss(power_per_rectifier, \ - self.config['RECTIFIER_LOSS_CONSTANT'], \ - self.config['RECTIFIER_EFFICIENCY']) + self.config['RECTIFIER_LOSS_CONSTANT'], \ + self.config['RECTIFIER_EFFICIENCY']) rectifier_power = np.nan_to_num(rectifier_power) power_with_losses = np.nan_to_num(power_with_losses) diff --git a/raps/telemetry.py b/raps/telemetry.py index c42da2e..13c91cc 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -60,10 +60,10 @@ class Telemetry: def save_snapshot(self,*, jobs: list, timestep_start, timestep_end, args, filename: str): """Saves a snapshot of the jobs to a compressed file. """ - array_of_job_dicts = [] + list_of_job_dicts = [] for job in jobs: - array_of_job_dicts.append(job.__dict__) - np.savez_compressed(filename, jobs=array_of_job_dicts, timestep_start=timestep_start, timestep_end=timestep_end, args=args) + list_of_job_dicts.append(job.__dict__) + np.savez_compressed(filename, jobs=list_of_job_dicts, timestep_start=timestep_start, timestep_end=timestep_end, args=args) def load_snapshot(self, snapshot: str) -> list: """Reads a snapshot from a compressed file and return 4 values: joblist, timestep_start, timestep_end and args. 
@@ -76,7 +76,10 @@ class Telemetry: - args, which were used to generate the loaded snapshot """ data = np.load(snapshot, allow_pickle=True, mmap_mode='r') - jobs = data['jobs'].tolist() + jobs = [] + list_of_job_dicts = data['jobs'].tolist() + for job_info in list_of_job_dicts: + jobs.append(Job(job_info)) timestep_start = int(data['timestep_start']) timestep_end = int(data['timestep_end']) args_from_file = data['args'].tolist() -- GitLab From cc9fe9ba272f629f2cb3e99c2998a96b72cd9709 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 30 Jun 2025 16:45:17 -0400 Subject: [PATCH 138/388] Initial implemention of MIT supercloud - running but no jobs showing --- config/mit_supercloud/power.json | 18 + config/mit_supercloud/scheduler.json | 18 + config/mit_supercloud/system.json | 20 + config/mit_supercloud_gpu/part-cpu/power.json | 18 + .../part-cpu/scheduler.json | 18 + .../mit_supercloud_gpu/part-cpu/system.json | 20 + config/mit_supercloud_gpu/part-gpu/power.json | 18 + .../part-gpu/scheduler.json | 18 + .../mit_supercloud_gpu/part-gpu/system.json | 20 + raps/dataloaders/mit_supercloud.py | 52 ++ raps/dataloaders/mit_supercloud/README.md | 11 + .../mit_supercloud/create_trace.py | 452 ++++++++++++++++++ .../mit_supercloud/dist/anal_data.py | 36 ++ .../mit_supercloud/dist/create_trace.py | 382 +++++++++++++++ .../mit_supercloud/dist/download_data.py | 156 ++++++ .../mit_supercloud/dist/node_data_anal.py | 41 ++ .../mit_supercloud/dist/parse_mit_data.py | 81 ++++ .../mit_supercloud/dist/readme.txt | 21 + raps/dataloaders/mit_supercloud/dist/setup.py | 172 +++++++ .../mit_supercloud/generate_local_metadata.py | 114 +++++ raps/dataloaders/mit_supercloud/setup.py | 191 ++++++++ 21 files changed, 1877 insertions(+) create mode 100644 config/mit_supercloud/power.json create mode 100644 config/mit_supercloud/scheduler.json create mode 100644 config/mit_supercloud/system.json create mode 100644 config/mit_supercloud_gpu/part-cpu/power.json create mode 100644 config/mit_supercloud_gpu/part-cpu/scheduler.json create mode 100644 config/mit_supercloud_gpu/part-cpu/system.json create mode 100644 config/mit_supercloud_gpu/part-gpu/power.json create mode 100644 config/mit_supercloud_gpu/part-gpu/scheduler.json create mode 100644 config/mit_supercloud_gpu/part-gpu/system.json create mode 100644 raps/dataloaders/mit_supercloud.py create mode 100644 raps/dataloaders/mit_supercloud/README.md create mode 100644 raps/dataloaders/mit_supercloud/create_trace.py create mode 100644 raps/dataloaders/mit_supercloud/dist/anal_data.py create mode 100644 raps/dataloaders/mit_supercloud/dist/create_trace.py create mode 100644 raps/dataloaders/mit_supercloud/dist/download_data.py create mode 100644 raps/dataloaders/mit_supercloud/dist/node_data_anal.py create mode 100644 raps/dataloaders/mit_supercloud/dist/parse_mit_data.py create mode 100644 raps/dataloaders/mit_supercloud/dist/readme.txt create mode 100644 raps/dataloaders/mit_supercloud/dist/setup.py create mode 100644 raps/dataloaders/mit_supercloud/generate_local_metadata.py create mode 100644 raps/dataloaders/mit_supercloud/setup.py diff --git a/config/mit_supercloud/power.json b/config/mit_supercloud/power.json new file mode 100644 index 0000000..5128c4c --- /dev/null +++ b/config/mit_supercloud/power.json @@ -0,0 +1,18 @@ +{ + "POWER_GPU_IDLE": 88, + "POWER_GPU_MAX": 560, + "POWER_CPU_IDLE": 90, + "POWER_CPU_MAX": 280, + "POWER_MEM": 74.26, + "POWER_NVME": 30, + "POWER_NIC": 20, + "POWER_CDU": 8473.47, + "POWER_SWITCH": 250, + "POWER_UPDATE_FREQ": 15, + 
"RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 +} diff --git a/config/mit_supercloud/scheduler.json b/config/mit_supercloud/scheduler.json new file mode 100644 index 0000000..0ea905d --- /dev/null +++ b/config/mit_supercloud/scheduler.json @@ -0,0 +1,18 @@ +{ + "SEED": 42, + "JOB_ARRIVAL_TIME": 900, + "MTBF": 11, + "MAX_TIME": 88200, + "TRACE_QUANTA": 20, + "MIN_WALL_TIME": 3600, + "MAX_WALL_TIME": 43200, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 3000, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/mit_supercloud/system.json b/config/mit_supercloud/system.json new file mode 100644 index 0000000..6a1af50 --- /dev/null +++ b/config/mit_supercloud/system.json @@ -0,0 +1,20 @@ +{ + "NUM_CDUS": 12, + "RACKS_PER_CDU": 1, + "NODES_PER_RACK": 40, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 1, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [], + "DOWN_NODES": [], + "CPUS_PER_NODE": 2, + "GPUS_PER_NODE": 0, + "CPU_PEAK_FLOPS": 2.9952E12, + "GPU_PEAK_FLOPS": 0, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0.667 +} diff --git a/config/mit_supercloud_gpu/part-cpu/power.json b/config/mit_supercloud_gpu/part-cpu/power.json new file mode 100644 index 0000000..5128c4c --- /dev/null +++ b/config/mit_supercloud_gpu/part-cpu/power.json @@ -0,0 +1,18 @@ +{ + "POWER_GPU_IDLE": 88, + "POWER_GPU_MAX": 560, + "POWER_CPU_IDLE": 90, + "POWER_CPU_MAX": 280, + "POWER_MEM": 74.26, + "POWER_NVME": 30, + "POWER_NIC": 20, + "POWER_CDU": 8473.47, + "POWER_SWITCH": 250, + "POWER_UPDATE_FREQ": 15, + "RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 +} diff --git a/config/mit_supercloud_gpu/part-cpu/scheduler.json b/config/mit_supercloud_gpu/part-cpu/scheduler.json new file mode 100644 index 0000000..0ea905d --- /dev/null +++ b/config/mit_supercloud_gpu/part-cpu/scheduler.json @@ -0,0 +1,18 @@ +{ + "SEED": 42, + "JOB_ARRIVAL_TIME": 900, + "MTBF": 11, + "MAX_TIME": 88200, + "TRACE_QUANTA": 20, + "MIN_WALL_TIME": 3600, + "MAX_WALL_TIME": 43200, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 3000, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/mit_supercloud_gpu/part-cpu/system.json b/config/mit_supercloud_gpu/part-cpu/system.json new file mode 100644 index 0000000..a7c5330 --- /dev/null +++ b/config/mit_supercloud_gpu/part-cpu/system.json @@ -0,0 +1,20 @@ +{ + "NUM_CDUS": 1, + "RACKS_PER_CDU": 1, + "NODES_PER_RACK": 480, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 4, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [], + "DOWN_NODES": [], + "CPUS_PER_NODE": 2, + "GPUS_PER_NODE": 0, + "CPU_PEAK_FLOPS": 2.9952E12, + "GPU_PEAK_FLOPS": 0, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0.667 +} diff --git a/config/mit_supercloud_gpu/part-gpu/power.json b/config/mit_supercloud_gpu/part-gpu/power.json new file mode 100644 index 0000000..725b9fe --- /dev/null +++ b/config/mit_supercloud_gpu/part-gpu/power.json @@ -0,0 +1,18 @@ +{ + "POWER_GPU_IDLE": 75, + 
"POWER_GPU_MAX": 300, + "POWER_CPU_IDLE": 90, + "POWER_CPU_MAX": 280, + "POWER_MEM": 74.26, + "POWER_NVME": 30, + "POWER_NIC": 20, + "POWER_CDU": 8473.47, + "POWER_SWITCH": 250, + "POWER_UPDATE_FREQ": 15, + "RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 +} diff --git a/config/mit_supercloud_gpu/part-gpu/scheduler.json b/config/mit_supercloud_gpu/part-gpu/scheduler.json new file mode 100644 index 0000000..937b71d --- /dev/null +++ b/config/mit_supercloud_gpu/part-gpu/scheduler.json @@ -0,0 +1,18 @@ +{ + "SEED": 42, + "JOB_ARRIVAL_TIME": 900, + "MTBF": 11, + "MAX_TIME": 88200, + "TRACE_QUANTA": 20, + "MIN_WALL_TIME": 3600, + "MAX_WALL_TIME": 43200, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 192, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/mit_supercloud_gpu/part-gpu/system.json b/config/mit_supercloud_gpu/part-gpu/system.json new file mode 100644 index 0000000..3c38e53 --- /dev/null +++ b/config/mit_supercloud_gpu/part-gpu/system.json @@ -0,0 +1,20 @@ +{ + "NUM_CDUS": 1, + "RACKS_PER_CDU": 1, + "NODES_PER_RACK": 224, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 2, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [], + "DOWN_NODES": [], + "CPUS_PER_NODE": 2, + "GPUS_PER_NODE": 2, + "CPU_PEAK_FLOPS": 1.248E12, + "GPU_PEAK_FLOPS": 7.8E12, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0.667 +} diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py new file mode 100644 index 0000000..6679c29 --- /dev/null +++ b/raps/dataloaders/mit_supercloud.py @@ -0,0 +1,52 @@ + +import numpy as np +import os +from raps.job import job_dict + +def load_data(file_path, **kwargs): + """ + Loads MIT Supercloud data from a pickled file and transforms it into a list of job dictionaries. + + Args: + file_path (str): The path to the pickled data file. + + Returns: + list: A list of job dictionaries. 
+ """ + with np.load(file_path, allow_pickle=True) as data: + jobs_data = data['jobs'] + + jobs = [] + for job_dict_data in jobs_data: + # Convert numpy.ndarray to list for cpu_trace and gpu_trace if they are arrays + cpu_trace = job_dict_data.item().get('cpu_trace', []) + if isinstance(cpu_trace, np.ndarray): + cpu_trace = cpu_trace.tolist() + + gpu_trace = job_dict_data.item().get('gpu_trace', []) + if isinstance(gpu_trace, np.ndarray): + gpu_trace = gpu_trace.tolist() + + job = job_dict( + id=job_dict_data.item().get('id'), + name=job_dict_data.item().get('name'), + account=job_dict_data.item().get('account'), + nodes_required=job_dict_data.item().get('nodes_required'), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=job_dict_data.item().get('ntx_trace', []), + nrx_trace=job_dict_data.item().get('nrx_trace', []), + end_state=job_dict_data.item().get('end_state'), + submit_time=job_dict_data.item().get('submit_time'), + time_limit=job_dict_data.item().get('time_limit'), + start_time=job_dict_data.item().get('start_time'), + end_time=job_dict_data.item().get('end_time'), + wall_time=job_dict_data.item().get('wall_time'), + trace_time=job_dict_data.item().get('trace_time', 0), + trace_start_time=job_dict_data.item().get('trace_start_time', 0), + trace_end_time=job_dict_data.item().get('trace_end_time', 0), + trace_missing_values=job_dict_data.item().get('trace_missing_values', False) + ) + jobs.append(job) + + return jobs diff --git a/raps/dataloaders/mit_supercloud/README.md b/raps/dataloaders/mit_supercloud/README.md new file mode 100644 index 0000000..2813ef1 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/README.md @@ -0,0 +1,11 @@ +to generate subset of data needed for Damien's reader from full installation of MIT Supercloud dataset: + + python generate_local_metadata.py /lustre/orion/proj-shared/gen150/exadigit/mit_supercloud/datacenter-challenge/202201 + +to create the npz file that RAPS can use: + + python create_trace.py /lustre/orion/proj-shared/gen150/exadigit/mit_supercloud/datacenter-challenge/202201 + +then to run: + + python main.py -f raps/dataloaders/mit_supercloud/data/mit_supercloud_jobs_21_05_2021__22_05_2021.npz --system mit_supercloud diff --git a/raps/dataloaders/mit_supercloud/create_trace.py b/raps/dataloaders/mit_supercloud/create_trace.py new file mode 100644 index 0000000..a96ce27 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/create_trace.py @@ -0,0 +1,452 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Sep 20 10:14:23 2024 + +@author: daf +""" + +# Given a start and end date identify those jobs that occur in this range and then download them +# from S3 into data/trace as a pcikle file (all traces will be in the same file) + +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +import os +import pandas as pd +import numpy as np +from io import StringIO +import pickle +from datetime import datetime +import shutil +import gzip +from scipy.sparse import csr_matrix as csr +import matplotlib.pyplot as plt +import argparse +from tqdm import tqdm +import sys +from types import SimpleNamespace + +# Add the raps project root to the Python path +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) + +from raps.job import job_dict + +def main(local_dataset_path, start_date, end_date): + + # Get the directory of the current file + mit_dir = os.path.dirname(os.path.abspath(__file__)) + src_data_dir = mit_dir + '/source_data' + ################ Select correct files. 
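+    # NOTE: the two hard-coded dates below shadow the start_date/end_date
+    # parameters of main(); values passed in by the caller are ignored.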
+ ################ CHANGE THESE 2 LINES ######################### + start_date = '21052021' # EU format day/month/year + end_date = '22052021' + + + + # Load the file list and job index from local source_data + file_list_path = os.path.join(mit_dir, 'source_data', 'file_list.csv') + file_df = pd.read_csv(file_list_path, sep='\t') + gpu_file_df = file_df[file_df['File Name'].str.contains('/gpu/')].copy() + gpu_file_df['jobid'] = gpu_file_df['File Name'].str.extract(r'/([^/]+?)-') + gpu_file_df['jobid'] = gpu_file_df['jobid'].astype(int) + + job_index_path = os.path.join(mit_dir, 'source_data', 'job_user_date_full.csv') + job_index_df = pd.read_csv(job_index_path) + + date_obj = datetime.fromtimestamp(job_index_df.start.min()) + date_min_str = date_obj.strftime('%d-%m-%Y') + date_obj = datetime.fromtimestamp(job_index_df.start.max()) + date_max_str = date_obj.strftime('%d-%m-%Y') + print('Data set contains data between: ' +date_min_str + ' and ' + date_max_str ) + + # Create and clear the trace directory. + tracedir = mit_dir + '/data/trace/' + if os.path.exists(tracedir): + pass # do nothing - might want to change this later + # shutil.rmtree(tracedir) # Remove everything in the folder + # os.makedirs(tracedir) # Recreate the folder after clearing it + else: + os.makedirs(tracedir) + + + st_date = datetime.strptime(start_date, '%d%m%Y') + st_date = int(st_date.timestamp()) + en_date = datetime.strptime(end_date, '%d%m%Y') + en_date = int(en_date.timestamp()) + + if st_date < job_index_df.start.min(): + print('Warning: start date (' + start_date + ') is before the start of the dataset (' + date_min_str + ') ') + if st_date > job_index_df.start.max(): + print('Error: start date (' + start_date + ') is after the end of the dataset (' + date_max_str + ') ') + crashhere + + # find the jobs that start between start and end dates. + sift = (job_index_df.start > st_date) & (job_index_df.start < en_date) + print('You have selected ' + str(sift.sum()) + ' fiies to download ') + + ##################### Copy from local dataset to trace directory + + df = job_index_df[sift].copy() + # The 'filename' column in job_index_df already contains relative paths like 'datacenter-challenge/202201/cpu/0026/jobid-summary.csv' + # We need to convert these to timeseries paths and then to absolute local paths. + + # Get all unique files that need to be copied (CPU and GPU timeseries) + files_to_copy = [] + + # Add CPU timeseries files + for _, row in df.iterrows(): + relative_summary_path = row['filename'] + # Convert summary path to timeseries path + relative_timeseries_path = relative_summary_path.replace('-summary', '-timeseries') + files_to_copy.append(relative_timeseries_path) + + # Add GPU timeseries files + for _, row in gpu_file_df[gpu_file_df.jobid.isin(df.job_id)].iterrows(): + files_to_copy.append(row['File Name']) # 'File Name' in gpu_file_df is already the relative path to the timeseries file + + # Remove duplicates and ensure unique files + files_to_copy = list(set(files_to_copy)) + + print(f"Selected {len(files_to_copy)} trace files to process.") + + # Copy files to tracedir + for relative_path in tqdm(files_to_copy, desc="Copying trace files to processing directory"): + src_file_path = os.path.join(local_dataset_path, relative_path) + dest_file_name = os.path.basename(relative_path) + dest_file_path = os.path.join(tracedir, dest_file_name) + + if not os.path.exists(src_file_path): + print(f"Warning: Source file not found: {src_file_path}. 
Skipping.") + continue + + if os.path.exists(dest_file_path): + # Check if source and dest are the same size to avoid unnecessary copy + if os.path.getsize(src_file_path) == os.path.getsize(dest_file_path): + continue # File already copied and is the same, skip + else: + # If sizes differ, re-copy + shutil.copy2(src_file_path, dest_file_path) + else: + shutil.copy2(src_file_path, dest_file_path) + + ##################### Process. + + # Load the slurm log to grab additional attributes from the local dataset. + # Search for slurm-log.csv anywhere within the local dataset root + slurm_log_path = None + for root, _, files in os.walk(local_dataset_path): + if 'slurm-log.csv' in files: + slurm_log_path = os.path.join(root, 'slurm-log.csv') + break + + if slurm_log_path is None: + print(f"Error: slurm-log.csv not found in {local_dataset_path}. Cannot proceed.") + return + slurm_df = pd.read_csv(slurm_log_path) + + dfiles_raw = os.listdir(tracedir) + # Sort so we process the cpu files first (we need the result for the gpu files) + dfiles = sorted(dfiles_raw, key=lambda x: 'timeseries' not in x) + dfiles = [file for file in dfiles if 'lock' not in file] + + print('Downloaded ' + str(len(dfiles)) + ' files. Processing ... ') + L = len(dfiles) + cnt = 0 + data_dict = {} + for s in dfiles: + if cnt%100==0: + print('processing file ' + str(cnt) + ' of ' + str(L)) + cnt = cnt+1 + fyle = os.path.join(mit_dir +'/data/trace/', s.split('/')[-1]) + dfi = pd.read_csv(fyle) + + jobid = int(s.split('-')[0]) + if jobid not in data_dict.keys(): + data_dict[jobid] = {} + # Add slurm data on creation + idx = np.where(slurm_df['id_job']==jobid)[0] + if idx.shape[0]!=1: + crashhere + else: + data_dict[jobid] = slurm_df.iloc[idx[0]].to_dict() + if ('timeseries' in s) and ('lock' not in s): + if 'cpu' in data_dict[jobid].keys(): + print('error a job cant have more than one cpu traces') + crashhere + else: + cpu_ser = proc_cpu_series(dfi) + data_dict[jobid]['cpu'] = cpu_ser + + elif 'gpu_index' in dfi.keys(): + mm = dfi.utilization_gpu_pct.max() + print('GPU max: ' + str(mm) ) + # Get the gpu node and rack + rack = s.split('-')[1] + node = s.split('-')[2].split('.csv')[0] + cpu_df = data_dict[jobid]['cpu'] + + + if 'gpu' not in data_dict[jobid].keys(): + data_dict[jobid]['gpu'] = {} + data_dict[jobid]['gpu_cnt']=0 + data_dict[jobid]['grack']=[rack] + data_dict[jobid]['gnode']=[node] + gpu_cnt = data_dict[jobid]['gpu_cnt'] + gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt) + data_dict[jobid]['gpu'] = gpu_ser + else: + data_dict[jobid]['grack'].append(rack) + data_dict[jobid]['gnode'].append(node) + gpu_df = data_dict[jobid]['gpu'] + gpu_cnt = data_dict[jobid]['gpu_cnt'] + gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt) + # combine with the existing df + df_merged = pd.merge(gpu_df, gpu_ser, on='utime') + if df_merged.shape[0] != gpu_df.shape[0]: + # This indicates a mismatch in time series, which should be investigated if it occurs + # For now, we'll assume it's an error and can be handled by a more robust check if needed. 
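+                        # Sketch of such a check (hypothetical, not in this patch):
+                        #   raise ValueError(f"job {jobid}: GPU time grid diverges from CPU grid")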
+                    pass # crashhere was here, but we'll let it continue for now
+                data_dict[jobid]['gpu'] = df_merged
+                data_dict[jobid]['gpu_cnt']= gpu_cnt
+    # Create a list of job dictionaries
+    jobs_list = []
+    for jobid, data in data_dict.items():
+        cpu_trace = data.get('cpu', {}).get('cpu_utilisation', [])
+        gpu_trace = data.get('gpu', {})
+        if not isinstance(gpu_trace, pd.DataFrame) or gpu_trace.empty:
+            gpu_trace_list = []
+        else:
+            # Assuming gpu_trace is a DataFrame that needs to be converted to a list of lists or similar
+            gpu_trace_list = gpu_trace.values.tolist()
+
+        job = job_dict(
+            nodes_required=data.get('n_nodes', 1),
+            name=data.get('name_job', 'unknown'),
+            account=data.get('name_account', 'unknown'),
+            cpu_trace=cpu_trace.tolist() if isinstance(cpu_trace, np.ndarray) else cpu_trace,
+            gpu_trace=gpu_trace_list,
+            ntx_trace=[],
+            nrx_trace=[],
+            end_state=data.get('state_end', 'UNKNOWN'),
+            id=jobid,
+            submit_time=data.get('time_submit', 0),
+            time_limit=data.get('time_limit', 0),
+            start_time=data.get('time_start', 0),
+            end_time=data.get('time_end', 0),
+            wall_time=data.get('time_end', 0) - data.get('time_start', 0)
+        )
+        jobs_list.append(job)
+
+    # Save the list of jobs to an npz file
+    npz_dir = os.path.join(mit_dir, 'data')
+    os.makedirs(npz_dir, exist_ok=True)
+    t1 = datetime.fromtimestamp(st_date)
+    tf1 = t1.strftime('%d_%m_%Y')
+    t2 = datetime.fromtimestamp(en_date)
+    tf2 = t2.strftime('%d_%m_%Y')
+    fyle_name = f'mit_supercloud_jobs_{tf1}__{tf2}.npz'
+    fyle_path = os.path.join(npz_dir, fyle_name)
+
+    # Convert list of dictionaries to a structured array for saving
+    #np.savez(fyle_path, jobs=np.array(jobs_list))
+    # Also include start_timestep, end_timestep, and a placeholder for args
+    np.savez(fyle_path, jobs=np.array(jobs_list), \
+        start_timestep=st_date, end_timestep=en_date, \
+        args=SimpleNamespace(fastforward=None, system='mit_supercloud', time=en_date))
+
+    print(f"Saved {len(jobs_list)} jobs to {fyle_path}")
+
+    return
+
+def proc_gpu_series(cpu_df,dfi,gpu_cnt):
+    # Process GPU series by interpolating it to the same times as the cpu series.
+
+    # time checks
+    t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max() , 0])
+    t_cpu[2]=t_cpu[1]-t_cpu[0]
+    t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max(),0])
+    t_gpu[2]=t_gpu[1]-t_gpu[0]
+
+    dcpu = pd.to_datetime(t_cpu, unit='s')
+    dgpu = pd.to_datetime(t_gpu, unit='s')
+    t1 = (dcpu[1]-dcpu[0]).total_seconds()
+    t2 = (dgpu[1]-dgpu[0]).total_seconds()
+    per_dif = (t1-t2)/t2*100
+    print('CPU/GPU trace duration difference: ' + str(per_dif) + ' %')
+    if abs(per_dif) > 10:
+        # More than 10% difference in the time taken, halt and look at it
+        raise ValueError('CPU and GPU trace durations differ by more than 10%')
+
+    # So move the GPU time to the CPU times.
+    dfi['t_fixed'] = dfi.timestamp-dfi.timestamp.min()+t_cpu[0]
+
+    ugpus = dfi.gpu_index.unique()
+    gpu_df= pd.DataFrame({'utime': cpu_df['utime'].values})
+
+
+    for u in ugpus:
+        dfg = dfi[dfi.gpu_index==u].copy()
+
+        # Perform an interpolation
+        fylds = ['gpu_index', 'utilization_gpu_pct',
+                 'utilization_memory_pct', 'memory_free_MiB', 'memory_used_MiB',
+                 'temperature_gpu', 'temperature_memory', 'power_draw_W']
+
+
+
+        for ff in fylds:
+            x1 = dfg['t_fixed'].values
+            y1 = dfg[ff].values
+            xv = cpu_df['utime'].values
+
+            # Interpolate using NumPy
+            yv = np.interp(xv, x1, y1)
+
+            gpu_df[ff] = yv
+        ss = str(gpu_cnt)
+        ren = {'utilization_gpu_pct': 'gpu_' + ss,
+               'utilization_memory_pct': 'gpu_mem_' + ss,
+               'temperature_gpu': 'gpu_temp_' + ss,
+               'power_draw_W':'gpu_p_'+ ss,
+               }
+        gpu_df.rename(columns=ren, inplace=True)
+        gpu_cnt = gpu_cnt + 1
+
+    return gpu_df,gpu_cnt
+
+def proc_cpu_series(dfi):
+    # This is the code that processes cpu data and performs the following steps:
+    # 1. Remove information from step [-1,-4] as these are empty.
+    # 2. Give outliers their nearest-neighbour values. There are spikes of outliers in the utilisation; I think the whole row is rotten too. They are values like 40000.
+    # 3. For each series get the max cpu utilisation at each time step.
+    #    Save these for the output.
+    # 4. Get the average cpu utilisation per series (maxed from step 3)
+
+    # 1 Remove information from step [-1,-4] as these are empty.
+    sift = dfi.Step.isin([-1,-4,'-1','-4'])
+    if dfi.CPUUtilization[sift].sum() >0:
+        print('found a series that breaks the rule, check it')
+        # The -1 -4 indicators should be for non-events. If the cpu utilisation has values something is up, might be a spike or something, but the rule needs to be changed.
+        raise ValueError('non-zero CPUUtilization found for step indicators -1/-4')
+    # remove
+    dfi = dfi[~sift].copy()
+
+    # Check for 1-1 series node correspondences and if not then there is an issue we need to clean up.
+    if False:
+        unode_series = dfi.groupby(['Node', 'Series']).size().reset_index(name='count')
+        unode = dfi.Node.unique()
+        for n in unode:
+            sift = dfi.Node == n
+            splits = dfi[sift].groupby('Series').size().reset_index(name='count')
+            splits = splits.sort_values(by='count', ascending=False)
+            for i in range(splits.shape[0]):
+                # Reassign the Series number back to the max for the node.
+                if i==0:
+                    dest_ser = splits.iloc[i].Series
+                else:
+                    # reassign the targets.
+                    faulty_ser = splits.iloc[i].Series
+                    sift_reas = sift & (dfi.Series ==faulty_ser )
+                    dfi.loc[sift,'Series'] = dest_ser
+                    #if sift_reas.sum()>40:
+                    #    asd
+                    print('Reassigning ' + str(sift_reas.sum()) + ' rows with faulty series values (from a total of ' + str(splits['count'][0])+ ' )')
+    t = pd.to_datetime(dfi.EpochTime, unit='s')
+    start_time = t.min()
+    steps = (t - start_time).dt.total_seconds() // 10
+    # Convert to integer type if needed
+    steps = steps.astype(int)
+    dfi['t']= steps
+
+    sid, uniques = pd.factorize(dfi.Step)
+    dfi['sid']= sid
+
+
+
+    # 2. Outliers.
+    sift = (dfi.CPUUtilization > 500) & (dfi.CPUUtilization < 600)
+    # Clip these back to 500
+    if sift.sum()>0:
+        #asd
+        print('clipping ' + str(sift.sum()) + ' values' )
+        dfi.loc[sift, 'CPUUtilization'] = 500
+
+    # select rows with >600 as outliers.
+    sift = dfi.CPUUtilization > 600
+    if sum(sift)>0:
+        # Set to the nearest value less than 600.
+        dfi.loc[sift, 'CPUUtilization'] = dfi['CPUUtilization'].where(~sift).ffill().combine_first(dfi['CPUUtilization']).where(dfi['CPUUtilization'] <= 600)
+
+    # 3. 
There are multiple series so we want to get the maximum (as only one series at a time is active) + useries = dfi.Series.unique() + inds = np.arange(dfi.t.max()+1) + # Create a data frame to hold the results. + df = pd.DataFrame({'t':inds}) + Xm = np.zeros((len(useries),inds.shape[0])) + Xrss = np.zeros((len(useries),inds.shape[0])) + Xvm = np.zeros((len(useries),inds.shape[0])) + Xreadmb = np.zeros((len(useries),inds.shape[0])) + Xwritemb = np.zeros((len(useries),inds.shape[0])) + + cnt=0 + for i in useries: + sift = dfi.Series == i + M = len(inds) + N = dfi.sid[sift].max()+1 + # create a #series x #time steps csr then max it to get the actual readings. + X = csr( (dfi.CPUUtilization[sift],(dfi.t[sift],dfi.sid[sift])),shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['cpu_' + str(i)] = mm + Xm[cnt,:] = mm + + # RSS + X = csr( (dfi.RSS[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['rss_' + str(i)] = mm + Xrss[cnt,:] = mm + + # VMsize + X = csr( (dfi.VMSize[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['vm_' + str(i)] = mm + Xvm[cnt,:] = mm + + # ReadMB + X = csr( (dfi.ReadMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['readmb_' + str(i)] = mm + Xreadmb[cnt,:] = mm + + # WriteMB + X = csr( (dfi.WriteMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['writemb_' + str(i)] = mm + Xwritemb[cnt,:] = mm + + + cnt=cnt+1 + + df['cpu_utilisation'] = Xm.mean(axis=0) + df['rss'] = Xrss.sum(axis=0) + df['vm'] = Xvm.sum(axis=0) + df['readmb'] = Xreadmb.sum(axis=0) + df['writemb'] = Xwritemb.sum(axis=0) + + + df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') + df['utime'] = df['timestamp'].astype('int64') // 10**9 + return df + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Process MIT Supercloud data to create job traces.") + parser.add_argument("local_dataset_path", type=str, + help="The root path to your locally downloaded MIT Supercloud dataset.") + parser.add_argument("--start_date", type=str, default='21052021', + help="Start date for job selection (DDMMYYYY).") + parser.add_argument("--end_date", type=str, default='22052021', + help="End date for job selection (DDMMYYYY).") + args = parser.parse_args() + + main(args.local_dataset_path, args.start_date, args.end_date) + diff --git a/raps/dataloaders/mit_supercloud/dist/anal_data.py b/raps/dataloaders/mit_supercloud/dist/anal_data.py new file mode 100644 index 0000000..4ae8c70 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/dist/anal_data.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Sep 25 15:49:04 2024 + +@author: daf +""" +import gzip +import pickle +import os + +# Get the directory of the current file +mit_dir = os.path.dirname(os.path.abspath(__file__)) + +# List the data files you want analysed into this list. It is assumed they live in /data/pkl +data_fyles = ['data_21_05_2021__22_05_2021.pkl.gz'] + +data = {} +# Combine the pickle files for comparison. 
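+# Note: the dict union used below ({**data, **datai}) lets later files win for
+# any duplicated job ids, which is why overlapping keys are reported first.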
+for s in data_fyles:
+    fyle = mit_dir+'/data/pkl/' + s
+    with gzip.open(fyle, 'rb') as file:
+        datai = pickle.load(file)
+    if not data:  # first file: nothing loaded yet
+        data = datai
+    else:
+        # Check for common keys first
+        common_keys = list(data.keys() & datai.keys())
+        if len(common_keys)>0:
+            print('Warning: there seem to be jobs overlapping in the data sets')
+
+        # Combine
+        data = {**data, **datai}
+
+# Let's see how the job time series actually look
+
diff --git a/raps/dataloaders/mit_supercloud/dist/create_trace.py b/raps/dataloaders/mit_supercloud/dist/create_trace.py
new file mode 100644
index 0000000..a8af4a7
--- /dev/null
+++ b/raps/dataloaders/mit_supercloud/dist/create_trace.py
@@ -0,0 +1,382 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 20 10:14:23 2024
+
+@author: daf
+"""
+
+# Given a start and end date, identify those jobs that occur in this range, download them
+# from S3 into data/trace, and then process them into a pickle file (all traces will be in the same file).
+
+import boto3
+from botocore import UNSIGNED
+from botocore.client import Config
+import os
+import pandas as pd
+import numpy as np
+from io import StringIO
+import pickle
+from datetime import datetime
+import shutil
+import gzip
+from scipy.sparse import csr_matrix as csr
+import matplotlib.pyplot as plt
+
+def main():
+
+    # Get the directory of the current file
+    mit_dir = os.path.dirname(os.path.abspath(__file__))
+    src_data_dir = mit_dir + '/source_data'
+    ################ Select correct files.
+    ################ CHANGE THESE 2 LINES #########################
+    start_date = '21052021' # EU format day/month/year
+    end_date = '22052021'
+
+
+
+    ################# Load index files to use to look up the correct files.
+    # MIT S3 bucket address.
+    bucket_name = 'mit-supercloud-dataset'
+    prefix = 'datacenter-challenge/202201/'
+
+    # Load the s3 file index file
+    fyle = mit_dir + '/source_data/file_list.csv'
+    file_df = pd.read_csv(fyle,sep='\t')
+    gpu_file_df = file_df[file_df['File Name'].str.contains('/gpu/')].copy()
+    gpu_file_df['jobid'] = gpu_file_df['File Name'].str.extract(r'/([^/]+?)-')
+    gpu_file_df['jobid'] = gpu_file_df['jobid'].astype(int)
+
+    # Load the index file
+    fyle = mit_dir + '/source_data/job_user_date_full.csv'
+    job_index_df = pd.read_csv(fyle)
+
+    date_obj = datetime.fromtimestamp(job_index_df.start.min())
+    date_min_str = date_obj.strftime('%d-%m-%Y')
+    date_obj = datetime.fromtimestamp(job_index_df.start.max())
+    date_max_str = date_obj.strftime('%d-%m-%Y')
+    print('Data set contains data between: ' + date_min_str + ' and ' + date_max_str)
+
+    # Create the trace directory (clearing it is currently disabled).
+    tracedir = mit_dir + '/data/trace/'
+    if os.path.exists(tracedir):
+        pass # do nothing - might want to change this later
+        # shutil.rmtree(tracedir)  # Remove everything in the folder
+        # os.makedirs(tracedir)    # Recreate the folder after clearing it
+    else:
+        os.makedirs(tracedir)
+
+
+    st_date = datetime.strptime(start_date, '%d%m%Y')
+    st_date = int(st_date.timestamp())
+    en_date = datetime.strptime(end_date, '%d%m%Y')
+    en_date = int(en_date.timestamp())
+
+    if st_date < job_index_df.start.min():
+        print('Warning: start date (' + start_date + ') is before the start of the dataset (' + date_min_str + ')')
+    if st_date > job_index_df.start.max():
+        raise ValueError('start date (' + start_date + ') is after the end of the dataset (' + date_max_str + ')')
+
+    # Find the jobs that start between the start and end dates.
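+    # Both comparisons below are strict (> and <), so jobs starting exactly at
+    # st_date or en_date are excluded; use >= / <= if boundary jobs are wanted.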
+    sift = (job_index_df.start > st_date) & (job_index_df.start < en_date)
+    print('You have selected ' + str(sift.sum()) + ' jobs to download')
+
+    ##################### Download from S3
+
+    df = job_index_df[sift].copy()
+    df['target'] = df.filename.str.replace('-summary', '-timeseries')
+    # Go through each file and download it
+    # Set up S3 client.
+    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
+    cnt=0
+
+    for s in df.target:
+        jobid = s.split('/')[-1].split('-')[0]
+        jobid=int(jobid)
+        fyle = os.path.join(mit_dir +'/data/trace/', s.split('/')[-1])
+
+        # download the data unless we already have it
+        if not os.path.exists(fyle):
+            s3.download_file(bucket_name, s, fyle)
+            # See if there are GPU files for this job.
+            sift_gpu = gpu_file_df.jobid == jobid
+            if sift_gpu.sum()>0:
+                gpu_fyles = gpu_file_df['File Name'][sift_gpu]
+                for ss in gpu_fyles:
+                    gfyle = os.path.join(mit_dir +'/data/trace/', ss.split('/')[-1])
+                    if not os.path.exists(gfyle):
+                        s3.download_file(bucket_name, ss, gfyle)
+
+        else:
+            pass
+        cnt=cnt+1
+        if cnt%50 ==0:
+            print('Downloaded ' + str(cnt) + ' of ' + str(sift.sum()))
+
+    ##################### Process.
+
+    # Load the slurm log to grab additional attributes.
+    slurm_df = pd.read_csv(src_data_dir+'/slurm-log.csv')
+
+    dfiles_raw = os.listdir(tracedir)
+    # Sort so we process the cpu files first (we need the result for the gpu files)
+    dfiles = sorted(dfiles_raw, key=lambda x: 'timeseries' not in x)
+    dfiles = [file for file in dfiles if 'lock' not in file]
+
+    print('Downloaded ' + str(len(dfiles)) + ' files. Processing ... ')
+    L = len(dfiles)
+    cnt = 0
+    data_dict = {}
+    for s in dfiles:
+        if cnt%100==0:
+            print('processing file ' + str(cnt) + ' of ' + str(L))
+        cnt = cnt+1
+        fyle = os.path.join(mit_dir +'/data/trace/', s.split('/')[-1])
+        dfi = pd.read_csv(fyle)
+
+        jobid = int(s.split('-')[0])
+        if jobid not in data_dict.keys():
+            data_dict[jobid] = {}
+            # Add slurm data on creation
+            idx = np.where(slurm_df['id_job']==jobid)[0]
+            if idx.shape[0]!=1:
+                raise ValueError('expected exactly one slurm-log row for job ' + str(jobid) + ', found ' + str(idx.shape[0]))
+            else:
+                data_dict[jobid] = slurm_df.iloc[idx[0]].to_dict()
+        if ('timeseries' in s) and ('lock' not in s):
+            if 'cpu' in data_dict[jobid].keys():
+                raise ValueError('job ' + str(jobid) + ' cannot have more than one CPU trace')
+            else:
+                cpu_ser = proc_cpu_series(dfi)
+                data_dict[jobid]['cpu'] = cpu_ser
+
+        elif 'gpu_index' in dfi.keys():
+            mm = dfi.utilization_gpu_pct.max()
+            print('GPU max: ' + str(mm))
+            # Get the gpu node and rack
+            rack = s.split('-')[1]
+            node = s.split('-')[2].split('.csv')[0]
+            cpu_df = data_dict[jobid]['cpu']
+
+
+            if 'gpu' not in data_dict[jobid].keys():
+                data_dict[jobid]['gpu'] = {}
+                data_dict[jobid]['gpu_cnt']=0
+                data_dict[jobid]['grack']=[rack]
+                data_dict[jobid]['gnode']=[node]
+                gpu_cnt = data_dict[jobid]['gpu_cnt']
+                gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt)
+                data_dict[jobid]['gpu'] = gpu_ser
+            else:
+                data_dict[jobid]['grack'].append(rack)
+                data_dict[jobid]['gnode'].append(node)
+                gpu_df = data_dict[jobid]['gpu']
+                gpu_cnt = data_dict[jobid]['gpu_cnt']
+                gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt)
+                # combine with the existing df
+                df_merged = pd.merge(gpu_df, gpu_ser, on='utime')
+                if df_merged.shape[0] != gpu_df.shape[0]:
+                    raise ValueError('GPU and CPU time grids no longer align after merge')
+                data_dict[jobid]['gpu'] = df_merged
+                data_dict[jobid]['gpu_cnt']= gpu_cnt
+
+    # save to pickle file.
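+    # Output is one gzip-compressed pickle per date range, written by the code
+    # below to data/pkl/data_<start>__<end>.pkl.gz.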
+ pkldir = mit_dir + '/data/pkl/' + os.makedirs(pkldir, exist_ok=True) + t1 = datetime.fromtimestamp(st_date) + tf1 = t1.strftime('%d_%m_%Y') + t2 = datetime.fromtimestamp(en_date) + tf2 = t2.strftime('%d_%m_%Y') + fyle = 'data_' + tf1 + '__'+tf2 + fyle = pkldir+fyle +'.pkl.gz' + with gzip.open(fyle, 'wb') as file: + pickle.dump(data_dict, file) + + return + +def proc_gpu_series(cpu_df,dfi,gpu_cnt): + # Process GPU series by interpolating it to the same times as the cpu series. + + # time checks + t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max() , 0]) + t_cpu[2]=t_cpu[1]-t_cpu[0] + t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max(),0]) + t_gpu[2]=t_gpu[1]-t_gpu[0] + + dcpu = pd.to_datetime(t_cpu, unit='s') + dgpu = pd.to_datetime(t_gpu, unit='s') + t1 = (dcpu[1]-dcpu[0]).total_seconds() + t2 = (dgpu[1]-dgpu[0]).total_seconds() + per_dif = (t1-t2)/t2*100 + print(per_dif) + if abs(per_dif) > 10: + # More than 2% difference in the time taken, halt and look at it + crashhere + + # So move the GPU time to the CPU times. + dfi['t_fixed'] = dfi.timestamp-dfi.timestamp.min()+t_cpu[0] + + ugpus = dfi.gpu_index.unique() + gpu_df= pd.DataFrame({'utime': cpu_df['utime'].values}) + + + for u in ugpus: + dfg = dfi[dfi.gpu_index==u].copy() + + # Perform an interpolation + fylds = ['gpu_index', 'utilization_gpu_pct', + 'utilization_memory_pct', 'memory_free_MiB', 'memory_used_MiB', + 'temperature_gpu', 'temperature_memory', 'power_draw_W'] + + + + for ff in fylds: + x1 = dfg['t_fixed'].values + y1 = dfg[ff].values + xv = cpu_df['utime'].values + + # Interpolate using NumPy + yv = np.interp(xv, x1, y1) + + gpu_df[ff] = yv + ss = str(gpu_cnt) + ren = {'utilization_gpu_pct': 'gpu_' + ss, + 'utilization_memory_pct': 'gpu_mem_' + ss, + 'temperature_gpu': 'gpu_temp_' + ss, + 'power_draw_W':'gpu_p_'+ ss, + } + gpu_df.rename(columns=ren, inplace=True) + gpu_cnt = gpu_cnt + 1 + + return gpu_df,gpu_cnt + +def proc_cpu_series(dfi): + # This is the code that processes cpu data and performs the following steps: + # 1. Remove information from step [-1,-4] as these are empty. + # 2. give outliers their nearest neighbour values. There are spikes of outliers in the utilsation, I think thw whole row is rotten too. They are values like 40000 + # 3. For each series get the max cpu utilisation at each time step. + # Save these for the output. + # 4. Get the average cpu utilsation per series (maxed from step 3) + + # 1 Remove information from step [-1,-4] as these are empty. + sift = dfi.Step.isin([-1,-4,'-1','-4']) + if dfi.CPUUtilization[sift].sum() >0: + print('found a series that breaks the rule, check it') + # The -1 -4 indicators should be for non-events. IF the cpu utilisation has values something is up, might be a spike or something but the rule needs to be changed. + crashhere + # remove + dfi = dfi[~sift].copy() + + # Check for 1-1 series node correspondences and if not then there is an issue we need to clean up. + if False: + unode_series = dfi.groupby(['Node', 'Series']).size().reset_index(name='count') + unode = dfi.Node.unique() + for n in unode: + sift = dfi.Node == n + splits = dfi[sift].groupby('Series').size().reset_index(name='count') + splits = splits.sort_values(by='count', ascending=False) + for i in range(splits.shape[0]): + # Reassign the Series number back to the max for the node. + if i==0: + dest_ser = splits.iloc[i].Series + else: + # reassign the targets. 
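+                    # Note: the assignment below filters on `sift` (every row of
+                    # the node), not `sift_reas`, so it rewrites the whole node's
+                    # Series ids; this block is disabled (`if False:`) anyway.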
+ faulty_ser = splits.iloc[i].Series + sift_reas = sift & (dfi.Series ==faulty_ser ) + dfi.loc[sift,'Series'] = dest_ser + if sift_reas.sum()>40: + asd + print('Reassigning ' + str(sift_reas.sum()) + ' rows with faulty series values (from a total of ' + str(splits['count'][0])+ ' )') + t = pd.to_datetime(dfi.EpochTime, unit='s') + start_time = t.min() + steps = (t - start_time).dt.total_seconds() // 10 + # Convert to integer type if needed + steps = steps.astype(int) + dfi['t']= steps + + sid, uniques = pd.factorize(dfi.Step) + dfi['sid']= sid + + + + # 2. Outliers. + sift = (dfi.CPUUtilization > 500) & (dfi.CPUUtilization < 600) + # Clip these back to 500 + if sift.sum()>0: + asd + print('clipping ' + str(sift.sum()) + ' values' ) + dfi.loc[sift, 'CPUUtilization'] = 500 + + # select rows with >600 as outliers. + sift = dfi.CPUUtilization > 600 + if sum(sift)>0: + # Set to the nearest value less than 600. + dfi.loc[sift, 'CPUUtilization'] = dfi['CPUUtilization'].where(~sift).ffill().combine_first(dfi['CPUUtilization']).where(dfi['CPUUtilization'] <= 600) + + # 3. There are multiple series so we want to get the maximum (as only one series at a time is active) + useries = dfi.Series.unique() + inds = np.arange(dfi.t.max()+1) + # Create a data frame to hold the results. + df = pd.DataFrame({'t':inds}) + Xm = np.zeros((len(useries),inds.shape[0])) + Xrss = np.zeros((len(useries),inds.shape[0])) + Xvm = np.zeros((len(useries),inds.shape[0])) + Xreadmb = np.zeros((len(useries),inds.shape[0])) + Xwritemb = np.zeros((len(useries),inds.shape[0])) + + cnt=0 + for i in useries: + sift = dfi.Series == i + M = len(inds) + N = dfi.sid[sift].max()+1 + # create a #series x #time steps csr then max it to get the actual readings. + X = csr( (dfi.CPUUtilization[sift],(dfi.t[sift],dfi.sid[sift])),shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['cpu_' + str(i)] = mm + Xm[cnt,:] = mm + + # RSS + X = csr( (dfi.RSS[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['rss_' + str(i)] = mm + Xrss[cnt,:] = mm + + # VMsize + X = csr( (dfi.VMSize[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['vm_' + str(i)] = mm + Xvm[cnt,:] = mm + + # ReadMB + X = csr( (dfi.ReadMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['readmb_' + str(i)] = mm + Xreadmb[cnt,:] = mm + + # WriteMB + X = csr( (dfi.WriteMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df['writemb_' + str(i)] = mm + Xwritemb[cnt,:] = mm + + + cnt=cnt+1 + + df['cpu_utilisation'] = Xm.mean(axis=0) + df['rss'] = Xrss.sum(axis=0) + df['vm'] = Xvm.sum(axis=0) + df['readmb'] = Xreadmb.sum(axis=0) + df['writemb'] = Xwritemb.sum(axis=0) + + + df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') + df['utime'] = df['timestamp'].astype('int64') // 10**9 + return df + +if __name__ == "__main__": + main() + \ No newline at end of file diff --git a/raps/dataloaders/mit_supercloud/dist/download_data.py b/raps/dataloaders/mit_supercloud/dist/download_data.py new file mode 100644 index 0000000..7987ec4 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/dist/download_data.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Sep 20 10:14:23 2024 + +@author: daf +""" +# This script will look to see if you have certain files and if not it will create/download 
them (this avoids large unnecessary downloads).
+# In addition it is used to download data for certain date ranges that you can specify (across all machines).
+# To set the date ranges change start_date and end_date just below. (Note: this script does not use them yet; see create_trace.py for date-based selection.)
+
+import boto3
+from botocore import UNSIGNED
+from botocore.client import Config
+import os
+import pandas as pd
+from io import StringIO
+
+# Get the directory of the current file
+mit_dir = os.path.dirname(os.path.abspath(__file__))
+
+start_date = '01012020' # EU format day/month/year
+end_date = '01012020'
+def list_s3_files_and_sizes(bucket_name, prefix=''):
+    # Initialize an S3 client with no signing
+    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
+
+    file_names = []
+    file_sizes_gb = []
+
+    paginator = s3.get_paginator('list_objects_v2')
+    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
+        if 'Contents' in page:
+            for obj in page['Contents']:
+                namm = obj['Key']
+                file_names.append(namm)
+                file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to MB (despite the variable name)
+                file_sizes_gb.append(file_size_gb)
+                print(f"{namm}: {file_size_gb:.4f} MB")
+
+    return file_names, file_sizes_gb
+
+def download_s3_bucket(bucket_name, prefix, datadir):
+    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
+    paginator = s3.get_paginator('list_objects_v2')
+
+    # Recursively download all files with the given prefix
+    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
+        if 'Contents' in page:
+            for obj in page['Contents']:
+                # Get the file's S3 key
+                s3_key = obj['Key']
+                s3_stem = s3_key[28:]
+                local_file_path = os.path.join(datadir, s3_stem)
+
+                local_dir = os.path.dirname(local_file_path)
+                if not os.path.exists(local_dir):
+                    os.makedirs(local_dir)
+                print(f"Downloading {s3_key} to {local_file_path}...")
+                s3.download_file(bucket_name, s3_key, local_file_path)
+
+def index_summary_file(bucket_name, prefix, datadir):
+    # Note: relies on the module-level s3 client created further below.
+    paginator = s3.get_paginator('list_objects_v2')
+    results = []
+    # Check if the bucket contains any objects
+    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
+        if 'Contents' in page:
+            for obj in page['Contents']:
+                key = obj['Key']
+                # Check if the key ends with '-summary.csv'
+                if key.endswith('-summary.csv'):
+                    # Read the CSV file from S3 into a DataFrame
+                    csv_obj = s3.get_object(Bucket=bucket_name, Key=key)
+                    body = csv_obj['Body'].read().decode('utf-8') # Decode bytes to string
+
+                    # Use StringIO to read the CSV data
+                    df = pd.read_csv(StringIO(body))
+
+                    # Get the first and last epoch timestamps
+                    st_time = df['Min_EpochTime'].min()
+                    ed_time = df['Max_EpochTime'].max()
+                    node_count = df.Node.unique().shape[0]
+                    jobid = int(key.split('/')[-1].split('-')[0])
+                    # Append the results to the DataFrame
+                    results.append({'job_id': jobid, 'filename': key, 'start': st_time, 'end' : ed_time, 'node_count': node_count})
+                    print(f"Processed: {key}")
+
+    df = pd.DataFrame(results)
+    return df
+
+# MIT S3 bucket address.
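+# The dataset bucket is public: every client in this script is created with
+# unsigned requests (botocore UNSIGNED), so no AWS credentials are needed.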
+bucket_name = 'mit-supercloud-dataset' +prefix = 'datacenter-challenge/202201/' + + +# Get the list of S3 file names and sizes and save (unless its already there) +fyle = mit_dir + '/source_data/file_list.csv' + +# Check if file exists +if not os.path.exists(fyle): + file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) + # Open a file in write mode + with open(fyle, "w") as file: + # Write the header (optional) + file.write("File Name\tSize (MB)\n") + # Iterate over both lists and write each file name and its size + for name, size in zip(file_names, file_sizes_gb): + file.write(f"{name}\t{size:.2f} \n") + +# Download the following root dir files. +s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) +dfiles = ['LICENSE','README.md','labelled_job_stats.csv','labelled_jobids.csv' + ,'node-data.csv','slurm-log.csv','tres-mapping.txt'] + +for s in dfiles: + fyle = os.path.join(mit_dir +'/source_data', os.path.basename(s)) + if not os.path.exists(fyle): + s3.download_file(bucket_name, prefix + s, fyle) + +# download one cpu and 1 gpu of data. +bucket_name = 'mit-supercloud-dataset' +subfolder = 'datacenter-challenge/202201/cpu/0026/' +datadir = mit_dir + '/source_data' +#download_s3_bucket(bucket_name, subfolder, datadir) + +subfolder = 'datacenter-challenge/202201/gpu/0020/' +#download_s3_bucket(bucket_name, subfolder, datadir) + + +# Create the job-user-date index file if it doesnt exist already. +fyle = mit_dir + '/source_data/job_user_date.csv' +# Check if file exists +if not os.path.exists(fyle): + print('This can take about 24 hours to complete.') + job_index_df = index_summary_file(bucket_name, prefix, datadir) + job_index_df.to_csv(fyle, index=False) +else: + job_index_df = pd.read_csv(fyle) + +fyle = mit_dir + '/source_data/job_user_date_full.csv' +if not os.path.exists(fyle): + # Open the slurm log to get the user id for each job. + slurm_df = pd.read_csv(mit_dir + '/source_data/slurm-log.csv') + # Cut out all but the user job mapping + slurm_df = slurm_df[['id_job','id_user']] + + final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') + final_df.to_csv(fyle, index=False) + +print('Pre-processing to create an index linking jobs and users to dates is now complete and can be found in the file ') +print(fyle) + + + + diff --git a/raps/dataloaders/mit_supercloud/dist/node_data_anal.py b/raps/dataloaders/mit_supercloud/dist/node_data_anal.py new file mode 100644 index 0000000..8605f17 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/dist/node_data_anal.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Sep 23 11:46:42 2024 + +@author: daf +""" + +# Analyse the node data. + +import pandas as pd +import numpy as np +import os,subprocess + +# Get the directory of the current file +mit_dir = os.path.dirname(os.path.abspath(__file__)) + +node_fyle = mit_dir+'/source_data/node-data.csv' + +# Define a function to skip rows that are not multiples of 4 + + +# Calculate the total number of rows in the file (optional, to improve efficiency) +Nr = sum(1 for row in open(node_fyle)) # 34M rows. +K=100 # Reduction factor. 
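+# Keep roughly every K-th row: with ~34M rows this leaves ~340k samples.
+# (keep_rows is informational only; the awk command below does the filtering.)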
+keep_rows = np.arange(3, Nr, K) + +temp_fyle = node_fyle[:-13] + 'temp.csv' +cmd = f"awk 'NR == 1 || NR % {K} == 0' \"{node_fyle}\" > \"{temp_fyle}\"" + + +# Run the awk command using subprocess +subprocess.run(cmd, shell=True, check=True) + + +# Read the CSV file, skipping rows that are not multiples of 4 +df = pd.read_csv(temp_fyle) +df['datetime'] = pd.to_datetime(df['Time'], unit='s') + +# Display the resulting DataFrame +print(df) \ No newline at end of file diff --git a/raps/dataloaders/mit_supercloud/dist/parse_mit_data.py b/raps/dataloaders/mit_supercloud/dist/parse_mit_data.py new file mode 100644 index 0000000..3b6b691 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/dist/parse_mit_data.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Sep 20 10:14:23 2024 + +@author: daf +""" + + +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +import os + +# Get the directory of the current file +mit_dir = os.path.dirname(os.path.abspath(__file__)) + + + +def list_s3_files_and_sizes(bucket_name, prefix=''): + # Initialize an S3 client with no signing + s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) + + file_names = [] + file_sizes_gb = [] + + paginator = s3.get_paginator('list_objects_v2') + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + namm = obj['Key'] + file_names.append(namm) + file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to GB + file_sizes_gb.append(file_size_gb) + print(f"{namm}: {file_size_gb:.4f} MB") + + return file_names, file_sizes_gb + +def download_s3_bucket(bucket_name, prefix, datadir): + s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) + paginator = s3.get_paginator('list_objects_v2') + + # Recursively download all files with the given prefix + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + # Get the file's S3 key + s3_key = obj['Key'] + local_file_path = os.path.join(datadir, s3_key) + local_dir = os.path.dirname(local_file_path) + if not os.path.exists(local_dir): + os.makedirs(local_dir) + print(f"Downloading {s3_key} to {local_file_path}...") + s3.download_file(bucket_name, s3_key, local_file_path) + +# Replace 'your-bucket-name' with the actual S3 bucket name +bucket_name = 'mit-supercloud-dataset' +prefix = 'datacenter-challenge/202201/' + +# Get the list of file names and sizes +file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) + + +# download one cpu and 1 gpu of data. +bucket_name = 'mit-supercloud-dataset' +subfolder = 'datacenter-challenge/202201/cpu/0026/' +datadir = mit_dir + '/source_data' +download_s3_bucket(bucket_name, subfolder, datadir) + +subfolder = 'datacenter-challenge/202201/gpu/0020/' +download_s3_bucket(bucket_name, subfolder, datadir) + + +# Output the results +print("Files in S3 bucket:") +for name, size in zip(file_names, file_sizes_gb): + print(f"{name}: {size:.2f} GB") + +# Example: You can use the lists for further processing +# file_names -> list of file paths +# file_sizes_gb -> list of file sizes in GB \ No newline at end of file diff --git a/raps/dataloaders/mit_supercloud/dist/readme.txt b/raps/dataloaders/mit_supercloud/dist/readme.txt new file mode 100644 index 0000000..fa5e767 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/dist/readme.txt @@ -0,0 +1,21 @@ +MIT supercloud data. 
https://dcc.mit.edu/dataconda + +To install S3 client. +sudo apt install awscli + +aws s3 ls s3://mit-supercloud-dataset/datacenter-challenge/202201/ --no-sign-request + + + +# Conda env creation: +conda create --name parser \ +boto3 numpy pandas spyder pyarrow fastparquet h5py matplotlib seaborn scikit-learn scipy requests beautifulsoup4 sqlalchemy openpyxl xlrd + + +conda activate parser + +spyder + +From within spyder you can access the data using parse_mit_data.py + + diff --git a/raps/dataloaders/mit_supercloud/dist/setup.py b/raps/dataloaders/mit_supercloud/dist/setup.py new file mode 100644 index 0000000..1d7cd9c --- /dev/null +++ b/raps/dataloaders/mit_supercloud/dist/setup.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Sep 20 11:18:26 2024 + +@author: daf +""" + +# Download the paper describing the data +import requests +import os +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +import os +import pandas as pd +from io import StringIO + + +############### Dir setup +# Get the directory of the current file +mit_dir = os.path.dirname(os.path.abspath(__file__)) + +# Create a local directory structure +dirs = ['source_data','papers'] +for s in dirs: + local_dir = mit_dir + '/'+s + if not os.path.exists(local_dir): + os.makedirs(local_dir) + +# URL of the PDF file +url = 'https://arxiv.org/pdf/2108.02037' + +# Send a GET request to the URL +response = requests.get(url) +# Check if the request was successful +if response.status_code == 200: + # Specify the local filename to save + pdf_filename = mit_dir + '/papers/2108.02037.pdf' + + + # Write the content to a local file + with open(pdf_filename, 'wb') as file: + file.write(response.content) + +# Download the summary data only from the server to get the dates for each trace. + +############### Create an index file to allow us to select jobs by date. 
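+# The index built below maps each job id to its summary file, its first/last
+# epoch timestamps and its node count, so later scripts can pick jobs by date
+# without touching the ~2TB of trace data.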
+ + +def list_s3_files_and_sizes(bucket_name, prefix=''): + # Initialize an S3 client with no signing + s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) + + file_names = [] + file_sizes_gb = [] + + paginator = s3.get_paginator('list_objects_v2') + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + namm = obj['Key'] + file_names.append(namm) + file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to GB + file_sizes_gb.append(file_size_gb) + print(f"{namm}: {file_size_gb:.4f} MB") + + return file_names, file_sizes_gb + +def download_s3_bucket(bucket_name, prefix, datadir): + s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) + paginator = s3.get_paginator('list_objects_v2') + + # Recursively download all files with the given prefix + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + # Get the file's S3 key + s3_key = obj['Key'] + s3_stem = s3_key[28:] + local_file_path = os.path.join(datadir, s3_stem) + + local_dir = os.path.dirname(local_file_path) + if not os.path.exists(local_dir): + os.makedirs(local_dir) + print(f"Downloading {s3_key} to {local_file_path}...") + s3.download_file(bucket_name, s3_key, local_file_path) + +def index_summary_file(bucket_name, prefix, datadir): + paginator = s3.get_paginator('list_objects_v2') + results = [] + # Check if the bucket contains any objects + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + key = obj['Key'] + # Check if the key ends with '-summary.csv' + if key.endswith('-summary.csv'): + # Read the CSV file from S3 into a DataFrame + csv_obj = s3.get_object(Bucket=bucket_name, Key=key) + body = csv_obj['Body'].read().decode('utf-8') # Decode bytes to string + + # Use StringIO to read the CSV data + df = pd.read_csv(StringIO(body)) + + # Get the maximum value from the 'epoch' column + st_time = df['Min_EpochTime'].min() + ed_time = df['Max_EpochTime'].max() + node_count = df.Node.unique().shape[0] + jobid = int(key.split('/')[-1].split('-')[0]) + # Append the results to the DataFrame + results.append({'job_id': jobid, 'filename': key, 'start': st_time, 'end' : ed_time, 'node_count': node_count}) + print(f"Processed: {key}") + + df = pd.DataFrame(results) + return df + +# MIT S3 bucket address. 
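+# Every artefact below is only (re)built when it is missing on disk, so the
+# script can be re-run safely after a partial download.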
+bucket_name = 'mit-supercloud-dataset' +prefix = 'datacenter-challenge/202201/' + +# Get the list of S3 file names and sizes and save (unless its already there) +fyle = mit_dir + '/source_data/file_list.csv' + +# Create the file list if its not there already +if not os.path.exists(fyle): + file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) + # Open a file in write mode + with open(fyle, "w") as file: + # Write the header (optional) + file.write("File Name\tSize (MB)\n") + # Iterate over both lists and write each file name and its size + for name, size in zip(file_names, file_sizes_gb): + file.write(f"{name}\t{size:.2f} \n") + +# Download the following root dir files (always) +s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) +dfiles = ['LICENSE','README.md','labelled_job_stats.csv','labelled_jobids.csv' + ,'node-data.csv','slurm-log.csv','tres-mapping.txt'] + +for s in dfiles: + fyle = os.path.join(mit_dir +'/source_data', os.path.basename(s)) + if not os.path.exists(fyle): + s3.download_file(bucket_name, prefix + s, fyle) + +# Create the job-user-date index file if it doesnt exist already. +datadir = mit_dir + '/source_data' +fyle = mit_dir + '/source_data/job_user_date.csv' +# Check if file exists +if not os.path.exists(fyle): + print('This can take about 24 hours to complete.') + job_index_df = index_summary_file(bucket_name, prefix, datadir) + job_index_df.to_csv(fyle, index=False) +else: + job_index_df = pd.read_csv(fyle) + +fyle = mit_dir + '/source_data/job_user_date_full.csv' +if not os.path.exists(fyle): + # Open the slurm log to get the user id for each job. + slurm_df = pd.read_csv(mit_dir + '/source_data/slurm-log.csv') + # Cut out all but the user job mapping + slurm_df = slurm_df[['id_job','id_user']] + + final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') + final_df.to_csv(fyle, index=False) +print('Pre-processing to create an index linking jobs and users to dates is now complete and can be found in the file ') +print(fyle) + + +print('The MIT supercloud is now set up, the paper describing the dataset can be found in /papers') +print('The slurm-log and node data has been downloaded. However no cpu or gpu job traces have been downloaded. 
As there are 2TB of these we have created a script called create_trace.py to allow you to download and select a subset of the data dependent on time.') + diff --git a/raps/dataloaders/mit_supercloud/generate_local_metadata.py b/raps/dataloaders/mit_supercloud/generate_local_metadata.py new file mode 100644 index 0000000..10c0dcd --- /dev/null +++ b/raps/dataloaders/mit_supercloud/generate_local_metadata.py @@ -0,0 +1,114 @@ +import os +import pandas as pd +import csv +from tqdm import tqdm + +def generate_local_metadata(local_dataset_root_path): + mit_dir = os.path.dirname(os.path.abspath(__file__)) + source_data_dir = os.path.join(mit_dir, 'source_data') + os.makedirs(source_data_dir, exist_ok=True) + + print(f"Generating metadata in: {source_data_dir}") + + # --- Generate file_list.csv --- + file_list_path = os.path.join(source_data_dir, 'file_list.csv') + print(f"Creating {file_list_path}...") + all_files = [] + for root, _, files in os.walk(local_dataset_root_path): + for file in files: + all_files.append(os.path.join(root, file)) + + with open(file_list_path, 'w', newline='') as f: + writer = csv.writer(f, delimiter=' ') + writer.writerow(["File Name", "Size (MB)"]) + for full_path in tqdm(all_files, desc="Generating file_list.csv"): + relative_path = os.path.relpath(full_path, local_dataset_root_path) + file_size_bytes = os.path.getsize(full_path) + file_size_mb = file_size_bytes / (1024 * 1024) + writer.writerow([relative_path, f"{file_size_mb:.2f}"]) + print(f"Finished creating {file_list_path}") + + # --- Generate job_user_date.csv --- + job_user_date_path = os.path.join(source_data_dir, 'job_user_date.csv') + print(f"Creating {job_user_date_path} (resumable)...") + + all_summary_files = [] + for root, _, files in os.walk(local_dataset_root_path): + for file in files: + if file.endswith('-summary.csv'): + all_summary_files.append(os.path.join(root, file)) + + processed_job_ids = set() + if os.path.exists(job_user_date_path): + try: + existing_df = pd.read_csv(job_user_date_path) + processed_job_ids = set(existing_df['job_id'].tolist()) + write_mode = 'a' + header = False + except pd.errors.EmptyDataError: + write_mode = 'w' + header = True + else: + write_mode = 'w' + header = True + + with open(job_user_date_path, write_mode, newline='') as f: + writer = csv.writer(f) + if header: + writer.writerow(["job_id", "filename", "start", "end", "node_count"]) + + for full_summary_path in tqdm(all_summary_files, desc="Generating job_user_date.csv"): + file = os.path.basename(full_summary_path) + jobid = int(file.split('-')[0]) + + if jobid in processed_job_ids: + continue # Skip already processed + + try: + df = pd.read_csv(full_summary_path) + st_time = df['Min_EpochTime'].min() + ed_time = df['Max_EpochTime'].max() + node_count = df.Node.unique().shape[0] + relative_filename = os.path.relpath(full_summary_path, local_dataset_root_path) + writer.writerow([jobid, relative_filename, st_time, ed_time, node_count]) + processed_job_ids.add(jobid) + except Exception as e: + print(f"Error processing local summary file {full_summary_path}: {e}") + print(f"Finished creating {job_user_date_path}") + + # --- Generate job_user_date_full.csv --- + job_user_date_full_path = os.path.join(source_data_dir, 'job_user_date_full.csv') + + # Search for slurm-log.csv anywhere within the local dataset root + slurm_log_path = None + for root, _, files in os.walk(local_dataset_root_path): + if 'slurm-log.csv' in files: + slurm_log_path = os.path.join(root, 'slurm-log.csv') + break + + if slurm_log_path is None: 
+ print(f"Warning: slurm-log.csv not found in {local_dataset_root_path}. Skipping job_user_date_full.csv generation.") + return + + if os.path.exists(job_user_date_path) and os.path.exists(slurm_log_path): + print(f"Creating {job_user_date_full_path}...") + try: + job_index_df = pd.read_csv(job_user_date_path) + slurm_df = pd.read_csv(slurm_log_path) + slurm_df = slurm_df[['id_job', 'id_user']] + final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') + final_df.to_csv(job_user_date_full_path, index=False) + print(f"Finished creating {job_user_date_full_path}") + except Exception as e: + print(f"Error creating {job_user_date_full_path}: {e}") + else: + print(f"Skipping {job_user_date_full_path}: one or both of {job_user_date_path} or {slurm_log_path} not found.") + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description="Generate local metadata files for MIT Supercloud dataset.") + parser.add_argument("local_dataset_path", type=str, + help="The root path to your locally downloaded MIT Supercloud dataset.") + args = parser.parse_args() + + generate_local_metadata(args.local_dataset_path) diff --git a/raps/dataloaders/mit_supercloud/setup.py b/raps/dataloaders/mit_supercloud/setup.py new file mode 100644 index 0000000..7a66ece --- /dev/null +++ b/raps/dataloaders/mit_supercloud/setup.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Sep 20 11:18:26 2024 + +@author: daf +""" + +# Download the paper describing the data +import requests +import os +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +import os +import pandas as pd +from io import StringIO + + +############### Dir setup +# Get the directory of the current file +mit_dir = os.path.dirname(os.path.abspath(__file__)) + +# Create a local directory structure +dirs = ['source_data','papers'] +for s in dirs: + local_dir = mit_dir + '/'+s + if not os.path.exists(local_dir): + os.makedirs(local_dir) + +## URL of the PDF file +#url = 'https://arxiv.org/pdf/2108.02037' +# +## Send a GET request to the URL +#response = requests.get(url) +## Check if the request was successful +#if response.status_code == 200: +# # Specify the local filename to save +# pdf_filename = mit_dir + '/papers/2108.02037.pdf' +# +# +# # Write the content to a local file +# with open(pdf_filename, 'wb') as file: +# file.write(response.content) + +# Download the summary data only from the server to get the dates for each trace. + +############### Create an index file to allow us to select jobs by date. 
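+# Unlike the dist/ version, index_summary_file below streams each result to
+# the CSV and remembers processed job ids, so the ~24-hour indexing run can
+# be resumed after an interruption instead of restarting from scratch.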
+ + +def list_s3_files_and_sizes(bucket_name, prefix=''): + # Initialize an S3 client with no signing + s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) + + file_names = [] + file_sizes_gb = [] + + paginator = s3.get_paginator('list_objects_v2') + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + namm = obj['Key'] + file_names.append(namm) + file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to GB + file_sizes_gb.append(file_size_gb) + print(f"{namm}: {file_size_gb:.4f} MB") + + return file_names, file_sizes_gb + +def download_s3_bucket(bucket_name, prefix, datadir): + s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) + paginator = s3.get_paginator('list_objects_v2') + + # Recursively download all files with the given prefix + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + # Get the file's S3 key + s3_key = obj['Key'] + s3_stem = s3_key[28:] + local_file_path = os.path.join(datadir, s3_stem) + + local_dir = os.path.dirname(local_file_path) + if not os.path.exists(local_dir): + os.makedirs(local_dir) + print(f"Downloading {s3_key} to {local_file_path}...") + s3.download_file(bucket_name, s3_key, local_file_path) + +def index_summary_file(bucket_name, prefix, output_csv_path): + s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) + paginator = s3.get_paginator('list_objects_v2') + + processed_job_ids = set() + if os.path.exists(output_csv_path): + try: + existing_df = pd.read_csv(output_csv_path) + processed_job_ids = set(existing_df['job_id'].tolist()) + write_mode = 'a' + header = False + except pd.errors.EmptyDataError: + write_mode = 'w' + header = True + else: + write_mode = 'w' + header = True + + with open(output_csv_path, write_mode, newline='') as f: + if header: + f.write("job_id,filename,start,end,node_count\n") + + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + if 'Contents' in page: + for obj in page['Contents']: + key = obj['Key'] + if key.endswith('-summary.csv'): + jobid = int(key.split('/')[-1].split('-')[0]) + if jobid in processed_job_ids: + print(f"Skipping already processed: {key}") + continue + + try: + csv_obj = s3.get_object(Bucket=bucket_name, Key=key) + body = csv_obj['Body'].read().decode('utf-8') + df = pd.read_csv(StringIO(body)) + + st_time = df['Min_EpochTime'].min() + ed_time = df['Max_EpochTime'].max() + node_count = df.Node.unique().shape[0] + + f.write(f"{jobid},{key},{st_time},{ed_time},{node_count}\n") + processed_job_ids.add(jobid) + print(f"Processed and wrote: {key}") + except Exception as e: + print(f"Error processing {key}: {e}") + continue + print(f"Indexing complete. Data saved to {output_csv_path}") + +# MIT S3 bucket address. 
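+# Note: the clients above pass verify=False, which disables TLS certificate
+# verification; this is presumably a workaround for an intercepting proxy and
+# should be dropped where it is not needed.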
+bucket_name = 'mit-supercloud-dataset' +prefix = 'datacenter-challenge/202201/' + +# Get the list of S3 file names and sizes and save (unless its already there) +fyle = mit_dir + '/source_data/file_list.csv' + +# Create the file list if its not there already +if not os.path.exists(fyle): + file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) + # Open a file in write mode + with open(fyle, "w") as file: + # Write the header (optional) + file.write("File Name\tSize (MB)\n") + # Iterate over both lists and write each file name and its size + for name, size in zip(file_names, file_sizes_gb): + file.write(f"{name}\t{size:.2f} \n") + +# Download the following root dir files (always) +s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) +dfiles = ['LICENSE','README.md','labelled_job_stats.csv','labelled_jobids.csv' + ,'node-data.csv','slurm-log.csv','tres-mapping.txt'] + +for s in dfiles: + fyle = os.path.join(mit_dir +'/source_data', os.path.basename(s)) + if not os.path.exists(fyle): + s3.download_file(bucket_name, prefix + s, fyle) + +# Create the job-user-date index file if it doesnt exist already. +datadir = mit_dir + '/source_data' +fyle = mit_dir + '/source_data/job_user_date.csv' +# Check if file exists +if not os.path.exists(fyle): + print('This can take about 24 hours to complete.') + index_summary_file(bucket_name, prefix, fyle) +else: + job_index_df = pd.read_csv(fyle) + +fyle = mit_dir + '/source_data/job_user_date_full.csv' +if not os.path.exists(fyle): + # Open the slurm log to get the user id for each job. + slurm_df = pd.read_csv(mit_dir + '/source_data/slurm-log.csv') + # Cut out all but the user job mapping + slurm_df = slurm_df[['id_job','id_user']] + + final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') + final_df.to_csv(fyle, index=False) +print('Pre-processing to create an index linking jobs and users to dates is now complete and can be found in the file ') +print(fyle) + + +print('The MIT supercloud is now set up, the paper describing the dataset can be found in /papers') +print('The slurm-log and node data has been downloaded. However no cpu or gpu job traces have been downloaded. As there are 2TB of these we have created a script called create_trace.py to allow you to download and select a subset of the data dependent on time.') + -- GitLab From 6ab4ccd3820bdfd33dd5fc8029c189180c6846ab Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 1 Jul 2025 01:49:58 -0400 Subject: [PATCH 139/388] Get mit_supercloud (cpu partition) working! 
--- config/mit_supercloud/scheduler.json | 2 +- .../mit_supercloud/create_trace.py | 157 ++++++++++++------ raps/telemetry.py | 4 +- raps/utils.py | 3 + 4 files changed, 114 insertions(+), 52 deletions(-) diff --git a/config/mit_supercloud/scheduler.json b/config/mit_supercloud/scheduler.json index 0ea905d..3f081cc 100644 --- a/config/mit_supercloud/scheduler.json +++ b/config/mit_supercloud/scheduler.json @@ -3,7 +3,7 @@ "JOB_ARRIVAL_TIME": 900, "MTBF": 11, "MAX_TIME": 88200, - "TRACE_QUANTA": 20, + "TRACE_QUANTA": 10, "MIN_WALL_TIME": 3600, "MAX_WALL_TIME": 43200, "UI_UPDATE_FREQ": 900, diff --git a/raps/dataloaders/mit_supercloud/create_trace.py b/raps/dataloaders/mit_supercloud/create_trace.py index a96ce27..98effd6 100644 --- a/raps/dataloaders/mit_supercloud/create_trace.py +++ b/raps/dataloaders/mit_supercloud/create_trace.py @@ -9,24 +9,28 @@ Created on Fri Sep 20 10:14:23 2024 # Given a start and end date identify those jobs that occur in this range and then download them # from S3 into data/trace as a pcikle file (all traces will be in the same file) -import boto3 -from botocore import UNSIGNED -from botocore.client import Config +# Standard library +import argparse +import gzip +import math import os -import pandas as pd -import numpy as np -from io import StringIO import pickle -from datetime import datetime import shutil -import gzip -from scipy.sparse import csr_matrix as csr -import matplotlib.pyplot as plt -import argparse -from tqdm import tqdm import sys +from datetime import datetime +from io import StringIO from types import SimpleNamespace +# Third-party +import boto3 +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from botocore import UNSIGNED +from botocore.client import Config +from scipy.sparse import csr_matrix as csr +from tqdm import tqdm + # Add the raps project root to the Python path sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) @@ -155,11 +159,11 @@ def main(local_dataset_path, start_date, end_date): cnt = 0 data_dict = {} for s in dfiles: - if cnt%100==0: + if cnt%100 == 0: print('processing file ' + str(cnt) + ' of ' + str(L)) - cnt = cnt+1 + cnt += 1 fyle = os.path.join(mit_dir +'/data/trace/', s.split('/')[-1]) - dfi = pd.read_csv(fyle) + dfi = pd.read_csv(fyle, dtype={0: str}) jobid = int(s.split('-')[0]) if jobid not in data_dict.keys(): @@ -186,7 +190,6 @@ def main(local_dataset_path, start_date, end_date): node = s.split('-')[2].split('.csv')[0] cpu_df = data_dict[jobid]['cpu'] - if 'gpu' not in data_dict[jobid].keys(): data_dict[jobid]['gpu'] = {} data_dict[jobid]['gpu_cnt']=0 @@ -209,32 +212,84 @@ def main(local_dataset_path, start_date, end_date): pass # crashhere was here, but we'll let it continue for now data_dict[jobid]['gpu'] = df_merged data_dict[jobid]['gpu_cnt']= gpu_cnt + # Create a list of job dictionaries jobs_list = [] + min_overall_utime = float('inf') + max_overall_utime = 0 + + print("determining start time...") + for jobid, data in data_dict.items(): - cpu_trace = data.get('cpu', {}).get('cpu_utilisation', []) + # Determine job-specific start and end times from CPU trace + job_start_time = data.get('cpu', {}).get('utime', pd.Series()).min() + job_end_time = data.get('cpu', {}).get('utime', pd.Series()).max() + + # Update overall min/max utime + if not pd.isna(job_start_time) and job_start_time < min_overall_utime: + min_overall_utime = job_start_time + if not pd.isna(job_end_time) and job_end_time > max_overall_utime: + max_overall_utime = job_end_time + + 
print("min_overall_utime:", min_overall_utime) + print("max_overall_utime:", max_overall_utime) + total_sim_time = max_overall_utime - min_overall_utime + print("total_sim_time:", total_sim_time) + + for jobid, data in data_dict.items(): + cpu_trace_data = data.get('cpu', {}).get('cpu_utilisation', []) + if isinstance(cpu_trace_data, pd.Series): + cpu_trace = cpu_trace_data.tolist() + elif isinstance(cpu_trace_data, np.ndarray): + cpu_trace = cpu_trace_data.tolist() + else: + cpu_trace = cpu_trace_data + + trace_time = len(cpu_trace) * 10. # seconds + gpu_trace = data.get('gpu', {}) if not isinstance(gpu_trace, pd.DataFrame) or gpu_trace.empty: - gpu_trace_list = [] + #gpu_trace_list = [] + gpu_trace_list = 0 else: # Assuming gpu_trace is a DataFrame that needs to be converted to a list of lists or similar gpu_trace_list = gpu_trace.values.tolist() + # Determine job-specific start and end times from CPU trace + job_start_time = data.get('cpu', {}).get('utime', pd.Series()).min() - min_overall_utime + job_end_time = data.get('cpu', {}).get('utime', pd.Series()).max() - min_overall_utime + + # Calculate wall_time, ensuring it's not negative + wall_time = max(0, job_end_time - job_start_time) + + # Infer nodes_required based on max cpu_trace and CPUS_PER_NODE (assuming 2 CPUs per node) + if cpu_trace and max(cpu_trace) > 0: + nodes_required = math.ceil(max(cpu_trace) / 2.0) + else: + nodes_required = 1 + + # If nodes_required > 1, divide cpu_trace by nodes_required to get per-node utilization + if nodes_required > 1 and cpu_trace: + cpu_trace = [x / nodes_required for x in cpu_trace] + job = job_dict( - nodes_required=data.get('n_nodes', 1), + nodes_required=nodes_required, name=data.get('name_job', 'unknown'), account=data.get('name_account', 'unknown'), - cpu_trace=cpu_trace.tolist() if isinstance(cpu_trace, np.ndarray) else cpu_trace, + cpu_trace=cpu_trace, gpu_trace=gpu_trace_list, ntx_trace=[], nrx_trace=[], end_state=data.get('state_end', 'UNKNOWN'), id=jobid, - submit_time=data.get('time_submit', 0), + submit_time=job_start_time, time_limit=data.get('time_limit', 0), - start_time=data.get('time_start', 0), - end_time=data.get('time_end', 0), - wall_time=data.get('time_end', 0) - data.get('time_start', 0) + start_time=job_start_time, + end_time=job_end_time, + wall_time=wall_time, + trace_time=trace_time, + trace_start_time=0, + trace_end_time=trace_time ) jobs_list.append(job) @@ -252,13 +307,14 @@ def main(local_dataset_path, start_date, end_date): #np.savez(fyle_path, jobs=np.array(jobs_list)) # Also include start_timestep, end_timestep, and a placeholder for args np.savez(fyle_path, jobs=np.array(jobs_list), \ - start_timestep=st_date, end_timestep=en_date, \ - args=SimpleNamespace(fastforward=None, system='mit_supercloud', time=en_date)) + start_timestep=0, end_timestep=total_sim_time, \ + args=SimpleNamespace(fastforward=None, system='mit_supercloud', time=total_sim_time)) print(f"Saved {len(jobs_list)} jobs to {fyle_path}") return + def proc_gpu_series(cpu_df,dfi,gpu_cnt): # Process GPU series by interpolating it to the same times as the cpu series. 
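+    # Note: the interpolation target is the CPU grid of one sample per 10 s,
+    # which matches the TRACE_QUANTA of 10 set in scheduler.json above.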
@@ -284,7 +340,6 @@ def proc_gpu_series(cpu_df,dfi,gpu_cnt): ugpus = dfi.gpu_index.unique() gpu_df= pd.DataFrame({'utime': cpu_df['utime'].values}) - for u in ugpus: dfg = dfi[dfi.gpu_index==u].copy() @@ -293,8 +348,6 @@ def proc_gpu_series(cpu_df,dfi,gpu_cnt): 'utilization_memory_pct', 'memory_free_MiB', 'memory_used_MiB', 'temperature_gpu', 'temperature_memory', 'power_draw_W'] - - for ff in fylds: x1 = dfg['t_fixed'].values y1 = dfg[ff].values @@ -313,7 +366,8 @@ def proc_gpu_series(cpu_df,dfi,gpu_cnt): gpu_df.rename(columns=ren, inplace=True) gpu_cnt = gpu_cnt + 1 - return gpu_df,gpu_cnt + return gpu_df, gpu_cnt + def proc_cpu_series(dfi): # This is the code that processes cpu data and performs the following steps: @@ -363,20 +417,26 @@ def proc_cpu_series(dfi): dfi['sid']= sid - - # 2. Outliers. - sift = (dfi.CPUUtilization > 500) & (dfi.CPUUtilization < 600) - # Clip these back to 500 - if sift.sum()>0: - #asd - print('clipping ' + str(sift.sum()) + ' values' ) - dfi.loc[sift, 'CPUUtilization'] = 500 - - # select rows with >600 as outliers. - sift = dfi.CPUUtilization > 600 - if sum(sift)>0: - # Set to the nearest value less than 600. - dfi.loc[sift, 'CPUUtilization'] = dfi['CPUUtilization'].where(~sift).ffill().combine_first(dfi['CPUUtilization']).where(dfi['CPUUtilization'] <= 600) + # 2. Outliers and Normalization. + # Convert to percentage + dfi['CPUUtilization'] = dfi['CPUUtilization'] / 100.0 + # Fill NaN values with 0 + dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) + #print(f"Max CPUUtilization after normalization: {dfi['CPUUtilization'].max()}") + + ## 2. Outliers. + #sift = (dfi.CPUUtilization > 500) & (dfi.CPUUtilization < 600) + # # Clip these back to 500 + #if sift.sum()>0: + # #asd + # print('clipping ' + str(sift.sum()) + ' values' ) + # dfi.loc[sift, 'CPUUtilization'] = 500 + # + ## select rows with >600 as outliers. + #sift = dfi.CPUUtilization > 600 + #if sum(sift)>0: + # # Set to the nearest value less than 600. + # dfi.loc[sift, 'CPUUtilization'] = dfi['CPUUtilization'].where(~sift).ffill().combine_first(dfi['CPUUtilization']).where(dfi['CPUUtilization'] <= 600) # 3. 
There are multiple series so we want to get the maximum (as only one series at a time is active) useries = dfi.Series.unique() @@ -389,7 +449,7 @@ def proc_cpu_series(dfi): Xreadmb = np.zeros((len(useries),inds.shape[0])) Xwritemb = np.zeros((len(useries),inds.shape[0])) - cnt=0 + cnt = 0 for i in useries: sift = dfi.Series == i M = len(inds) @@ -424,8 +484,7 @@ def proc_cpu_series(dfi): df['writemb_' + str(i)] = mm Xwritemb[cnt,:] = mm - - cnt=cnt+1 + cnt += 1 df['cpu_utilisation'] = Xm.mean(axis=0) df['rss'] = Xrss.sum(axis=0) @@ -433,11 +492,12 @@ def proc_cpu_series(dfi): df['readmb'] = Xreadmb.sum(axis=0) df['writemb'] = Xwritemb.sum(axis=0) - df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') df['utime'] = df['timestamp'].astype('int64') // 10**9 + return df + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Process MIT Supercloud data to create job traces.") parser.add_argument("local_dataset_path", type=str, @@ -449,4 +509,3 @@ if __name__ == "__main__": args = parser.parse_args() main(args.local_dataset_path, args.start_date, args.end_date) - diff --git a/raps/telemetry.py b/raps/telemetry.py index 97c76a4..aca32d5 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -97,13 +97,13 @@ if __name__ == "__main__": if args.replay[0].endswith(".npz"): print(f"Loading {args.replay[0]}...") - jobs,_,_ = td.load_snapshot(args.replay[0]) + jobs, _, _, _ = td.load_snapshot(args.replay[0]) if args.arrival == "poisson": for job in tqdm(jobs, desc="Updating requested_nodes"): job['requested_nodes'] = None job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME']) else: - jobs,_,_ = td.load_data(args.replay) + jobs, _, _, _ = td.load_data(args.replay) timesteps = int(max(job['wall_time'] + job['submit_time'] for job in jobs)) diff --git a/raps/utils.py b/raps/utils.py index 26f8856..4cecad6 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -33,6 +33,9 @@ def max_value(values): def convert_seconds(seconds): """Convert seconds to time format: 3661s -> 01:01""" + # if it's a NumPy scalar, extract the Python value + if hasattr(seconds, "item"): + seconds = seconds.item() td = timedelta(seconds=seconds) h, m, _ = str(td).split(':') return f"{h}:{m}" -- GitLab From dbcb713691da36bda84b3ef9230652ef6afbae27 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 1 Jul 2025 23:00:35 -0400 Subject: [PATCH 140/388] Refactor the create_trace.py script and add checks in generate_local_metadata.py to avoid overwriting files --- raps/dataloaders/mit_supercloud/README.md | 7 +- .../mit_supercloud/create_trace.py | 613 +++++------------- .../mit_supercloud/generate_local_metadata.py | 16 +- 3 files changed, 193 insertions(+), 443 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/README.md b/raps/dataloaders/mit_supercloud/README.md index 2813ef1..d5a1d09 100644 --- a/raps/dataloaders/mit_supercloud/README.md +++ b/raps/dataloaders/mit_supercloud/README.md @@ -1,11 +1,12 @@ -to generate subset of data needed for Damien's reader from full installation of MIT Supercloud dataset: +To generate file indices needed for Damien's reader from full installation of MIT Supercloud dataset: +This will generate: `file_list.csv` and `job_user_date_full.csv` python generate_local_metadata.py /lustre/orion/proj-shared/gen150/exadigit/mit_supercloud/datacenter-challenge/202201 -to create the npz file that RAPS can use: +To create the npz file that RAPS can use: python create_trace.py /lustre/orion/proj-shared/gen150/exadigit/mit_supercloud/datacenter-challenge/202201 -then 
to run: +Then to run: python main.py -f raps/dataloaders/mit_supercloud/data/mit_supercloud_jobs_21_05_2021__22_05_2021.npz --system mit_supercloud diff --git a/raps/dataloaders/mit_supercloud/create_trace.py b/raps/dataloaders/mit_supercloud/create_trace.py index 98effd6..27714ba 100644 --- a/raps/dataloaders/mit_supercloud/create_trace.py +++ b/raps/dataloaders/mit_supercloud/create_trace.py @@ -1,33 +1,20 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Created on Fri Sep 20 10:14:23 2024 +Simplified and PEP-8 compliant refactor of the original script. -@author: daf +Original script created on Fri Sep 20 10:14:23 2024 by Damien Fay (HPE) """ -# Given a start and end date identify those jobs that occur in this range and then download them -# from S3 into data/trace as a pcikle file (all traces will be in the same file) - -# Standard library import argparse -import gzip -import math import os -import pickle import shutil import sys from datetime import datetime -from io import StringIO from types import SimpleNamespace -# Third-party -import boto3 -import matplotlib.pyplot as plt import numpy as np import pandas as pd -from botocore import UNSIGNED -from botocore.client import Config from scipy.sparse import csr_matrix as csr from tqdm import tqdm @@ -36,239 +23,184 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '. from raps.job import job_dict -def main(local_dataset_path, start_date, end_date): - - # Get the directory of the current file +def proc_cpu_series(dfi): + dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() + dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 + + t = pd.to_datetime(dfi.EpochTime, unit='s') + start_time = t.min() + dfi['t'] = ((t - start_time).dt.total_seconds() // 10).astype(int) + dfi['sid'] = pd.factorize(dfi.Step)[0] + + useries = dfi.Series.unique() + inds = np.arange(dfi.t.max() + 1) + df = pd.DataFrame({'t': inds}) + Xm, Xrss, Xvm, Xreadmb, Xwritemb = (np.zeros((len(useries), len(inds))) for _ in range(5)) + + for cnt, i in enumerate(useries): + sift = dfi.Series == i + M, N = len(inds), dfi.sid[sift].max() + 1 + + for metric, arr, name in zip( + ['CPUUtilization', 'RSS', 'VMSize', 'ReadMB', 'WriteMB'], + [Xm, Xrss, Xvm, Xreadmb, Xwritemb], + ['cpu', 'rss', 'vm', 'readmb', 'writemb'] + ): + X = csr((dfi.loc[sift, metric], (dfi.loc[sift, 't'], dfi.loc[sift, 'sid'])), shape=(M, N)) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df[f'{name}_{i}'] = mm + arr[cnt, :] = mm + + df['cpu_utilisation'] = Xm.mean(axis=0) + df['rss'] = Xrss.sum(axis=0) + df['vm'] = Xvm.sum(axis=0) + df['readmb'] = Xreadmb.sum(axis=0) + df['writemb'] = Xwritemb.sum(axis=0) + df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') + df['utime'] = df['timestamp'].astype('int64') // 10**9 + + return df + +def proc_gpu_series(cpu_df, dfi, gpu_cnt): + t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max()]) + t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max()]) + + t_cpu_range = t_cpu[1] - t_cpu[0] + t_gpu_range = t_gpu[1] - t_gpu[0] + per_diff = (t_cpu_range - t_gpu_range) / t_gpu_range * 100 + + if abs(per_diff) > 10: + raise ValueError("Time mismatch between CPU and GPU series exceeds 10%") + + dfi['t_fixed'] = dfi.timestamp - dfi.timestamp.min() + t_cpu[0] + ugpus = dfi.gpu_index.unique() + gpu_df = pd.DataFrame({'utime': cpu_df['utime'].values}) + + for u in ugpus: + dfg = dfi[dfi.gpu_index == u].copy() + fields = ['gpu_index', 'utilization_gpu_pct', 
'utilization_memory_pct', 'memory_free_MiB', + 'memory_used_MiB', 'temperature_gpu', 'temperature_memory', 'power_draw_W'] + + for field in fields: + x1, y1 = dfg['t_fixed'].values, dfg[field].values + xv = cpu_df['utime'].values + yv = np.interp(xv, x1, y1) + gpu_df[field] = yv + + rename = { + 'utilization_gpu_pct': f'gpu_{gpu_cnt}', + 'utilization_memory_pct': f'gpu_mem_{gpu_cnt}', + 'temperature_gpu': f'gpu_temp_{gpu_cnt}', + 'power_draw_W': f'gpu_p_{gpu_cnt}' + } + gpu_df.rename(columns=rename, inplace=True) + gpu_cnt += 1 + + return gpu_df, gpu_cnt + +def main(local_dataset_path, start_date_str, end_date_str): mit_dir = os.path.dirname(os.path.abspath(__file__)) - src_data_dir = mit_dir + '/source_data' - ################ Select correct files. - ################ CHANGE THESE 2 LINES ######################### - start_date = '21052021' # EU format day/month/year - end_date = '22052021' - - - - # Load the file list and job index from local source_data - file_list_path = os.path.join(mit_dir, 'source_data', 'file_list.csv') - file_df = pd.read_csv(file_list_path, sep='\t') - gpu_file_df = file_df[file_df['File Name'].str.contains('/gpu/')].copy() - gpu_file_df['jobid'] = gpu_file_df['File Name'].str.extract(r'/([^/]+?)-') - gpu_file_df['jobid'] = gpu_file_df['jobid'].astype(int) - - job_index_path = os.path.join(mit_dir, 'source_data', 'job_user_date_full.csv') - job_index_df = pd.read_csv(job_index_path) - - date_obj = datetime.fromtimestamp(job_index_df.start.min()) - date_min_str = date_obj.strftime('%d-%m-%Y') - date_obj = datetime.fromtimestamp(job_index_df.start.max()) - date_max_str = date_obj.strftime('%d-%m-%Y') - print('Data set contains data between: ' +date_min_str + ' and ' + date_max_str ) - - # Create and clear the trace directory. - tracedir = mit_dir + '/data/trace/' - if os.path.exists(tracedir): - pass # do nothing - might want to change this later - # shutil.rmtree(tracedir) # Remove everything in the folder - # os.makedirs(tracedir) # Recreate the folder after clearing it - else: - os.makedirs(tracedir) - - - st_date = datetime.strptime(start_date, '%d%m%Y') - st_date = int(st_date.timestamp()) - en_date = datetime.strptime(end_date, '%d%m%Y') - en_date = int(en_date.timestamp()) - - if st_date < job_index_df.start.min(): - print('Warning: start date (' + start_date + ') is before the start of the dataset (' + date_min_str + ') ') - if st_date > job_index_df.start.max(): - print('Error: start date (' + start_date + ') is after the end of the dataset (' + date_max_str + ') ') - crashhere - - # find the jobs that start between start and end dates. - sift = (job_index_df.start > st_date) & (job_index_df.start < en_date) - print('You have selected ' + str(sift.sum()) + ' fiies to download ') - - ##################### Copy from local dataset to trace directory - - df = job_index_df[sift].copy() - # The 'filename' column in job_index_df already contains relative paths like 'datacenter-challenge/202201/cpu/0026/jobid-summary.csv' - # We need to convert these to timeseries paths and then to absolute local paths. 
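# --- [editor's sketch, not part of the patch] The np.interp call in proc_gpu_series
# above resamples GPU metrics, recorded on their own clock, onto the CPU 'utime' grid.
# Toy values only:
import numpy as np
x1 = np.array([100.0, 110.0, 120.0])  # GPU sample times (t_fixed)
y1 = np.array([10.0, 30.0, 50.0])     # e.g. a power_draw_W series
xv = np.array([100.0, 105.0, 115.0])  # CPU utime grid
np.interp(xv, x1, y1)                 # -> array([10., 20., 40.])
# --- [end editor's sketch] ---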
- - # Get all unique files that need to be copied (CPU and GPU timeseries) - files_to_copy = [] - - # Add CPU timeseries files - for _, row in df.iterrows(): - relative_summary_path = row['filename'] - # Convert summary path to timeseries path - relative_timeseries_path = relative_summary_path.replace('-summary', '-timeseries') - files_to_copy.append(relative_timeseries_path) - - # Add GPU timeseries files - for _, row in gpu_file_df[gpu_file_df.jobid.isin(df.job_id)].iterrows(): - files_to_copy.append(row['File Name']) # 'File Name' in gpu_file_df is already the relative path to the timeseries file - - # Remove duplicates and ensure unique files - files_to_copy = list(set(files_to_copy)) + tracedir = os.path.join(mit_dir, 'data', 'trace') + os.makedirs(tracedir, exist_ok=True) + + start_ts = int(datetime.strptime(start_date_str, '%d%m%Y').timestamp()) + end_ts = int(datetime.strptime(end_date_str, '%d%m%Y').timestamp()) - print(f"Selected {len(files_to_copy)} trace files to process.") + file_df = pd.read_csv(os.path.join(mit_dir, 'source_data', 'file_list.csv'), sep='\t') + gpu_df = file_df[file_df['File Name'].str.contains('/gpu/')].copy() + gpu_df['jobid'] = gpu_df['File Name'].str.extract(r'/([^/]+?)-').astype(int) - # Copy files to tracedir - for relative_path in tqdm(files_to_copy, desc="Copying trace files to processing directory"): - src_file_path = os.path.join(local_dataset_path, relative_path) - dest_file_name = os.path.basename(relative_path) - dest_file_path = os.path.join(tracedir, dest_file_name) + job_df = pd.read_csv(os.path.join(mit_dir, 'source_data', 'job_user_date_full.csv')) + selected_df = job_df[(job_df.start > start_ts) & (job_df.start < end_ts)].copy() - if not os.path.exists(src_file_path): - print(f"Warning: Source file not found: {src_file_path}. Skipping.") + files_to_copy = [row['filename'].replace('-summary', '-timeseries') for _, row in selected_df.iterrows()] + files_to_copy += gpu_df[gpu_df.jobid.isin(selected_df.job_id)]['File Name'].tolist() + files_to_copy = list(set(files_to_copy)) + + for rel_path in tqdm(files_to_copy, desc="Copying trace files"): + src = os.path.join(local_dataset_path, rel_path) + dst = os.path.join(tracedir, os.path.basename(rel_path)) + if not os.path.exists(src): + print(f"Missing: {src}") continue - - if os.path.exists(dest_file_path): - # Check if source and dest are the same size to avoid unnecessary copy - if os.path.getsize(src_file_path) == os.path.getsize(dest_file_path): - continue # File already copied and is the same, skip - else: - # If sizes differ, re-copy - shutil.copy2(src_file_path, dest_file_path) - else: - shutil.copy2(src_file_path, dest_file_path) - - ##################### Process. - - # Load the slurm log to grab additional attributes from the local dataset. - # Search for slurm-log.csv anywhere within the local dataset root - slurm_log_path = None + if os.path.exists(dst) and os.path.getsize(src) == os.path.getsize(dst): + continue + shutil.copy2(src, dst) + + slurm_log = None for root, _, files in os.walk(local_dataset_path): if 'slurm-log.csv' in files: - slurm_log_path = os.path.join(root, 'slurm-log.csv') + slurm_log = os.path.join(root, 'slurm-log.csv') break - - if slurm_log_path is None: - print(f"Error: slurm-log.csv not found in {local_dataset_path}. 
Cannot proceed.") + if not slurm_log: + print(f"slurm-log.csv not found in {local_dataset_path}.") return - slurm_df = pd.read_csv(slurm_log_path) - - dfiles_raw = os.listdir(tracedir) - # Sort so we process the cpu files first (we need the result for the gpu files) - dfiles = sorted(dfiles_raw, key=lambda x: 'timeseries' not in x) - dfiles = [file for file in dfiles if 'lock' not in file] - - print('Downloaded ' + str(len(dfiles)) + ' files. Processing ... ') - L = len(dfiles) - cnt = 0 + + slurm_df = pd.read_csv(slurm_log) + traced_files = sorted(f for f in os.listdir(tracedir) if 'lock' not in f) + print(f"Processing {len(traced_files)} trace files.") + data_dict = {} - for s in dfiles: - if cnt%100 == 0: - print('processing file ' + str(cnt) + ' of ' + str(L)) - cnt += 1 - fyle = os.path.join(mit_dir +'/data/trace/', s.split('/')[-1]) - dfi = pd.read_csv(fyle, dtype={0: str}) - + for idx, s in enumerate(traced_files): + if idx % 100 == 0: + print(f"processing file {idx} of {len(traced_files)}") + fpath = os.path.join(tracedir, s) + dfi = pd.read_csv(fpath, dtype={0: str}) jobid = int(s.split('-')[0]) - if jobid not in data_dict.keys(): + + if jobid not in data_dict: data_dict[jobid] = {} - # Add slurm data on creation - idx = np.where(slurm_df['id_job']==jobid)[0] - if idx.shape[0]!=1: - crashhere - else: - data_dict[jobid] = slurm_df.iloc[idx[0]].to_dict() - if ('timeseries' in s) and ('lock' not in s): - if 'cpu' in data_dict[jobid].keys(): - print('error a job cant have more than one cpu traces') - crashhere - else: - cpu_ser = proc_cpu_series(dfi) - data_dict[jobid]['cpu'] = cpu_ser - - elif 'gpu_index' in dfi.keys(): - mm = dfi.utilization_gpu_pct.max() - print('GPU max: ' + str(mm) ) - # Get the gpu node and rack + slurm_idx = np.where(slurm_df['id_job'] == jobid)[0] + if slurm_idx.shape[0] != 1: + continue + data_dict[jobid] = slurm_df.iloc[slurm_idx[0]].to_dict() + + if 'timeseries' in s: + if 'cpu' in data_dict[jobid]: + continue + cpu_ser = proc_cpu_series(dfi) + data_dict[jobid]['cpu'] = cpu_ser + + elif 'gpu_index' in dfi.columns: rack = s.split('-')[1] node = s.split('-')[2].split('.csv')[0] - cpu_df = data_dict[jobid]['cpu'] - - if 'gpu' not in data_dict[jobid].keys(): - data_dict[jobid]['gpu'] = {} - data_dict[jobid]['gpu_cnt']=0 - data_dict[jobid]['grack']=[rack] - data_dict[jobid]['gnode']=[node] - gpu_cnt = data_dict[jobid]['gpu_cnt'] - gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt) + cpu_df = data_dict[jobid].get('cpu') + if cpu_df is None: + continue + + gpu_cnt = data_dict[jobid].get('gpu_cnt', 0) + gpu_df = data_dict[jobid].get('gpu') + gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) + + if gpu_df is None: data_dict[jobid]['gpu'] = gpu_ser - else: - data_dict[jobid]['grack'].append(rack) + data_dict[jobid]['grack'] = [rack] + data_dict[jobid]['gnode'] = [node] + else: + data_dict[jobid]['gpu'] = pd.merge(gpu_df, gpu_ser, on='utime') + data_dict[jobid]['grack'].append(rack) data_dict[jobid]['gnode'].append(node) - gpu_df = data_dict[jobid]['gpu'] - gpu_cnt = data_dict[jobid]['gpu_cnt'] - gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt) - # combine with the existing df - df_merged = pd.merge(gpu_df, gpu_ser, on='utime') - if df_merged.shape[0] != gpu_df.shape[0]: - # This indicates a mismatch in time series, which should be investigated if it occurs - # For now, we'll assume it's an error and can be handled by a more robust check if needed. 
- pass # crashhere was here, but we'll let it continue for now - data_dict[jobid]['gpu'] = df_merged - data_dict[jobid]['gpu_cnt']= gpu_cnt - - # Create a list of job dictionaries - jobs_list = [] - min_overall_utime = float('inf') - max_overall_utime = 0 + + data_dict[jobid]['gpu_cnt'] = gpu_cnt print("determining start time...") + min_utime = min(data['cpu']['utime'].min() for data in data_dict.values() if 'cpu' in data) + max_utime = max(data['cpu']['utime'].max() for data in data_dict.values() if 'cpu' in data) + total_sim_time = max_utime - min_utime + jobs_list = [] for jobid, data in data_dict.items(): - # Determine job-specific start and end times from CPU trace - job_start_time = data.get('cpu', {}).get('utime', pd.Series()).min() - job_end_time = data.get('cpu', {}).get('utime', pd.Series()).max() + cpu_trace = data.get('cpu', {}).get('cpu_utilisation', []) + if isinstance(cpu_trace, pd.Series): + cpu_trace = cpu_trace.tolist() - # Update overall min/max utime - if not pd.isna(job_start_time) and job_start_time < min_overall_utime: - min_overall_utime = job_start_time - if not pd.isna(job_end_time) and job_end_time > max_overall_utime: - max_overall_utime = job_end_time + gpu_trace = data.get('gpu') + gpu_trace_list = gpu_trace.values.tolist() if isinstance(gpu_trace, pd.DataFrame) else 0 - print("min_overall_utime:", min_overall_utime) - print("max_overall_utime:", max_overall_utime) - total_sim_time = max_overall_utime - min_overall_utime - print("total_sim_time:", total_sim_time) - - for jobid, data in data_dict.items(): - cpu_trace_data = data.get('cpu', {}).get('cpu_utilisation', []) - if isinstance(cpu_trace_data, pd.Series): - cpu_trace = cpu_trace_data.tolist() - elif isinstance(cpu_trace_data, np.ndarray): - cpu_trace = cpu_trace_data.tolist() - else: - cpu_trace = cpu_trace_data - - trace_time = len(cpu_trace) * 10. 
# seconds - - gpu_trace = data.get('gpu', {}) - if not isinstance(gpu_trace, pd.DataFrame) or gpu_trace.empty: - #gpu_trace_list = [] - gpu_trace_list = 0 - else: - # Assuming gpu_trace is a DataFrame that needs to be converted to a list of lists or similar - gpu_trace_list = gpu_trace.values.tolist() - - # Determine job-specific start and end times from CPU trace - job_start_time = data.get('cpu', {}).get('utime', pd.Series()).min() - min_overall_utime - job_end_time = data.get('cpu', {}).get('utime', pd.Series()).max() - min_overall_utime - - # Calculate wall_time, ensuring it's not negative + job_start_time = data['cpu']['utime'].min() - min_utime + job_end_time = data['cpu']['utime'].max() - min_utime wall_time = max(0, job_end_time - job_start_time) - - # Infer nodes_required based on max cpu_trace and CPUS_PER_NODE (assuming 2 CPUs per node) - if cpu_trace and max(cpu_trace) > 0: - nodes_required = math.ceil(max(cpu_trace) / 2.0) - else: - nodes_required = 1 - - # If nodes_required > 1, divide cpu_trace by nodes_required to get per-node utilization + nodes_required = max(1, int(np.ceil(max(cpu_trace) / 2.0))) if cpu_trace else 1 if nodes_required > 1 and cpu_trace: cpu_trace = [x / nodes_required for x in cpu_trace] @@ -287,225 +219,30 @@ def main(local_dataset_path, start_date, end_date): start_time=job_start_time, end_time=job_end_time, wall_time=wall_time, - trace_time=trace_time, + trace_time=len(cpu_trace) * 10.0, trace_start_time=0, - trace_end_time=trace_time + trace_end_time=len(cpu_trace) * 10.0 ) jobs_list.append(job) - # Save the list of jobs to an npz file - npz_dir = os.path.join(mit_dir, 'data') - os.makedirs(npz_dir, exist_ok=True) - t1 = datetime.fromtimestamp(st_date) - tf1 = t1.strftime('%d_%m_%Y') - t2 = datetime.fromtimestamp(en_date) - tf2 = t2.strftime('%d_%m_%Y') - fyle_name = f'mit_supercloud_jobs_{tf1}__{tf2}.npz' - fyle_path = os.path.join(npz_dir, fyle_name) - - # Convert list of dictionaries to a structured array for saving - #np.savez(fyle_path, jobs=np.array(jobs_list)) - # Also include start_timestep, end_timestep, and a placeholder for args - np.savez(fyle_path, jobs=np.array(jobs_list), \ - start_timestep=0, end_timestep=total_sim_time, \ - args=SimpleNamespace(fastforward=None, system='mit_supercloud', time=total_sim_time)) - - print(f"Saved {len(jobs_list)} jobs to {fyle_path}") - - return - - -def proc_gpu_series(cpu_df,dfi,gpu_cnt): - # Process GPU series by interpolating it to the same times as the cpu series. - - # time checks - t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max() , 0]) - t_cpu[2]=t_cpu[1]-t_cpu[0] - t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max(),0]) - t_gpu[2]=t_gpu[1]-t_gpu[0] - - dcpu = pd.to_datetime(t_cpu, unit='s') - dgpu = pd.to_datetime(t_gpu, unit='s') - t1 = (dcpu[1]-dcpu[0]).total_seconds() - t2 = (dgpu[1]-dgpu[0]).total_seconds() - per_dif = (t1-t2)/t2*100 - print(per_dif) - if abs(per_dif) > 10: - # More than 2% difference in the time taken, halt and look at it - crashhere - - # So move the GPU time to the CPU times. 
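# --- [editor's note, not part of the patch] The shift in the line above,
# t_fixed = timestamp - timestamp.min() + t_cpu[0], only translates the GPU clock onto
# the CPU clock's origin: GPU stamps [5000, 5010, 5020] with CPU start t_cpu[0] = 100
# become t_fixed = [100, 110, 120]; sample spacing is preserved, only the origin moves.
# --- [end editor's note] ---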
- dfi['t_fixed'] = dfi.timestamp-dfi.timestamp.min()+t_cpu[0] - - ugpus = dfi.gpu_index.unique() - gpu_df= pd.DataFrame({'utime': cpu_df['utime'].values}) - - for u in ugpus: - dfg = dfi[dfi.gpu_index==u].copy() - - # Perform an interpolation - fylds = ['gpu_index', 'utilization_gpu_pct', - 'utilization_memory_pct', 'memory_free_MiB', 'memory_used_MiB', - 'temperature_gpu', 'temperature_memory', 'power_draw_W'] - - for ff in fylds: - x1 = dfg['t_fixed'].values - y1 = dfg[ff].values - xv = cpu_df['utime'].values - - # Interpolate using NumPy - yv = np.interp(xv, x1, y1) - - gpu_df[ff] = yv - ss = str(gpu_cnt) - ren = {'utilization_gpu_pct': 'gpu_' + ss, - 'utilization_memory_pct': 'gpu_mem_' + ss, - 'temperature_gpu': 'gpu_temp_' + ss, - 'power_draw_W':'gpu_p_'+ ss, - } - gpu_df.rename(columns=ren, inplace=True) - gpu_cnt = gpu_cnt + 1 - - return gpu_df, gpu_cnt - - -def proc_cpu_series(dfi): - # This is the code that processes cpu data and performs the following steps: - # 1. Remove information from step [-1,-4] as these are empty. - # 2. give outliers their nearest neighbour values. There are spikes of outliers in the utilsation, I think thw whole row is rotten too. They are values like 40000 - # 3. For each series get the max cpu utilisation at each time step. - # Save these for the output. - # 4. Get the average cpu utilsation per series (maxed from step 3) - - # 1 Remove information from step [-1,-4] as these are empty. - sift = dfi.Step.isin([-1,-4,'-1','-4']) - if dfi.CPUUtilization[sift].sum() >0: - print('found a series that breaks the rule, check it') - # The -1 -4 indicators should be for non-events. IF the cpu utilisation has values something is up, might be a spike or something but the rule needs to be changed. - crashhere - # remove - dfi = dfi[~sift].copy() - - # Check for 1-1 series node correspondences and if not then there is an issue we need to clean up. - if False: - unode_series = dfi.groupby(['Node', 'Series']).size().reset_index(name='count') - unode = dfi.Node.unique() - for n in unode: - sift = dfi.Node == n - splits = dfi[sift].groupby('Series').size().reset_index(name='count') - splits = splits.sort_values(by='count', ascending=False) - for i in range(splits.shape[0]): - # Reassign the Series number back to the max for the node. - if i==0: - dest_ser = splits.iloc[i].Series - else: - # reassign the targets. - faulty_ser = splits.iloc[i].Series - sift_reas = sift & (dfi.Series ==faulty_ser ) - dfi.loc[sift,'Series'] = dest_ser - #if sift_reas.sum()>40: - # asd - print('Reassigning ' + str(sift_reas.sum()) + ' rows with faulty series values (from a total of ' + str(splits['count'][0])+ ' )') - t = pd.to_datetime(dfi.EpochTime, unit='s') - start_time = t.min() - steps = (t - start_time).dt.total_seconds() // 10 - # Convert to integer type if needed - steps = steps.astype(int) - dfi['t']= steps - - sid, uniques = pd.factorize(dfi.Step) - dfi['sid']= sid - - - # 2. Outliers and Normalization. - # Convert to percentage - dfi['CPUUtilization'] = dfi['CPUUtilization'] / 100.0 - # Fill NaN values with 0 - dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) - #print(f"Max CPUUtilization after normalization: {dfi['CPUUtilization'].max()}") - - ## 2. Outliers. - #sift = (dfi.CPUUtilization > 500) & (dfi.CPUUtilization < 600) - # # Clip these back to 500 - #if sift.sum()>0: - # #asd - # print('clipping ' + str(sift.sum()) + ' values' ) - # dfi.loc[sift, 'CPUUtilization'] = 500 - # - ## select rows with >600 as outliers. 
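# --- [editor's sketch, not part of the patch] The disabled rule above handled
# utilization spikes in two stages; simpler pandas idioms in the same spirit, on toy
# data (editor's own numbers):
import pandas as pd
s = pd.Series([480.0, 550.0, 9999.0])
s.clip(upper=500)        # -> [480.0, 500.0, 500.0]
s.mask(s > 600).ffill()  # -> [480.0, 550.0, 550.0]; a spike takes the previous value
# --- [end editor's sketch] ---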
- #sift = dfi.CPUUtilization > 600 - #if sum(sift)>0: - # # Set to the nearest value less than 600. - # dfi.loc[sift, 'CPUUtilization'] = dfi['CPUUtilization'].where(~sift).ffill().combine_first(dfi['CPUUtilization']).where(dfi['CPUUtilization'] <= 600) - - # 3. There are multiple series so we want to get the maximum (as only one series at a time is active) - useries = dfi.Series.unique() - inds = np.arange(dfi.t.max()+1) - # Create a data frame to hold the results. - df = pd.DataFrame({'t':inds}) - Xm = np.zeros((len(useries),inds.shape[0])) - Xrss = np.zeros((len(useries),inds.shape[0])) - Xvm = np.zeros((len(useries),inds.shape[0])) - Xreadmb = np.zeros((len(useries),inds.shape[0])) - Xwritemb = np.zeros((len(useries),inds.shape[0])) - - cnt = 0 - for i in useries: - sift = dfi.Series == i - M = len(inds) - N = dfi.sid[sift].max()+1 - # create a #series x #time steps csr then max it to get the actual readings. - X = csr( (dfi.CPUUtilization[sift],(dfi.t[sift],dfi.sid[sift])),shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['cpu_' + str(i)] = mm - Xm[cnt,:] = mm - - # RSS - X = csr( (dfi.RSS[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['rss_' + str(i)] = mm - Xrss[cnt,:] = mm - - # VMsize - X = csr( (dfi.VMSize[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['vm_' + str(i)] = mm - Xvm[cnt,:] = mm - - # ReadMB - X = csr( (dfi.ReadMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['readmb_' + str(i)] = mm - Xreadmb[cnt,:] = mm - - # WriteMB - X = csr( (dfi.WriteMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['writemb_' + str(i)] = mm - Xwritemb[cnt,:] = mm - - cnt += 1 - - df['cpu_utilisation'] = Xm.mean(axis=0) - df['rss'] = Xrss.sum(axis=0) - df['vm'] = Xvm.sum(axis=0) - df['readmb'] = Xreadmb.sum(axis=0) - df['writemb'] = Xwritemb.sum(axis=0) - - df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') - df['utime'] = df['timestamp'].astype('int64') // 10**9 - - return df + tf1 = datetime.fromtimestamp(start_ts).strftime('%d_%m_%Y') + tf2 = datetime.fromtimestamp(end_ts).strftime('%d_%m_%Y') + save_path = os.path.join(mit_dir, 'data', f'mit_supercloud_jobs_{tf1}__{tf2}.npz') + os.makedirs(os.path.dirname(save_path), exist_ok=True) + np.savez( + save_path, + jobs=np.array(jobs_list), + start_timestep=0, + end_timestep=total_sim_time, + args=SimpleNamespace(fastforward=None, system='mit_supercloud', time=total_sim_time) + ) + print(f"Saved {len(jobs_list)} jobs to {save_path}") if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Process MIT Supercloud data to create job traces.") - parser.add_argument("local_dataset_path", type=str, - help="The root path to your locally downloaded MIT Supercloud dataset.") - parser.add_argument("--start_date", type=str, default='21052021', - help="Start date for job selection (DDMMYYYY).") - parser.add_argument("--end_date", type=str, default='22052021', - help="End date for job selection (DDMMYYYY).") + parser = argparse.ArgumentParser(description="Generate job trace data from MIT Supercloud logs.") + parser.add_argument("local_dataset_path", type=str, help="Path to the dataset root.") + parser.add_argument("--start_date", default="21052021", help="Start date in DDMMYYYY format.") + parser.add_argument("--end_date", default="22052021", 
help="End date in DDMMYYYY format.") args = parser.parse_args() - main(args.local_dataset_path, args.start_date, args.end_date) diff --git a/raps/dataloaders/mit_supercloud/generate_local_metadata.py b/raps/dataloaders/mit_supercloud/generate_local_metadata.py index 10c0dcd..b699934 100644 --- a/raps/dataloaders/mit_supercloud/generate_local_metadata.py +++ b/raps/dataloaders/mit_supercloud/generate_local_metadata.py @@ -1,11 +1,14 @@ +import csv import os import pandas as pd -import csv +import sys from tqdm import tqdm +source_dir = 'source_data' + def generate_local_metadata(local_dataset_root_path): mit_dir = os.path.dirname(os.path.abspath(__file__)) - source_data_dir = os.path.join(mit_dir, 'source_data') + source_data_dir = os.path.join(mit_dir, source_dir) os.makedirs(source_data_dir, exist_ok=True) print(f"Generating metadata in: {source_data_dir}") @@ -110,5 +113,14 @@ if __name__ == "__main__": parser.add_argument("local_dataset_path", type=str, help="The root path to your locally downloaded MIT Supercloud dataset.") args = parser.parse_args() + + if os.path.isdir(source_dir): + response = input(f"If you continue, files in '{source_dir}' will be overwritten.\nDo you want to continue? (y or n): ") + if response.lower() != 'y': + print("Operation cancelled.") + sys.exit(1) + + # Continue with the rest of your code here + print("Continuing with the operation...") generate_local_metadata(args.local_dataset_path) -- GitLab From 9d7b9b217282681fae0076fbd0e9ed8745c7214e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 2 Jul 2025 00:43:01 -0400 Subject: [PATCH 141/388] Get mit_supercloud.py working as a standalone dataloader --- README.md | 4 + raps/dataloaders/mit_supercloud.py | 276 ++++++++++++++++++---- raps/dataloaders/mit_supercloud/README.md | 2 +- 3 files changed, 238 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index dc69283..36a4408 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,10 @@ For Google cluster trace v2 # gcloudv2 python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample +For MIT Supercloud + + python main.py -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud + ## Perform Network Simulation Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py index 6679c29..290b154 100644 --- a/raps/dataloaders/mit_supercloud.py +++ b/raps/dataloaders/mit_supercloud.py @@ -1,52 +1,242 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +MIT Supercloud job trace processing module with load_data function. +""" -import numpy as np import os +import shutil +import sys +from datetime import datetime + +import numpy as np +import pandas as pd +from scipy.sparse import csr_matrix as csr +from tqdm import tqdm + from raps.job import job_dict -def load_data(file_path, **kwargs): - """ - Loads MIT Supercloud data from a pickled file and transforms it into a list of job dictionaries. 
+def proc_cpu_series(dfi): + dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() + dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 + + t = pd.to_datetime(dfi.EpochTime, unit='s') + start_time = t.min() + dfi['t'] = ((t - start_time).dt.total_seconds() // 10).astype(int) + dfi['sid'] = pd.factorize(dfi.Step)[0] + + useries = dfi.Series.unique() + inds = np.arange(dfi.t.max() + 1) + df = pd.DataFrame({'t': inds}) + Xm, Xrss, Xvm, Xreadmb, Xwritemb = (np.zeros((len(useries), len(inds))) for _ in range(5)) + + for cnt, i in enumerate(useries): + sift = dfi.Series == i + M, N = len(inds), dfi.sid[sift].max() + 1 + + for metric, arr, name in zip( + ['CPUUtilization', 'RSS', 'VMSize', 'ReadMB', 'WriteMB'], + [Xm, Xrss, Xvm, Xreadmb, Xwritemb], + ['cpu', 'rss', 'vm', 'readmb', 'writemb'] + ): + X = csr((dfi.loc[sift, metric], (dfi.loc[sift, 't'], dfi.loc[sift, 'sid'])), shape=(M, N)) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df[f'{name}_{i}'] = mm + arr[cnt, :] = mm + + df['cpu_utilisation'] = Xm.mean(axis=0) + df['rss'] = Xrss.sum(axis=0) + df['vm'] = Xvm.sum(axis=0) + df['readmb'] = Xreadmb.sum(axis=0) + df['writemb'] = Xwritemb.sum(axis=0) + df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') + df['utime'] = df['timestamp'].astype('int64') // 10**9 + + return df + +def proc_gpu_series(cpu_df, dfi, gpu_cnt): + t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max()]) + t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max()]) + + t_cpu_range = t_cpu[1] - t_cpu[0] + t_gpu_range = t_gpu[1] - t_gpu[0] + per_diff = (t_cpu_range - t_gpu_range) / t_gpu_range * 100 + + if abs(per_diff) > 10: + raise ValueError("Time mismatch between CPU and GPU series exceeds 10%") + + dfi['t_fixed'] = dfi.timestamp - dfi.timestamp.min() + t_cpu[0] + ugpus = dfi.gpu_index.unique() + gpu_df = pd.DataFrame({'utime': cpu_df['utime'].values}) + + for u in ugpus: + dfg = dfi[dfi.gpu_index == u].copy() + fields = ['gpu_index', 'utilization_gpu_pct', 'utilization_memory_pct', 'memory_free_MiB', + 'memory_used_MiB', 'temperature_gpu', 'temperature_memory', 'power_draw_W'] + + for field in fields: + x1, y1 = dfg['t_fixed'].values, dfg[field].values + xv = cpu_df['utime'].values + yv = np.interp(xv, x1, y1) + gpu_df[field] = yv + + rename = { + 'utilization_gpu_pct': f'gpu_{gpu_cnt}', + 'utilization_memory_pct': f'gpu_mem_{gpu_cnt}', + 'temperature_gpu': f'gpu_temp_{gpu_cnt}', + 'power_draw_W': f'gpu_p_{gpu_cnt}' + } + gpu_df.rename(columns=rename, inplace=True) + gpu_cnt += 1 + + return gpu_df, gpu_cnt - Args: - file_path (str): The path to the pickled data file. +def load_data(local_dataset_path, **kwargs): + """ + Load MIT Supercloud job traces. + Expects: + local_dataset_path/ + metadata/ + file_list.csv + job_user_date_full.csv + 202201/ # hard-coded for now; change as needed + cpu/...-timeseries.csv + gpu/...-timeseries.csv + slurm-log.csv Returns: - list: A list of job dictionaries. 
+ jobs_list, min_utime, max_utime """ - with np.load(file_path, allow_pickle=True) as data: - jobs_data = data['jobs'] - - jobs = [] - for job_dict_data in jobs_data: - # Convert numpy.ndarray to list for cpu_trace and gpu_trace if they are arrays - cpu_trace = job_dict_data.item().get('cpu_trace', []) - if isinstance(cpu_trace, np.ndarray): - cpu_trace = cpu_trace.tolist() - - gpu_trace = job_dict_data.item().get('gpu_trace', []) - if isinstance(gpu_trace, np.ndarray): - gpu_trace = gpu_trace.tolist() - - job = job_dict( - id=job_dict_data.item().get('id'), - name=job_dict_data.item().get('name'), - account=job_dict_data.item().get('account'), - nodes_required=job_dict_data.item().get('nodes_required'), + # 1) Unpack list if necessary + if isinstance(local_dataset_path, list): + if len(local_dataset_path) != 1: + raise ValueError("MIT Supercloud loader accepts exactly one path") + local_dataset_path = local_dataset_path[0] + + # 2) Read metadata + meta_dir = os.path.join(local_dataset_path, "metadata") + file_list_df = pd.read_csv(os.path.join(meta_dir, "file_list.csv"), sep="\t") + job_index_df = pd.read_csv(os.path.join(meta_dir, "job_user_date_full.csv")) + + # 3) Date filtering settings + start_date_str = kwargs.get("start_date", "21052021") + end_date_str = kwargs.get("end_date", "22052021") + jid = kwargs.get("jid", "*") + + start_ts = int(datetime.strptime(start_date_str, "%d%m%Y").timestamp()) + end_ts = int(datetime.strptime(end_date_str, "%d%m%Y").timestamp()) + + # 4) Select jobs in time window + selected_df = job_index_df[ + (job_index_df.start > start_ts) & + (job_index_df.start < end_ts) + ].copy() + + # 5) Prepare GPU index metadata + gpu_df = file_list_df[file_list_df["File Name"].str.contains("/gpu/")].copy() + gpu_df["jobid"] = gpu_df["File Name"].str.extract(r"/([^/]+?)-").astype(int) + + # 6) Build list of timeseries file paths (relative) + files_to_copy = [ + row["filename"].replace("-summary", "-timeseries") + for _, row in selected_df.iterrows() + ] + files_to_copy += gpu_df[gpu_df.jobid.isin(selected_df.job_id)]["File Name"].tolist() + files_to_copy = list(set(files_to_copy)) + + # 7) Read SLURM log + data_subdir = "202201" # hard-coded folder name + slurm_log = next( + ( + os.path.join(r, "slurm-log.csv") + for r, _, fs in os.walk(os.path.join(local_dataset_path, data_subdir)) + if "slurm-log.csv" in fs + ), + None + ) + if not slurm_log: + return [], 0, 0 + slurm_df = pd.read_csv(slurm_log) + + # 8) Process each file, populating data_dict + data_dict = {} + for rel_path in tqdm(files_to_copy, desc="Processing trace files"): + fpath = os.path.join(local_dataset_path, data_subdir, rel_path) + if not os.path.exists(fpath): + print(f"Warning: missing {fpath}") + continue + + tqdm.write(f"Reading {rel_path}") + dfi = pd.read_csv(fpath, dtype={0: str}) + jobid = int(os.path.basename(rel_path).split("-")[0]) + data_dict.setdefault(jobid, {}) + + # CPU timeseries + if rel_path.endswith("-timeseries.csv") and "cpu" not in data_dict[jobid]: + data_dict[jobid]["cpu"] = proc_cpu_series(dfi) + + # GPU timeseries + elif "gpu_index" in dfi.columns: + cpu_df = data_dict[jobid].get("cpu") + if cpu_df is None: + continue + gpu_cnt = data_dict[jobid].get("gpu_cnt", 0) + prev_gpu = data_dict[jobid].get("gpu") + gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) + if prev_gpu is None: + data_dict[jobid]["gpu"] = gpu_ser + else: + data_dict[jobid]["gpu"] = pd.merge(prev_gpu, gpu_ser, on="utime") + data_dict[jobid]["gpu_cnt"] = gpu_cnt + + # 9) Merge SLURM metadata for each job + 
for jobid in list(data_dict):
+        matches = slurm_df[slurm_df["id_job"] == jobid]
+        if len(matches) == 1:
+            data_dict[jobid].update(matches.iloc[0].to_dict())
+
+    # 10) Compute overall time bounds
+    cpu_utimes = [d["cpu"]["utime"] for d in data_dict.values() if "cpu" in d]
+    if not cpu_utimes:
+        return [], 0, 0
+    min_utime = min(series.min() for series in cpu_utimes)
+    max_utime = max(series.max() for series in cpu_utimes)
+
+    # 11) Build the final list of job_dicts
+    jobs_list = []
+    for jobid, data in data_dict.items():
+        cpu_trace = data["cpu"]["cpu_utilisation"]
+        cpu_trace = cpu_trace.tolist() if isinstance(cpu_trace, pd.Series) else cpu_trace
+        gpu_df = data.get("gpu")
+        gpu_trace_list = gpu_df.values.tolist() if isinstance(gpu_df, pd.DataFrame) else 0
+
+        job_start = data["cpu"]["utime"].min() - min_utime
+        job_end = data["cpu"]["utime"].max() - min_utime
+        wall_time = max(0, job_end - job_start)
+        nodes_req = max(1, int(np.ceil(max(cpu_trace) / 2.0))) if cpu_trace else 1
+        if nodes_req > 1 and cpu_trace:
+            cpu_trace = [x / nodes_req for x in cpu_trace]
+
+        jobs_list.append(job_dict(
+            nodes_required=nodes_req,
+            name=data.get("name_job", "unknown"),
+            account=data.get("id_user", "unknown"),
+            cpu_trace=cpu_trace,
+            gpu_trace=gpu_trace_list,
+            ntx_trace=[],
+            nrx_trace=[],
+            end_state=data.get("state_end", "UNKNOWN"),
+            id=jobid,
+            priority=data.get("priority", 0),
+            submit_time=job_start,
+            time_limit=data.get("time_limit", 0),
+            start_time=job_start,
+            end_time=job_end,
+            wall_time=wall_time,
+            trace_time=len(cpu_trace) * 10.0,
+            trace_start_time=0,
+            trace_end_time=len(cpu_trace) * 10.0
+        ))
+
+    duration = max_utime - min_utime
+    return jobs_list, 0, duration
diff --git a/raps/dataloaders/mit_supercloud/README.md b/raps/dataloaders/mit_supercloud/README.md
index d5a1d09..f69189c 100644
--- a/raps/dataloaders/mit_supercloud/README.md
+++ b/raps/dataloaders/mit_supercloud/README.md
@@ -1,5 +1,5 @@
 To generate file indices needed for Damien's reader from full installation of MIT Supercloud dataset:
-This will generate: `file_list.csv` and `job_user_date_full.csv`
+This will generate: `file_list.csv`, `job_user_data.csv`, and `job_user_date_full.csv`
 
 python generate_local_metadata.py /lustre/orion/proj-shared/gen150/exadigit/mit_supercloud/datacenter-challenge/202201
 
-- 
GitLab

From b733e33b1798f794834998f1207b41e712259812 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 2 Jul 2025 02:00:16 -0400
Subject: [PATCH 142/388] Adjust MITSC settings to be more faithful to the actual system
---
 config/mit_supercloud/power.json   |  4 ++--
 config/mit_supercloud/system.json  |  2 +-
 raps/dataloaders/mit_supercloud.py | 10 ++++++----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/config/mit_supercloud/power.json b/config/mit_supercloud/power.json
index 5128c4c..08d02e4 100644 --- a/config/mit_supercloud/power.json +++ b/config/mit_supercloud/power.json @@ -1,8 +1,8 @@ { "POWER_GPU_IDLE": 88, "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, + "POWER_CPU_IDLE": 1, + "POWER_CPU_MAX": 6, "POWER_MEM": 74.26, "POWER_NVME": 30, "POWER_NIC": 20, diff --git a/config/mit_supercloud/system.json b/config/mit_supercloud/system.json index 6a1af50..de2fcbc 100644 --- a/config/mit_supercloud/system.json +++ b/config/mit_supercloud/system.json @@ -11,7 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [], - "CPUS_PER_NODE": 2, + "CPUS_PER_NODE": 48, "GPUS_PER_NODE": 0, "CPU_PEAK_FLOPS": 2.9952E12, "GPU_PEAK_FLOPS": 0, diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py index 290b154..044228a 100644 --- a/raps/dataloaders/mit_supercloud.py +++ b/raps/dataloaders/mit_supercloud.py @@ -16,6 +16,7 @@ from tqdm import tqdm from raps.job import job_dict + def proc_cpu_series(dfi): dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 @@ -197,8 +198,8 @@ def load_data(local_dataset_path, **kwargs): # 10) Compute overall time bounds cpu_utimes = [d["cpu"]["utime"] for d in data_dict.values() if "cpu" in d] - if not cpu_utimes: - return [], 0, 0 + #if not cpu_utimes: + # return [], 0, 0 min_utime = min(series.min() for series in cpu_utimes) max_utime = max(series.max() for series in cpu_utimes) @@ -210,10 +211,11 @@ def load_data(local_dataset_path, **kwargs): gpu_df = data.get("gpu") gpu_trace_list = gpu_df.values.tolist() if isinstance(gpu_df, pd.DataFrame) else 0 + submit_time = data.get("time_submit") - min_utime job_start = data["cpu"]["utime"].min() - min_utime job_end = data["cpu"]["utime"].max() - min_utime wall_time = max(0, job_end - job_start) - nodes_req = max(1, int(np.ceil(max(cpu_trace) / 2.0))) if cpu_trace else 1 + nodes_req = data.get("nodes_alloc") if nodes_req > 1 and cpu_trace: cpu_trace = [x / nodes_req for x in cpu_trace] @@ -228,7 +230,7 @@ def load_data(local_dataset_path, **kwargs): end_state=data.get("state_end", "UNKNOWN"), id=jobid, priority=data.get("priority", 0), - submit_time=job_start, + submit_time=submit_time, time_limit=data.get("time_limit", 0), start_time=job_start, end_time=job_end, -- GitLab From d460773097607037ecf85803baa3752a96359e3c Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 2 Jul 2025 10:54:50 -0400 Subject: [PATCH 143/388] Fix issue with end time of simulation being computed wrong for mitsc dataloader --- raps/dataloaders/mit_supercloud.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py index 044228a..587c418 100644 --- a/raps/dataloaders/mit_supercloud.py +++ b/raps/dataloaders/mit_supercloud.py @@ -106,7 +106,7 @@ def load_data(local_dataset_path, **kwargs): gpu/...-timeseries.csv slurm-log.csv Returns: - jobs_list, min_utime, max_utime + jobs_list, sim_start_time, sim_end_time """ # 1) Unpack list if necessary if isinstance(local_dataset_path, list): @@ -126,6 +126,7 @@ def load_data(local_dataset_path, **kwargs): start_ts = int(datetime.strptime(start_date_str, "%d%m%Y").timestamp()) end_ts = int(datetime.strptime(end_date_str, "%d%m%Y").timestamp()) + requested_duration = end_ts - start_ts # 4) Select jobs in time window selected_df = job_index_df[ @@ -198,10 +199,6 @@ def load_data(local_dataset_path, **kwargs): # 10) Compute overall time bounds 
cpu_utimes = [d["cpu"]["utime"] for d in data_dict.values() if "cpu" in d] - #if not cpu_utimes: - # return [], 0, 0 - min_utime = min(series.min() for series in cpu_utimes) - max_utime = max(series.max() for series in cpu_utimes) # 11) Build the final list of job_dicts jobs_list = [] @@ -211,9 +208,9 @@ def load_data(local_dataset_path, **kwargs): gpu_df = data.get("gpu") gpu_trace_list = gpu_df.values.tolist() if isinstance(gpu_df, pd.DataFrame) else 0 - submit_time = data.get("time_submit") - min_utime - job_start = data["cpu"]["utime"].min() - min_utime - job_end = data["cpu"]["utime"].max() - min_utime + submit_time = data.get("time_submit") - start_ts + job_start = data["cpu"]["utime"].min() - start_ts + job_end = data["cpu"]["utime"].max() - start_ts wall_time = max(0, job_end - job_start) nodes_req = data.get("nodes_alloc") if nodes_req > 1 and cpu_trace: @@ -240,5 +237,4 @@ def load_data(local_dataset_path, **kwargs): trace_end_time=len(cpu_trace) * 10.0 )) - duration = max_utime - min_utime - return jobs_list, 0, duration + return jobs_list, 0, requested_duration -- GitLab From 7535a2f37b3bfe1b95480f36235bee347b91002a Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 3 Jul 2025 14:52:26 -0400 Subject: [PATCH 144/388] A number of fixes esp for the timings of gcloudv2 traces --- README.md | 6 ++++-- raps/dataloaders/gcloudv2.py | 39 +++++++++++++++++++++++++++--------- raps/telemetry.py | 3 ++- raps/ui.py | 9 +++++---- 4 files changed, 41 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 36a4408..6bb3410 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,10 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from For Google cluster trace v2 - # gcloudv2 - python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample + python math.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample + + # analyze dataset + python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v For MIT Supercloud diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 71bea8f..0eab8d9 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -1,5 +1,6 @@ import os import re +from tqdm import tqdm from typing import List, Optional, Generator, Tuple, Any, Union import numpy as np @@ -137,16 +138,26 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any if col not in df.columns: raise ValueError(f"Missing column {col}") df = df[df["event_type"]==0] - df["timestamp"] = df["timestamp"].astype(float) - t0, t1 = df["timestamp"].min(), df["timestamp"].max() + df["timestamp"] = df["timestamp"].astype(float) / 1e6 # convert from microseconds → seconds + t0 = df["timestamp"].min() + t1 = df["timestamp"] - t0 # Load task usage usage_loader = GoogleClusterV2DataLoader(base_path, event_type="task_usage", concatenate=True) usage_df = next(iter(usage_loader)) + + # Convert microseconds → seconds for task usage + usage_df["start_time"] = usage_df["start_time"].astype(float) / 1e6 + usage_df["end_time"] = usage_df["end_time" ].astype(float) / 1e6 + + # Build per-job start and end times (seconds since trace-start) + usage_map_start = usage_df.groupby("job_ID")["start_time"].min().to_dict() + usage_map_end = usage_df.groupby("job_ID")["end_time" ].max().to_dict() + # rename to avg if "CPU_usage_rate" in usage_df.columns: usage_df.rename(columns={"CPU_usage_rate":"CPU_usage_avg"}, inplace=True) - usage_df["job_ID"] = 
usage_df["job_ID"].astype(int) + usage_df["job_ID"] = usage_df["job_ID"].astype(int) usage_df["CPU_usage_avg"] = usage_df["CPU_usage_avg"].astype(float) usage_map = usage_df.groupby("job_ID")["CPU_usage_avg"].apply(lambda s: s.to_numpy()).to_dict() @@ -155,8 +166,13 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any jobs: List[Any] = [] jid_f = kwargs.get('jid','*') - for _, row in df.iterrows(): + for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Loading jobs"): + jid = int(row["job_ID"]) + start = usage_map_start[jid] - t0 + end = usage_map_end [jid] - t0 + wall = end - start + if jid_f!='*' and str(jid)!=str(jid_f): continue trace = usage_map[jid] # ensure gpu_trace is same length @@ -166,13 +182,18 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any name=f"job_{jid}", account=f"user_{row.get('user_name','unknown')}", cpu_trace=trace, - gpu_trace=gpu_trace, + #gpu_trace=gpu_trace, + gpu_trace=0, nrx_trace=[], ntx_trace=[], end_state="UNKNOWN", scheduled_nodes=[], id=jid, priority=int(row.get('scheduling_class',0)), submit_time=row["timestamp"], time_limit=0, - start_time=row["timestamp"], end_time=row["timestamp"]+1.0, - wall_time=1.0, trace_time=row["timestamp"], - trace_start_time=float(t0), trace_end_time=float(t1) + start_time=start, end_time=end, + wall_time=wall, trace_time=row["timestamp"], + trace_start_time=start, trace_end_time=end )) - return jobs, 0, 10000 + + # Compute simulation span: start at t=0, end at the latest job finish + simulation_start = 0 + simulation_end = int(max(usage_map_end.values()) - t0) + return jobs, simulation_start, simulation_end diff --git a/raps/telemetry.py b/raps/telemetry.py index aca32d5..a3a6256 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -103,7 +103,7 @@ if __name__ == "__main__": job['requested_nodes'] = None job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME']) else: - jobs, _, _, _ = td.load_data(args.replay) + jobs, _, _ = td.load_data(args.replay) timesteps = int(max(job['wall_time'] + job['submit_time'] for job in jobs)) @@ -126,6 +126,7 @@ if __name__ == "__main__": if args.verbose: print(job) + print(f'Number of jobs: {len(jobs)}') print(f'Simulation will run for {timesteps} seconds') print(f'Average job arrival time is: {np.mean(dt_list):.2f}s') print(f'Average wall time is: {np.mean(wt_list):.2f}s') diff --git a/raps/ui.py b/raps/ui.py index 00638e0..173c2ea 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -108,7 +108,8 @@ class LayoutManager: show_slowdown = (self.topology in ("fat-tree", "dragonfly", "capacity")) # Build the column headers - columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"] + columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST"] + #columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"] if show_slowdown: columns.append("SLOW DOWN") else: @@ -116,7 +117,7 @@ class LayoutManager: if show_nodes: columns.append("NODELIST") - columns.append("TIME") + #columns.append("TIME") # Create table with bold magenta headers table = Table(title="Job Queue", header_style="bold magenta", expand=True) @@ -162,8 +163,8 @@ class LayoutManager: str(job.name), str(job.account), job.state.value, - n_nodes, - col_slow, + #n_nodes, + #col_slow, ] if show_nodes: -- GitLab From b1c61ca87ececc2a66c60e289d2fe829c90de322 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 3 Jul 2025 15:58:16 -0400 Subject: [PATCH 145/388] Get some basic working version of gcloudv2, where the jobs actually run --- README.md | 2 +- 
raps/dataloaders/gcloudv2.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6bb3410..2282100 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from
 
 For Google cluster trace v2
 
-    python main.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample
+    python main.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -ff 600
 
     # analyze dataset
     python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v
diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py
index 0eab8d9..3ea647f 100644
--- a/raps/dataloaders/gcloudv2.py
+++ b/raps/dataloaders/gcloudv2.py
@@ -187,7 +187,8 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any
             nrx_trace=[], ntx_trace=[],
             end_state="UNKNOWN", scheduled_nodes=[],
             id=jid, priority=int(row.get('scheduling_class',0)),
-            submit_time=row["timestamp"], time_limit=0,
+            #submit_time=row["timestamp"], time_limit=0,
+            submit_time=start, time_limit=0,
             start_time=start, end_time=end,
             wall_time=wall, trace_time=row["timestamp"],
             trace_start_time=start, trace_end_time=end
-- 
GitLab

From 9fc6e0c3c46c5e91ed4b8e729b2c4301842ecff76e901 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 8 Jul 2025 12:43:51 -0400
Subject: [PATCH 146/388] Work on integrating gpu traces into mit_supercloud
---
 raps/dataloaders/mit_supercloud.py | 158 +++++++++++++++++++++--------
 1 file changed, 114 insertions(+), 44 deletions(-)

diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py
index 587c418..2a1115c 100644
--- a/raps/dataloaders/mit_supercloud.py
+++ b/raps/dataloaders/mit_supercloud.py
@@ -56,42 +56,64 @@ def proc_cpu_series(dfi):
     return df
 
 def proc_gpu_series(cpu_df, dfi, gpu_cnt):
-    t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max()])
-    t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max()])
-
-    t_cpu_range = t_cpu[1] - t_cpu[0]
-    t_gpu_range = t_gpu[1] - t_gpu[0]
-    per_diff = (t_cpu_range - t_gpu_range) / t_gpu_range * 100
-
+    # 1) Build CPU time range
+    t_cpu_start = int(cpu_df.utime.min())
+    t_cpu_end = int(cpu_df.utime.max())
+    t_cpu = np.array([t_cpu_start, t_cpu_end, t_cpu_end - t_cpu_start])
+
+    # 2) Safely convert the GPU timestamps to integer seconds
+    #    (this handles strings like "1621607266.426")
+    ts = pd.to_numeric(dfi["timestamp"], errors="coerce")  # float64 or NaN
+    ts_int = ts.ffill().astype(float).astype(int)
+    t0, t1 = ts_int.min(), ts_int.max()
+    t_gpu = np.array([t0, t1, t1 - t0])
+
+    # 3) Sanity‐check the durations match within 10%
+    per_diff = ((t_cpu[1] - t_cpu[0]) - (t_gpu[1] - t_gpu[0])) / (t_gpu[1] - t_gpu[0]) * 100
     if abs(per_diff) > 10:
-        raise ValueError("Time mismatch between CPU and GPU series exceeds 10%")
-
-    dfi['t_fixed'] = dfi.timestamp - dfi.timestamp.min() + t_cpu[0]
-    ugpus = dfi.gpu_index.unique()
-    gpu_df = pd.DataFrame({'utime': cpu_df['utime'].values})
-
-    for u in ugpus:
-        dfg = dfi[dfi.gpu_index == u].copy()
-        fields = ['gpu_index', 'utilization_gpu_pct', 'utilization_memory_pct', 'memory_free_MiB',
-                  'memory_used_MiB', 'temperature_gpu', 'temperature_memory', 'power_draw_W']
+        # warn and proceed — GPU trace may be trimmed or misaligned
+        print(f"Warning: GPU‐CPU time mismatch {per_diff:.1f}% exceeds 10%; continuing anyway")
 
-        for field in fields:
-            x1, y1 = dfg['t_fixed'].values, dfg[field].values
-            xv = cpu_df['utime'].values
-            yv = 
np.interp(xv, x1, y1) - gpu_df[field] = yv - - rename = { - 'utilization_gpu_pct': f'gpu_{gpu_cnt}', - 'utilization_memory_pct': f'gpu_mem_{gpu_cnt}', - 'temperature_gpu': f'gpu_temp_{gpu_cnt}', - 'power_draw_W': f'gpu_p_{gpu_cnt}' - } - gpu_df.rename(columns=rename, inplace=True) - gpu_cnt += 1 - - return gpu_df, gpu_cnt + # 4) Align GPU times onto CPU utime grid + # Use our integer‐second Series rather than the raw column + dfi["t_fixed"] = ts_int - ts_int.min() + t_cpu_start + # 5) Prepare output DataFrame with a utime column + ugpus = dfi.gpu_index.unique() + gpu_df = pd.DataFrame({"utime": cpu_df["utime"].values}) + + # 6) Interpolate each GPU field onto the CPU utime grid + fields = [ + "utilization_gpu_pct", + "utilization_memory_pct", + "memory_free_MiB", + "memory_used_MiB", + "temperature_gpu", + "temperature_memory", + "power_draw_W", + ] + for field in fields: + # grab the float‐converted timestamp and the metric + x1 = ts_int.values + y1 = dfi[field].astype(float).values + xv = cpu_df["utime"].values + # numpy interpolation + gpu_df[field] = np.interp(xv, x1, y1) + + # 7) Rename the GPU pct, memory pct, and power columns with the device index + ren = { + "gpu_index": f"gpu_index_{gpu_cnt}", + "utilization_gpu_pct": f"gpu_util_{gpu_cnt}", + "utilization_memory_pct":f"gpu_mempct_{gpu_cnt}", + "memory_free_MiB": f"gpu_memfree_{gpu_cnt}", + "memory_used_MiB": f"gpu_memused_{gpu_cnt}", + "temperature_gpu": f"gpu_temp_{gpu_cnt}", + "temperature_memory": f"gpu_memtemp_{gpu_cnt}", + "power_draw_W": f"gpu_power_{gpu_cnt}", + } + gpu_df.rename(columns=ren, inplace=True) + + return gpu_df, gpu_cnt + 1 def load_data(local_dataset_path, **kwargs): """ @@ -134,20 +156,63 @@ def load_data(local_dataset_path, **kwargs): (job_index_df.start < end_ts) ].copy() - # 5) Prepare GPU index metadata - gpu_df = file_list_df[file_list_df["File Name"].str.contains("/gpu/")].copy() - gpu_df["jobid"] = gpu_df["File Name"].str.extract(r"/([^/]+?)-").astype(int) +###### + data_subdir = "202201" # hard-coded folder name + print(local_dataset_path, data_subdir) + + # --- 1) Load and filter Slurm log for GPU jobs in [start_ts, end_ts) --- + slurm_path = os.path.join(local_dataset_path, data_subdir, "slurm-log.csv") + slurm_df = pd.read_csv(slurm_path) - # 6) Build list of timeseries file paths (relative) - files_to_copy = [ - row["filename"].replace("-summary", "-timeseries") - for _, row in selected_df.iterrows() + # Keep only rows within your date window + sl = slurm_df[ + (slurm_df.time_submit >= start_ts) & + (slurm_df.time_submit < end_ts) ] - files_to_copy += gpu_df[gpu_df.jobid.isin(selected_df.job_id)]["File Name"].tolist() - files_to_copy = list(set(files_to_copy)) + + # Filter to those that actually used GPUs + def row_uses_gpu(r): + return ("gpu" in str(r.get("gres_used","")).lower() + or "1001=" in str(r.get("tres_alloc","")) + or "1002=" in str(r.get("tres_alloc",""))) + gpu_sl = sl[sl.apply(row_uses_gpu, axis=1)] + + gpu_job_ids = set(gpu_sl.id_job.unique()) + print(f"→ Found {len(gpu_job_ids)} GPU‐using jobs in your date range") + + # --- 2) Pull their GPU timeseries paths from file_list.csv --- + #gpu_entries = file_list_df[ + # file_list_df["File Name"].str.contains("/gpu/") + #].copy() + + # should match both "gpu/..." 
at the start _and_ anywhere else + #gpu_entries = file_list_df[ + # file_list_df["File Name"].str.contains(r"(^|/)gpu/") + #].copy() + + # Option 2: simple substring match (matches anywhere “gpu/” appears) + gpu_entries = file_list_df[ + file_list_df["File Name"].str.contains("gpu/") + ].copy() + + gpu_entries["job_id"] = ( + gpu_entries["File Name"] + .str.extract(r"/(\d+)-", expand=False) + .astype(int) + ) + gpu_sel = gpu_entries[gpu_entries["job_id"].isin(gpu_job_ids)] + gpu_files = gpu_sel["File Name"].tolist() + print(f"→ Will process {len(gpu_files)} GPU files") + + # --- 3) Combine with your CPU list and dedupe --- + cpu_files = [ + fn.replace("-summary","-timeseries") + for fn in selected_df["filename"] + ] + files_to_copy = list(set(cpu_files + gpu_files)) + print(f"Total files to load: {len(files_to_copy)} (CPU: {len(cpu_files)}, GPU: {len(gpu_files)})") # 7) Read SLURM log - data_subdir = "202201" # hard-coded folder name slurm_log = next( ( os.path.join(r, "slurm-log.csv") @@ -203,7 +268,12 @@ def load_data(local_dataset_path, **kwargs): # 11) Build the final list of job_dicts jobs_list = [] for jobid, data in data_dict.items(): - cpu_trace = data["cpu"]["cpu_utilisation"] + # skip any job that never loaded a CPU trace + cpu_ser = data.get("cpu") + if cpu_ser is None: + print(f"Warning: skipping job {jobid} (no CPU trace)") + continue + cpu_trace = cpu_ser["cpu_utilisation"] cpu_trace = cpu_trace.tolist() if isinstance(cpu_trace, pd.Series) else cpu_trace gpu_df = data.get("gpu") gpu_trace_list = gpu_df.values.tolist() if isinstance(gpu_df, pd.DataFrame) else 0 -- GitLab From 6ae2b0e69d3cf9f92d760181b496382b78fc4fae Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 8 Jul 2025 21:46:28 -0400 Subject: [PATCH 147/388] Get multi-part-sim.py working with mit_supercloud datasets --- README.md | 2 + .../mit_supercloud/{ => part-cpu}/power.json | 0 .../{ => part-cpu}/scheduler.json | 0 .../mit_supercloud/{ => part-cpu}/system.json | 0 .../part-gpu/power.json | 0 .../part-gpu/scheduler.json | 0 .../part-gpu/system.json | 0 config/mit_supercloud_gpu/part-cpu/power.json | 18 --------- .../part-cpu/scheduler.json | 18 --------- .../mit_supercloud_gpu/part-cpu/system.json | 20 ---------- multi-part-sim.py | 40 +++++++++++++++---- raps/dataloaders/mit_supercloud.py | 19 +++++++-- 12 files changed, 50 insertions(+), 67 deletions(-) rename config/mit_supercloud/{ => part-cpu}/power.json (100%) rename config/mit_supercloud/{ => part-cpu}/scheduler.json (100%) rename config/mit_supercloud/{ => part-cpu}/system.json (100%) rename config/{mit_supercloud_gpu => mit_supercloud}/part-gpu/power.json (100%) rename config/{mit_supercloud_gpu => mit_supercloud}/part-gpu/scheduler.json (100%) rename config/{mit_supercloud_gpu => mit_supercloud}/part-gpu/system.json (100%) delete mode 100644 config/mit_supercloud_gpu/part-cpu/power.json delete mode 100644 config/mit_supercloud_gpu/part-cpu/scheduler.json delete mode 100644 config/mit_supercloud_gpu/part-cpu/system.json diff --git a/README.md b/README.md index 2282100..6ec73cb 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,8 @@ For MIT Supercloud python main.py -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud + python multi-part-sim.py -x 'mit_supercloud/*' -f ~/data/mit --system mit_supercloud --arrival poisson + ## Perform Network Simulation Lassen is one of the few datasets that has networking data. 
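A hypothetical replay invocation (the dataset path below is illustrative, and the `-net`/`--simulate-network` flag only appears later in this patch series):

    python main.py --system lassen -f /path/to/lassen/job_data.parquet -net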
See `raps/dataloaders/lassen.py` for how to diff --git a/config/mit_supercloud/power.json b/config/mit_supercloud/part-cpu/power.json similarity index 100% rename from config/mit_supercloud/power.json rename to config/mit_supercloud/part-cpu/power.json diff --git a/config/mit_supercloud/scheduler.json b/config/mit_supercloud/part-cpu/scheduler.json similarity index 100% rename from config/mit_supercloud/scheduler.json rename to config/mit_supercloud/part-cpu/scheduler.json diff --git a/config/mit_supercloud/system.json b/config/mit_supercloud/part-cpu/system.json similarity index 100% rename from config/mit_supercloud/system.json rename to config/mit_supercloud/part-cpu/system.json diff --git a/config/mit_supercloud_gpu/part-gpu/power.json b/config/mit_supercloud/part-gpu/power.json similarity index 100% rename from config/mit_supercloud_gpu/part-gpu/power.json rename to config/mit_supercloud/part-gpu/power.json diff --git a/config/mit_supercloud_gpu/part-gpu/scheduler.json b/config/mit_supercloud/part-gpu/scheduler.json similarity index 100% rename from config/mit_supercloud_gpu/part-gpu/scheduler.json rename to config/mit_supercloud/part-gpu/scheduler.json diff --git a/config/mit_supercloud_gpu/part-gpu/system.json b/config/mit_supercloud/part-gpu/system.json similarity index 100% rename from config/mit_supercloud_gpu/part-gpu/system.json rename to config/mit_supercloud/part-gpu/system.json diff --git a/config/mit_supercloud_gpu/part-cpu/power.json b/config/mit_supercloud_gpu/part-cpu/power.json deleted file mode 100644 index 5128c4c..0000000 --- a/config/mit_supercloud_gpu/part-cpu/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NVME": 30, - "POWER_NIC": 20, - "POWER_CDU": 8473.47, - "POWER_SWITCH": 250, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/mit_supercloud_gpu/part-cpu/scheduler.json b/config/mit_supercloud_gpu/part-cpu/scheduler.json deleted file mode 100644 index 0ea905d..0000000 --- a/config/mit_supercloud_gpu/part-cpu/scheduler.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "MAX_TIME": 88200, - "TRACE_QUANTA": 20, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/mit_supercloud_gpu/part-cpu/system.json b/config/mit_supercloud_gpu/part-cpu/system.json deleted file mode 100644 index a7c5330..0000000 --- a/config/mit_supercloud_gpu/part-cpu/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 1, - "RACKS_PER_CDU": 1, - "NODES_PER_RACK": 480, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 4, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "GPUS_PER_NODE": 0, - "CPU_PEAK_FLOPS": 2.9952E12, - "GPU_PEAK_FLOPS": 0, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/multi-part-sim.py b/multi-part-sim.py index b9d3856..560fe11 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -27,22 +27,46 @@ if '*' in args.partitions[0]: partition_names = 
[os.path.join(*p.split(os.sep)[-2:]) for p in paths] configs = [ConfigManager(system_name=partition).get_config() for partition in partition_names] -args_dicts = [{**vars(args), 'config': config} for config in configs] +#args_dicts = [{**vars(args), 'config': config} for config in configs] +args_dicts = [ + {**vars(args), 'config': config, 'partition': partition_names[i]} + for i, config in enumerate(configs) + ] # Initialize Workload if args.replay: # Currently this assumes that an .npz file has already been created # e.g., python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet - td = Telemetry(**args_dicts[0]) - print(f"Loading {args.replay[0]}...") - jobs = td.load_snapshot(args.replay[0]) - available_nodes = [config['AVAILABLE_NODES'] for config in configs] - print("available nodes:", available_nodes) + #td = Telemetry(**args_dicts[0]) + #print(f"Loading {args.replay[0]}...") + #jobs = td.load_snapshot(args.replay[0]) + #available_nodes = [config['AVAILABLE_NODES'] for config in configs] + #print("available nodes:", available_nodes) # Randomly assign partition - for job in jobs: - job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0] + #for job in jobs: + # job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0] + + jobs_by_partition = {} + for ad in args_dicts: + part = ad['partition'] + td = Telemetry(**ad) + print(f"[{part}] loading traces from {args.replay[0]} …") + jobs_part, t0, t1 = td.load_data(args.replay) + jobs_by_partition[part] = jobs_part + + # --- report how many jobs per partition --- + for part, jl in jobs_by_partition.items(): + print(f"[INFO] Partition '{part}': {len(jl)} jobs loaded") + exit() + + # now flatten into a single job list (or keep separate for your engine) + jobs = [] + for part in partition_names: + for job in jobs_by_partition[part]: + job['partition'] = part + jobs.append(job) if args.scale: for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py index 2a1115c..380823a 100644 --- a/raps/dataloaders/mit_supercloud.py +++ b/raps/dataloaders/mit_supercloud.py @@ -145,6 +145,10 @@ def load_data(local_dataset_path, **kwargs): start_date_str = kwargs.get("start_date", "21052021") end_date_str = kwargs.get("end_date", "22052021") jid = kwargs.get("jid", "*") + # determine whether this is the CPU or GPU partition + part = kwargs.get("partition", "").lower() + cpu_only = ("cpu" in part) and ("gpu" not in part) + gpu_only = ("gpu" in part) and ("cpu" not in part) start_ts = int(datetime.strptime(start_date_str, "%d%m%Y").timestamp()) end_ts = int(datetime.strptime(end_date_str, "%d%m%Y").timestamp()) @@ -209,8 +213,17 @@ def load_data(local_dataset_path, **kwargs): fn.replace("-summary","-timeseries") for fn in selected_df["filename"] ] - files_to_copy = list(set(cpu_files + gpu_files)) - print(f"Total files to load: {len(files_to_copy)} (CPU: {len(cpu_files)}, GPU: {len(gpu_files)})") + trace_files = list(set(cpu_files + gpu_files)) + + # filter by partition + if cpu_only: + trace_files = cpu_files + elif gpu_only: + trace_files = gpu_files + # else leave both + trace_files = list(set(trace_files)) + + print(f"Total files to load: {len(trace_files)} (CPU: {len(cpu_files)}, GPU: {len(gpu_files)})") # 7) Read SLURM log slurm_log = next( @@ -227,7 +240,7 @@ def load_data(local_dataset_path, **kwargs): # 8) Process each file, populating data_dict data_dict = {} - for rel_path in 
tqdm(files_to_copy, desc="Processing trace files"):
+    for rel_path in tqdm(trace_files, desc="Processing trace files"):
         fpath = os.path.join(local_dataset_path, data_subdir, rel_path)
         if not os.path.exists(fpath):
             print(f"Warning: missing {fpath}")
--
GitLab


From d2f8a88952052c67d5e3fd9bd6fc644b07e88320 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 9 Jul 2025 13:05:29 -0400
Subject: [PATCH 148/388] Remove exit()

---
 multi-part-sim.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/multi-part-sim.py b/multi-part-sim.py
index 560fe11..8c18b2e 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -59,7 +59,6 @@ if args.replay:
     # --- report how many jobs per partition ---
     for part, jl in jobs_by_partition.items():
         print(f"[INFO] Partition '{part}': {len(jl)} jobs loaded")
-    exit()
--
GitLab


From f8325de985b77a6f2d2cab23d8fe61cd98611992 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 9 Jul 2025 15:48:51 -0400
Subject: [PATCH 149/388] Refactored tick

Tick consists of processing of each running job to get
- CPU utilization
- GPU utilization
- Network utilization
Afterwards the full system
- Utilization
- Power
- Cooling
- Performance
is simulated.

These steps are now isolated and in separate functions for easier
modifications. Any additions can be added in function calls and enabled
or disabled via arguments.

The refactor also fixed some bugs.

---
 args.py         |   1 +
 main.py         |   4 +-
 raps/cooling.py |  43 +++++----
 raps/engine.py  | 247 +++++++++++++++++++++---------------------
 raps/flops.py   |  36 ++++---
 raps/power.py   |  44 ++++++++-
 raps/ui.py      |  45 ++++-----
 raps/utils.py   |  25 +++++
 8 files changed, 253 insertions(+), 192 deletions(-)

diff --git a/args.py b/args.py
index c8df071..56a6508 100644
--- a/args.py
+++ b/args.py
@@ -10,6 +10,7 @@ parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulat
 parser.add_argument('--system', type=str, default='frontier', help='System config to use')
 parser.add_argument('-x', '--partitions', nargs='+', default=None, help='List of machine configurations to use, e.g., -x setonix-cpu setonix-gpu')
 parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model')
+parser.add_argument('-net', '--simulate-network', default=False, action='store_true', help='Include Network model')

 # Simulation runtime options
 parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
diff --git a/main.py b/main.py
index 9effbde..5c0aefa 100644
--- a/main.py
+++ b/main.py
@@ -31,6 +31,8 @@ from raps.utils import convert_numpy_to_builtin

 from args import args, args_dict

+if args.verbose or args.debug:
+    print(args)

 config = ConfigManager(system_name=args.system).get_config()

@@ -130,7 +132,7 @@ total_timesteps = timestep_end - timestep_start
 if args.time_delta:
     time_delta = convert_to_seconds(args.time_delta)
 else:
-    time_delta = config['TRACE_QUANTA']
+    time_delta = 1  # config['TRACE_QUANTA']

 print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds from {timestep_start} to {timestep_end}.')
 print(f'Simulation time delta: {time_delta}s, Telemetry trace quanta: {jobs[0].trace_quanta}s.')
diff --git a/raps/cooling.py b/raps/cooling.py
index 3b59e52..7ffe6eb 100644
--- a/raps/cooling.py
+++ b/raps/cooling.py
@@ -1,10 +1,10 @@
 """
-This module provides functionality for simulating a
thermo-fluids model using an FMU (Functional Mock-up Unit). -The module defines a `ThermoFluidsModel` class that encapsulates the +The module defines a `ThermoFluidsModel` class that encapsulates the initialization, simulation step execution, -data conversion, and cleanup processes for the FMU-based model. +data conversion, and cleanup processes for the FMU-based model. """ import shutil import re @@ -22,7 +22,7 @@ def get_matching_variables(variables, pattern): # Filtering the list using the regex pattern filtered_vars = [var for var in variables if pattern.match(var)] - + return filtered_vars @@ -30,9 +30,9 @@ class ThermoFluidsModel: """ A class to represent a thermo-fluids model using an FMU (Functional Mock-up Unit). - This class encapsulates the initialization, simulation step execution, data conversion, - and cleanup processes for the FMU-based thermo-fluids model. It provides methods to - initialize the model, execute simulation steps, generate runtime values, calculate Power + This class encapsulates the initialization, simulation step execution, data conversion, + and cleanup processes for the FMU-based thermo-fluids model. It provides methods to + initialize the model, execute simulation steps, generate runtime values, calculate Power Usage Effectiveness (PUE), and properly manage the FMU resources. Attributes @@ -40,7 +40,7 @@ class ThermoFluidsModel: FMU_PATH : str The file path to the FMU file. fmu_history : list - A list to store the history of FMU states, combining cooling input, datacenter output, + A list to store the history of FMU states, combining cooling input, datacenter output, and central energy plant (CEP) output for each simulation step. inputs : list A list of input variables for the FMU. @@ -56,17 +56,17 @@ class ThermoFluidsModel: Methods ------- initialize(): - Initializes the FMU by extracting the file, reading the model description, setting up input and output variables, + Initializes the FMU by extracting the file, reading the model description, setting up input and output variables, and preparing the model for simulation. generate_runtime_values(cdu_power, sc) -> dict: Generates runtime values dynamically for the FMU inputs based on CDU power and other configuration parameters. generate_fmu_inputs(runtime_values: dict, uncertainties: bool = False) -> list: Converts runtime values to a list suitable for FMU inputs, handling uncertainties if specified. calculate_pue(cooling_input: dict, datacenter_output: dict, cep_output: dict) -> float: - Calculates the Power Usage Effectiveness (PUE) of the data center based on the cooling, datacenter, + Calculates the Power Usage Effectiveness (PUE) of the data center based on the cooling, datacenter, and CEP output power values. step(current_time: float, fmu_inputs: list, step_size: float) -> Tuple[dict, dict, dict, float]: - Executes a simulation step with the given inputs and step size. Returns the cooling input, datacenter output, + Executes a simulation step with the given inputs and step size. Returns the cooling input, datacenter output, CEP output, and PUE for the current step. terminate(): Terminates the FMU instance, ensuring that all resources are properly released. @@ -89,7 +89,7 @@ class ThermoFluidsModel: self.unzipdir = None self.fmu = None self.weather = None - + def initialize(self): """ Initializes the FMU by extracting the file and setting up the model. 
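The PUE bookkeeping in this module reduces to (IT input power + cooling-plant power) / IT input power, with the plant power summed over CDU pumps (W_CDUPs), hot-water and cooling-tower water pumps (W_HTWPs, W_CTWPs), and cooling-tower fans (W_CTs). A minimal sketch of that arithmetic, using made-up wattages rather than values from any real system:

    import numpy as np

    total_input_power = 25_000_000.0    # IT load in watts (illustrative)
    W_CDUPs = np.array([4_000.0] * 25)  # CDU pump draw, one entry per CDU
    W_HTWPs = np.array([15_000.0] * 4)  # hot-water loop pumps
    W_CTWPs = np.array([12_000.0] * 4)  # cooling-tower water pumps
    W_CTs = np.array([30_000.0] * 4)    # cooling-tower fans

    pue = (total_input_power + W_CDUPs.sum() + W_HTWPs.sum()
           + W_CTWPs.sum() + W_CTs.sum()) / total_input_power
    print(f"PUE = {pue:.3f}")           # ~1.013 for these numbers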
@@ -115,7 +115,7 @@ class ThermoFluidsModel:
         # Get the value references for the variables we want to get/set
         self.inputs = [v for v in model_description.modelVariables if v.causality == 'input']
         self.outputs = [v for v in model_description.modelVariables if v.name in outputs]
-
+
         # Instantiate and initialize the FMU
         self.fmu = FMU2Slave(guid=model_description.guid,
                              unzipDirectory=self.unzipdir,
@@ -159,7 +159,7 @@ class ThermoFluidsModel:
         runtime_values[self.config['TEMPERATURE_KEY']] = temperature

         return runtime_values
-
+
     def generate_fmu_inputs(self, runtime_values, uncertainties=False):
         """
         Convert the runtime values based on the cooling model's inputs to a list suitable for FMU inputs.
@@ -182,7 +182,7 @@ class ThermoFluidsModel:

         # Helper function to process uncertainty
         def process_uncertainty(value):
-            """Strip uncertainty if present, otherwise return the value as-is."""
+            """Strip uncertainty if present, otherwise return the value as-is."""
             # Convert to nominal value if it's an AffineScalarFunc and uncertainties flag is set
             return unumpy.nominal_values(value) if uncertainties and isinstance(value, AffineScalarFunc) else value

@@ -246,7 +246,7 @@ class ThermoFluidsModel:
         pue = (total_input_power + np.sum(W_CDUPs) + np.sum(W_HTWPs) + np.sum(W_CTWPs) + np.sum(W_CTs)) / total_input_power

         return pue
-
+
     def step(self, current_time, fmu_inputs, step_size):
         """
         Executes a simulation step with the given inputs and step size.
@@ -314,3 +314,14 @@ class ThermoFluidsModel:
         """
         # Cleanup - at the end of the simulation
         shutil.rmtree(self.unzipdir, ignore_errors=True)
+
+    def simulate_cooling(self, engine, rack_power):
+        cdu_power = rack_power.T[-1] * 1000
+        runtime_values = self.generate_runtime_values(cdu_power, engine)
+
+        # FMU inputs are N powers and the wetbulb temp
+        fmu_inputs = self.generate_fmu_inputs(runtime_values,
+                                              uncertainties=engine.power_manager.uncertainties)
+        cooling_inputs, cooling_outputs = self.step(engine.current_time, fmu_inputs,
+                                                    engine.config['POWER_UPDATE_FREQ'])
+        return cooling_inputs, cooling_outputs
diff --git a/raps/engine.py b/raps/engine.py
index b8de496..b78910a 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -3,13 +3,14 @@ import dataclasses
 import pandas as pd
 import numpy as np

-from .job import Job, JobState
-from .policy import PolicyType
-from .network import network_utilization
-from .utils import summarize_ranges, expand_ranges, get_utilization
-from .utils import sum_values, min_value, max_value
-from .resmgr import ResourceManager
-from .schedulers import load_scheduler
+from raps.job import Job, JobState
+from raps.policy import PolicyType
+from raps.network import network_utilization
+from raps.utils import summarize_ranges, expand_ranges, get_utilization, get_current_utilization
+from raps.utils import sum_values, min_value, max_value
+from raps.resmgr import ResourceManager
+from raps.schedulers import load_scheduler
+from raps.power import record_power_stats_foreach_job


 @dataclasses.dataclass
@@ -54,6 +55,7 @@ class Engine:
         self.debug = kwargs.get('debug')
         self.output = kwargs.get('output')
         self.replay = kwargs.get('replay')
+        self.simulate_network = kwargs.get('simulate_network')
         self.sys_util_history = []
         self.scheduler_queue_history = []
         self.scheduler_running_history = []
@@ -65,8 +67,8 @@ class Engine:

         self.scheduler = load_scheduler(scheduler_type)(
             config=self.config,
-            policy=kwargs.get('policy'),
-            bfpolicy=kwargs.get('backfill'),
+            policy=policy_type,
+            bfpolicy=backfill_type,
             resource_manager=self.resource_manager,
             jobs=jobs
         )
@@ -74,7 +76,6 @@ class Engine:
                              f", with policy {self.scheduler.policy} "\
                              f"and backfill {self.scheduler.bfpolicy}")

-
     def add_running_jobs_to_queue(self, jobs_to_submit: List):
         """ Modifies jobs_to_submit
@@ -114,8 +115,6 @@ class Engine:
         else:
             return False

-
-
     def prepare_timestep(self, replay:bool = True):
         # 1 identify completed jobs
         # 2 Simulate node failure  # Defunct feature!
@@ -166,17 +165,37 @@ class Engine:
             self.current_time += 1  # Update the current time every timestep

         # Stop the simulation if no more jobs are running or in the queue or in the job list.
-        if autoshutdown and not self.queue and not self.running and not self.replay and not all_jobs and not jobs:
+        if autoshutdown and \
+           len(self.queue) == 0 and \
+           len(self.running) == 0 and \
+           not self.replay and \
+           len(all_jobs) == 0 and \
+           len(jobs) == 0:
             print(f"[DEBUG] {self.config['system_name']} - Stopping simulation at time {self.current_time}")
             simulation_complete = True
         else:
             simulation_complete = False
         return simulation_complete

-    def tick(self,time_delta=1):
-        """Simulate a timestep."""
+    def tick(self, *, time_delta=1):
+        # Tick runs all simulations of interest at the given time delta interval.
+        #
+        # The simulations which are needed for simulation consistency at each time step
+        # (inside: the main simulation loop of run_simulation) are not part of tick.
+        #
+        # Tick contains:
+        # For each running job:
+        # - CPU utilization
+        # - GPU utilization
+        # - Network utilization
+        #
+        # From these the systems (across all nodes)
+        # - System Utilization
+        # - Power
+        # - Cooling
+        # - System Performance
+        # is simulated.

-        # Update running time for all running jobs
         scheduled_nodes = []
         cpu_utils = []
         gpu_utils = []
@@ -188,141 +207,73 @@ class Engine:
             if self.debug:
                 print(f"JobID: {job.id}")

-            if job.state == JobState.RUNNING:
+            if job.state != JobState.RUNNING:
+                raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}")
+            else:  # if job.state == JobState.RUNNING:
+                # Error checks
                 if job.running_time > job.wall_time:
                     raise Exception(f"Job should have ended already!\n\
                                     {job.running_time} > {job.wall_time}\
                                     ")
+                # Aggregate scheduled nodes
+                scheduled_nodes.append(job.scheduled_nodes)

-                if job.trace_quanta:
-                    time_quanta_index = int((job.running_time - job.trace_start_time) // job.trace_quanta)
-                    if time_quanta_index < 0:
-                        time_quanta_index = 0
-
-                # If the running time is past the last time step in the
-                # trace, use the last value in the trace. This can
-                # happen if the last valid timesteps is e.g. 17%15,
-                # the last trace value is 15%15 and the next possible
-                # trace value 30%15 but was not recorded because the
-                # job ended before.
-                # For every other error condition trace_start_ and
-                # _end_time are used!
-                # #print(type(job.cpu_trace))
-                # Similar with the first time_quanta index: If the job started
-                # in the past and no trace if there, read index 0 until values
-                # are available.
-
-                if (isinstance(job.cpu_trace,list) and job.cpu_trace != []) or \
-                   (isinstance(job.cpu_trace,np.ndarray) and job.cpu_trace.size != 0):
-                    if time_quanta_index < len(job.cpu_trace):
-                        cpu_util = get_utilization(job.cpu_trace, time_quanta_index)
-                    else:
-                        cpu_util = get_utilization(job.cpu_trace, max(0,len(job.cpu_trace) - 1))
-                elif isinstance(job.cpu_trace,float) or isinstance(job.cpu_trace,int):
-                    cpu_util = job.cpu_trace
-                else:
-                    cpu_util = 0
-
-                if (isinstance(job.gpu_trace,list) and job.gpu_trace != []) or \
-                   (isinstance(job.gpu_trace,np.ndarray) and job.gpu_trace.size != 0):
-                    if time_quanta_index < len(job.gpu_trace):
-                        gpu_util = get_utilization(job.gpu_trace, time_quanta_index)
-                    else:
-                        gpu_util = get_utilization(job.gpu_trace, max(0,len(job.gpu_trace) - 1))
-                elif isinstance(job.gpu_trace,float) or isinstance(job.gpu_trace,int):
-                    gpu_util = job.gpu_trace
-                else:
-                    gpu_util = 0
-
-                if (((isinstance(job.ntx_trace,list) and job.ntx_trace != []) or \
-                     (isinstance(job.ntx_trace,np.ndarray) and job.ntx_trace.size != 0)) \
-                    and \
-                    ((isinstance(job.nrx_trace,list) and job.nrx_trace != []) or \
-                     (isinstance(job.nrx_trace,np.ndarray) and job.nrx_trace.size != 0))):
-                    if time_quanta_index < len(job.ntx_trace):
-                        net_tx = get_utilization(job.ntx_trace, time_quanta_index)
-                    else:
-                        net_tx = get_utilization(job.ntx_trace, max(0,len(job.ntx_trace) - 1))
-                    if time_quanta_index < len(job.nrx_trace):
-                        net_rx = get_utilization(job.nrx_trace, time_quanta_index)
-                    else:
-                        net_rx = get_utilization(job.nrx_trace, max(0,len(job.nrx_trace) - 1))
-                    net_util = network_utilization(net_tx, net_rx)
-                elif (isinstance(job.ntx_trace,float) or isinstance(job.ntx_trace,int)) and \
-                     (isinstance(job.nrx_trace,float) or isinstance(job.nrx_trace,int)):
-                    net_tx = job.ntx_trace
-                    net_rx = job.nrx_trace
-                    net_util = network_utilization(net_tx, net_rx)
-                else:
-                    net_util = 0
-
-                scheduled_nodes.append(job.scheduled_nodes)  # ?
+                # Get CPU utilization
+                cpu_util = get_current_utilization(job.cpu_trace, job)
                 cpu_utils.append(cpu_util)
-                gpu_utils.append(gpu_util)
-                net_utils.append(net_util)
-            else:
-                raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}")

-        if len(scheduled_nodes) > 0:  # When can this not happen?
-            self.flops_manager.update_flop_state(scheduled_nodes, cpu_utils, gpu_utils)
-            jobs_power = self.power_manager.update_power_state(scheduled_nodes, cpu_utils, gpu_utils, net_utils)
+                # Get GPU utilization
+                gpu_util = get_current_utilization(job.gpu_trace, job)
+                gpu_utils.append(gpu_util)

-        _running_jobs = [job for job in self.running if job.state == JobState.RUNNING]
-        if len(jobs_power) != len(_running_jobs):
-            raise ValueError(f"Jobs power list of length ({len(jobs_power)}) should have ({len(_running_jobs)}) items.")
-        for i, job in enumerate(_running_jobs):
-            if job.running_time % self.config['TRACE_QUANTA'] == 0:
-                job.power_history.append(jobs_power[i] * len(job.scheduled_nodes))
-        #del _running_jobs
+                # Get network utilization
+                if self.simulate_network:
+                    ntx_util = get_current_utilization(job.ntx_trace, job)
+                    nrx_util = get_current_utilization(job.nrx_trace, job)
+                    net_util = network_utilization(ntx_util, nrx_util)
+                    net_utils.append(net_util)
+                else:
+                    net_utils.append(0.0)

-        # Update the power array UI component
-        rack_power, rect_losses = self.power_manager.compute_rack_power()
-        sivoc_losses = self.power_manager.compute_sivoc_losses()
-        rack_loss = rect_losses + sivoc_losses
+        # All required values for each job have been collected.
+        # Continue with calculations for the whole system:

-        # Update system utilization
+        # Utilization Statistics
         system_util = self.num_active_nodes / self.config['AVAILABLE_NODES'] * 100
-        self.sys_util_history.append((self.current_time, system_util))
-
-        self.scheduler_queue_history.append(len(self.running))
-        self.scheduler_running_history.append(len(self.queue))
-
-        # Render the updated layout
-        power_df = None
-        cooling_inputs, cooling_outputs = None, None
+        self.record_util_stats(system_util=system_util)
+
+        # Power
+        if self.power_manager:  # Power is always simulated
+            power_df, rack_power, total_power_kw, total_loss_kw, jobs_power = \
+                self.power_manager.simulate_power(running_jobs=self.running,
+                                                  scheduled_nodes=scheduled_nodes,
+                                                  cpu_utils=cpu_utils,
+                                                  gpu_utils=gpu_utils,
+                                                  net_utils=net_utils)
+
+            # Unclear what jobs_power is!
+            self.record_power_stats(time_delta=time_delta,
+                                    total_power_kw=total_power_kw,
+                                    total_loss_kw=total_loss_kw,
+                                    jobs_power=jobs_power)
+        else:
+            power_df = None

-        # If time_delta is 1 update power history every 15s, otherwise whenever tick runs
-        if (time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1:
-            total_power_kw = sum(row[-1] for row in rack_power) + self.config['NUM_CDUS'] * self.config['POWER_CDU'] / 1000.0
-            total_loss_kw = sum(row[-1] for row in rack_loss)
-            self.power_manager.history.append((self.current_time, total_power_kw))
-            self.sys_power = total_power_kw
-            self.power_manager.loss_history.append((self.current_time, total_loss_kw))
-            pflops = self.flops_manager.get_system_performance() / 1E15
-            gflop_per_watt = pflops * 1E6 / (total_power_kw * 1000)
+        # Cooling
+        if self.cooling_model:
+            cooling_inputs, cooling_outputs = self.cooling_model.simulate_cooling(self, rack_power)
         else:
-            pflops, gflop_per_watt = None, None
+            cooling_inputs, cooling_outputs = None, None

-        if (time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1:
-            if self.cooling_model:
-                # Power for NUM_CDUS (25 for Frontier)
-                cdu_power = rack_power.T[-1] * 1000
-                runtime_values = self.cooling_model.generate_runtime_values(cdu_power, self)
-
-                # FMU inputs are N powers and the wetbulb temp
-                fmu_inputs = self.cooling_model.generate_fmu_inputs(runtime_values,
-                                                                    uncertainties=self.power_manager.uncertainties)
-                cooling_inputs, cooling_outputs = (
-                    self.cooling_model.step(self.current_time, fmu_inputs, self.config['POWER_UPDATE_FREQ'])
-                )
-
-                # Get a dataframe of the power data
-                power_df = self.power_manager.get_power_df(rack_power, rack_loss)
-            else:
-                # Get a dataframe of the power data
-                power_df = self.power_manager.get_power_df(rack_power, rack_loss)
+        # Flops
+        if self.flops_manager:
+            pflops, gflops_per_watt = self.flops_manager.simulate_flops(scheduled_nodes=scheduled_nodes,
+                                                                        cpu_util=cpu_utils,
+                                                                        gpu_util=gpu_utils,
+                                                                        total_power_kw=total_power_kw)

+        # Continue with System Simulation
         tick_data = TickData(
             current_time=self.current_time,
             completed=None,
@@ -331,14 +282,13 @@ class Engine:
             down_nodes=expand_ranges(self.down_nodes[1:]),
             power_df=power_df,
             p_flops=pflops,
-            g_flops_w=gflop_per_watt,
-            system_util=self.num_active_nodes / self.config['AVAILABLE_NODES'] * 100,
+            g_flops_w=gflops_per_watt,
+            system_util=system_util,
             fmu_inputs=cooling_inputs,
             fmu_outputs=cooling_outputs,
             num_active_nodes=self.num_active_nodes,
             num_free_nodes=self.num_free_nodes,
         )
-
         return tick_data

     def prepare_system_state(self, all_jobs:List, timestep_start, timestep_end, replay:bool):
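The refactored tick() above follows a fixed pipeline: collect per-job CPU, GPU, and network utilization, then run the system-wide power, cooling, and FLOPS models in that order (power first, because the FLOPS-per-watt figure needs total_power_kw). A stripped-down, runnable sketch of that control flow; the stub class and all coefficients below are stand-ins, not the real RAPS managers:

    from dataclasses import dataclass

    @dataclass
    class StubJob:
        scheduled_nodes: list
        cpu_util: float
        gpu_util: float

    def tick(running, *, simulate_network=False):
        # 1) Per-job collection, mirroring the loop over self.running
        nodes, cpu_utils, gpu_utils, net_utils = [], [], [], []
        for job in running:
            nodes.append(job.scheduled_nodes)
            cpu_utils.append(job.cpu_util)
            gpu_utils.append(job.gpu_util)
            net_utils.append(0.5 if simulate_network else 0.0)

        # 2) System-wide models (stand-in coefficients, kW per node)
        total_power_kw = sum(len(n) * (0.2 + 0.5 * g) for n, g in zip(nodes, gpu_utils))
        pflops = sum(len(n) * 0.05 * g for n, g in zip(nodes, gpu_utils))
        gflops_per_watt = pflops * 1e6 / (total_power_kw * 1000) if total_power_kw else 0.0
        return total_power_kw, pflops, gflops_per_watt

    print(tick([StubJob([0, 1, 2, 3], 0.8, 0.9), StubJob([4, 5], 0.4, 0.1)]))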
@@ -401,13 +351,13 @@ class Engine: if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions: self.scheduler.schedule(self.queue, self.running, self.current_time,accounts=self.accounts, sorted=(not has_new_additions)) - if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0: print(".", end="", flush=True) # 4. Run tick only at specified time_delta - if 0 == (timestep % time_delta): - tick_data = self.tick(time_delta) + if 0 == (timestep % time_delta) and \ + ((time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1): + tick_data = self.tick(time_delta=time_delta) tick_data.completed = completed_jobs else: tick_data = None @@ -426,3 +376,18 @@ class Engine: def get_scheduler_running_history(self): return self.scheduler_running_history + + def record_util_stats(self,*, system_util): + self.sys_util_history.append((self.current_time, system_util)) + self.scheduler_queue_history.append(len(self.running)) + self.scheduler_running_history.append(len(self.queue)) + + def record_power_stats(self, *, time_delta, total_power_kw, total_loss_kw, jobs_power): + if (time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1: + # First job specific + record_power_stats_foreach_job(running_jobs=self.running, jobs_power=jobs_power) + # power manager + self.power_manager.history.append((self.current_time, total_power_kw)) + self.power_manager.loss_history.append((self.current_time, total_loss_kw)) + #engine + self.sys_power = total_power_kw diff --git a/raps/flops.py b/raps/flops.py index eebd0fa..0745971 100644 --- a/raps/flops.py +++ b/raps/flops.py @@ -9,6 +9,8 @@ class FLOPSManager(): self.flop_state = np.zeros(self.config['SC_SHAPE']) def update_flop_state(self, scheduled_nodes, cpu_util, gpu_util): + if len(scheduled_nodes) == 0: + return cpu_util = np.asarray(cpu_util) gpu_util = np.asarray(gpu_util) job_lengths = np.array([len(job) for job in scheduled_nodes]) @@ -19,30 +21,42 @@ class FLOPSManager(): node_indices = linear_to_3d_index(flattened_nodes, self.config['SC_SHAPE']) - if self.validate: # cpu_util is in fact node_Watts in this case total_peak = ( - self.config['CPU_FP_RATIO'] * self.config['CPU_PEAK_FLOPS'] + + self.config['CPU_FP_RATIO'] * self.config['CPU_PEAK_FLOPS'] + \ self.config['GPU_FP_RATIO'] * self.config['GPU_PEAK_FLOPS'] - ) + ) denominator = ( - self.config['POWER_CPU_MAX'] * self.config['CPUS_PER_NODE'] + - self.config['POWER_GPU_MAX'] * self.config['GPUS_PER_NODE'] + - self.config['POWER_NIC'] * self.config['NICS_PER_NODE'] + + self.config['POWER_CPU_MAX'] * self.config['CPUS_PER_NODE'] + \ + self.config['POWER_GPU_MAX'] * self.config['GPUS_PER_NODE'] + \ + self.config['POWER_NIC'] * self.config['NICS_PER_NODE'] + \ self.config['POWER_NVME'] - ) + ) self.flop_state[node_indices] = total_peak * (cpu_util_flat / denominator) - else: + else: self.flop_state[node_indices] = ( - self.config['CPU_FP_RATIO'] * cpu_util_flat * self.config['CPU_PEAK_FLOPS'] + + self.config['CPU_FP_RATIO'] * cpu_util_flat * self.config['CPU_PEAK_FLOPS'] + \ self.config['GPU_FP_RATIO'] * gpu_util_flat * self.config['GPU_PEAK_FLOPS'] ) def get_rpeak(self): - node_peak_flops = self.config['CPUS_PER_NODE'] * self.config['CPU_PEAK_FLOPS'] \ - + self.config['GPUS_PER_NODE'] * self.config['GPU_PEAK_FLOPS'] + node_peak_flops = ( + self.config['CPUS_PER_NODE'] * self.config['CPU_PEAK_FLOPS'] + \ + self.config['GPUS_PER_NODE'] * self.config['GPU_PEAK_FLOPS'] + ) system_peak_flops = 
self.config['AVAILABLE_NODES'] * node_peak_flops return system_peak_flops def get_system_performance(self): return np.sum(self.flop_state) + + def simulate_flops(self, *, scheduled_nodes, cpu_util, gpu_util, total_power_kw): + self.update_flop_state(scheduled_nodes=scheduled_nodes, + cpu_util=cpu_util, + gpu_util=gpu_util) + pflops = self.get_system_performance() / 1E15 + if total_power_kw != 0: + gflops_per_watt = pflops * 1E6 / (total_power_kw * 1000) + else: + gflops_per_watt = 0 + return pflops, gflops_per_watt diff --git a/raps/power.py b/raps/power.py index fb09828..70039c7 100644 --- a/raps/power.py +++ b/raps/power.py @@ -265,6 +265,8 @@ class PowerManager: float Total power consumption of the scheduled nodes. """ + if len(scheduled_nodes) == 0: + return [] cpu_util = np.asarray(cpu_util) gpu_util = np.asarray(gpu_util) net_util = np.asarray(net_util) @@ -276,7 +278,6 @@ class PowerManager: net_util_flat = np.repeat(net_util, job_lengths) node_indices = linear_to_3d_index(flattened_nodes, self.config['SC_SHAPE']) - power_value, sivoc_loss = self.power_func(cpu_util_flat, gpu_util_flat, net_util_flat, self.config) self.power_state[node_indices] = power_value self.sivoc_loss[node_indices] = sivoc_loss @@ -418,3 +419,44 @@ class PowerManager: power_df = pd.DataFrame(power_data, columns=power_columns) return power_df + + def simulate_power(self, *, + running_jobs, + scheduled_nodes, + cpu_utils, + gpu_utils, + net_utils + ): + jobs_power = self.update_power_state(scheduled_nodes, cpu_utils, gpu_utils, net_utils) + + for i, job in enumerate(running_jobs): + #if job.running_time % self.config['TRACE_QUANTA'] == 0: + job.power_history.append(jobs_power[i] * len(job.scheduled_nodes)) + + # Update the power array UI component + rack_power, rect_losses = self.compute_rack_power() + sivoc_losses = self.compute_sivoc_losses() + rack_loss = rect_losses + sivoc_losses + power_df = self.get_power_df(rack_power, rack_loss) + + total_power_kw = sum(row[-1] for row in rack_power) + self.config['NUM_CDUS'] * self.config['POWER_CDU'] / 1000.0 + total_loss_kw = sum(row[-1] for row in rack_loss) + + # Primary return value: + # power_df + # Other returns needed for further processing: + # rack_power, # For cooling + # total_power_kw, # For statistics + # total_loss_kw, # For statistics + # jobs_power # For statistics + # === + return power_df, \ + rack_power, \ + total_power_kw, \ + total_loss_kw, \ + jobs_power + + +def record_power_stats_foreach_job(*, running_jobs, jobs_power): + for i, job in enumerate(running_jobs): + job.power_history.append(jobs_power[i] * len(job.scheduled_nodes)) diff --git a/raps/ui.py b/raps/ui.py index e9091dc..c36129d 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -398,35 +398,35 @@ class LayoutManager: self.layout["lower"].update(Panel(Align(total_table, align="center"), title="Power and Performance")) - def update_progress(self, timestamp): + def update_progress_bar(self, timestamp): self.progress.update(self.progress_task, description=f"{timestamp}",advance=timestamp,transient=True) self.layout["progress"].update(self.progress.get_renderable()) - def update(self, data: TickData, time_delta=1): + def update_full_layout(self, data: TickData, time_delta=1): if self.debug: return uncertainties = self.engine.power_manager.uncertainties - if data.current_time % self.config['UI_UPDATE_FREQ'] == 0: - if self.engine.cooling_model: - self.update_powertemp_array( - data.power_df, data.fmu_outputs, data.p_flops, data.g_flops_w, data.system_util, - uncertainties=uncertainties, - ) - 
self.update_pressflow_array(data.fmu_outputs)
-
-            self.update_scheduled_jobs(data.running + data.queue)
-            self.update_status(
-                data.current_time, len(data.running), len(data.queue), data.num_active_nodes,
-                data.num_free_nodes, data.down_nodes,
+        #if data.current_time % self.config['UI_UPDATE_FREQ'] == 0:
+        if self.engine.cooling_model:
+            self.update_powertemp_array(
+                data.power_df, data.fmu_outputs, data.p_flops, data.g_flops_w, data.system_util,
+                uncertainties=uncertainties,
             )
-            self.update_power_array(
-                data.power_df, data.p_flops, data.g_flops_w,
-                data.system_util, uncertainties=uncertainties,
-            )
-            if False:
-                self.render()
-        self.update_progress(time_delta)
+            self.update_pressflow_array(data.fmu_outputs)
+
+        self.update_scheduled_jobs(data.running + data.queue)
+        self.update_status(
+            data.current_time, len(data.running), len(data.queue), data.num_active_nodes,
+            data.num_free_nodes, data.down_nodes,
+        )
+        self.update_power_array(
+            data.power_df, data.p_flops, data.g_flops_w,
+            data.system_util, uncertainties=uncertainties,
+        )
+        if False:
+            self.render()
+

     def render(self):
         if not self.debug:
@@ -442,7 +442,8 @@ class LayoutManager:
         with context:
             for data in self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta, autoshutdown=True):
                 if data:
-                    self.update(data,time_delta)
+                    self.update_full_layout(data,time_delta)
+                    self.update_progress_bar(1)

     def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta):
         """ Prepares the UI and returns a generator for the simulation """
diff --git a/raps/utils.py b/raps/utils.py
index aaceecb..853241b 100644
--- a/raps/utils.py
+++ b/raps/utils.py
@@ -19,6 +19,7 @@ import sys
 import uuid
 import json

+from raps.job import Job

 def sum_values(values):
     return sum(x[1] for x in values) if values else 0
@@ -536,6 +537,30 @@ def convert_numpy_to_builtin(obj):
     return obj


+def get_current_utilization(trace, job: Job):
+    # Return utilization for a trace at the job's current running time.
+    # Note: this should move to a trace.py and a Trace class!
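+    #
+    # Indexing sketch (illustrative numbers, not from any trace): with
+    # job.trace_quanta == 15 and job.trace_start_time == 0, a job at
+    # running_time == 47 reads quanta index (47 - 0) // 15 == 3; an index
+    # past the end of a short trace falls back to the last recorded sample,
+    # and a scalar trace is returned as-is.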
+ util = 0.0 + + if job.trace_quanta: + time_quanta_index = int((job.running_time - job.trace_start_time) // job.trace_quanta) + if time_quanta_index < 0: + time_quanta_index = 0 + + if (isinstance(trace,list) and trace != []) or \ + (isinstance(trace, np.ndarray) and trace.size != 0): + if time_quanta_index < len(trace): + util = get_utilization(trace, time_quanta_index) + else: + util = get_utilization(trace, max(0,len(trace) - 1)) + elif isinstance(trace,float) or isinstance(trace,int): + util = trace + else: + util = 0.0 + + return util + + def get_utilization(trace, time_quanta_index): """Retrieve utilization value for a given trace at a specific time quanta index.""" if isinstance(trace, (list, np.ndarray)): -- GitLab From d74d34093fda2a18901be7e831ce9cbf84c696d3 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 9 Jul 2025 20:03:48 -0400 Subject: [PATCH 150/388] Quite a bit of progress towards getting both cpu and gpu traces working --- README.md | 2 +- multi-part-sim.py | 3 +- raps/dataloaders/mit_supercloud.py | 324 +---------------------------- 3 files changed, 4 insertions(+), 325 deletions(-) mode change 100644 => 120000 raps/dataloaders/mit_supercloud.py diff --git a/README.md b/README.md index 6ec73cb..0d15520 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ For MIT Supercloud python main.py -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud - python multi-part-sim.py -x 'mit_supercloud/*' -f ~/data/mit --system mit_supercloud --arrival poisson + python multi-part-sim.py -x 'mit_supercloud/*' -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud --arrival poisson ## Perform Network Simulation diff --git a/multi-part-sim.py b/multi-part-sim.py index 8c18b2e..7515002 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -15,7 +15,7 @@ from raps.flops import FLOPSManager from raps.power import PowerManager, compute_node_power from raps.telemetry import Telemetry from raps.workload import Workload -from raps.utils import convert_to_seconds, next_arrival +from raps.utils import create_casename, convert_to_seconds, next_arrival from tqdm import tqdm # Load configurations for each partition @@ -55,6 +55,7 @@ if args.replay: print(f"[{part}] loading traces from {args.replay[0]} …") jobs_part, t0, t1 = td.load_data(args.replay) jobs_by_partition[part] = jobs_part + td.save_snapshot(jobs_part, t0, t1, args, filename=part.split('/')[-1]) # --- report how many jobs per partition --- for part, jl in jobs_by_partition.items(): diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py deleted file mode 100644 index 380823a..0000000 --- a/raps/dataloaders/mit_supercloud.py +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -MIT Supercloud job trace processing module with load_data function. 
-""" - -import os -import shutil -import sys -from datetime import datetime - -import numpy as np -import pandas as pd -from scipy.sparse import csr_matrix as csr -from tqdm import tqdm - -from raps.job import job_dict - - -def proc_cpu_series(dfi): - dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() - dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 - - t = pd.to_datetime(dfi.EpochTime, unit='s') - start_time = t.min() - dfi['t'] = ((t - start_time).dt.total_seconds() // 10).astype(int) - dfi['sid'] = pd.factorize(dfi.Step)[0] - - useries = dfi.Series.unique() - inds = np.arange(dfi.t.max() + 1) - df = pd.DataFrame({'t': inds}) - Xm, Xrss, Xvm, Xreadmb, Xwritemb = (np.zeros((len(useries), len(inds))) for _ in range(5)) - - for cnt, i in enumerate(useries): - sift = dfi.Series == i - M, N = len(inds), dfi.sid[sift].max() + 1 - - for metric, arr, name in zip( - ['CPUUtilization', 'RSS', 'VMSize', 'ReadMB', 'WriteMB'], - [Xm, Xrss, Xvm, Xreadmb, Xwritemb], - ['cpu', 'rss', 'vm', 'readmb', 'writemb'] - ): - X = csr((dfi.loc[sift, metric], (dfi.loc[sift, 't'], dfi.loc[sift, 'sid'])), shape=(M, N)) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df[f'{name}_{i}'] = mm - arr[cnt, :] = mm - - df['cpu_utilisation'] = Xm.mean(axis=0) - df['rss'] = Xrss.sum(axis=0) - df['vm'] = Xvm.sum(axis=0) - df['readmb'] = Xreadmb.sum(axis=0) - df['writemb'] = Xwritemb.sum(axis=0) - df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') - df['utime'] = df['timestamp'].astype('int64') // 10**9 - - return df - -def proc_gpu_series(cpu_df, dfi, gpu_cnt): - # 1) Build CPU time range - t_cpu_start = int(cpu_df.utime.min()) - t_cpu_end = int(cpu_df.utime.max()) - t_cpu = np.array([t_cpu_start, t_cpu_end, t_cpu_end - t_cpu_start]) - - # 2) Safely convert the GPU timestamps to integer seconds - # (this handles strings like "1621607266.426") - ts = pd.to_numeric(dfi["timestamp"], errors="coerce") # float64 or NaN - ts_int = ts.ffill().astype(float).astype(int) - t0, t1 = ts_int.min(), ts_int.max() - t_gpu = np.array([t0, t1, t1 - t0]) - - # 3) Sanity‐check the durations match within 10% - per_diff = ((t_cpu[1] - t_cpu[0]) - (t_gpu[1] - t_gpu[0])) / (t_gpu[1] - t_gpu[0]) * 100 - if abs(per_diff) > 10: - # warn and proceed — GPU trace may be trimmed or misaligned - print(f"Warning: GPU‐CPU time mismatch {per_diff:.1f}% exceeds 10%; continuing anyway") - - # 4) Align GPU times onto CPU utime grid - # Use our integer‐second Series rather than the raw column - dfi["t_fixed"] = ts_int - ts_int.min() + t_cpu_start - - # 5) Prepare output DataFrame with a utime column - ugpus = dfi.gpu_index.unique() - gpu_df = pd.DataFrame({"utime": cpu_df["utime"].values}) - - # 6) Interpolate each GPU field onto the CPU utime grid - fields = [ - "utilization_gpu_pct", - "utilization_memory_pct", - "memory_free_MiB", - "memory_used_MiB", - "temperature_gpu", - "temperature_memory", - "power_draw_W", - ] - for field in fields: - # grab the float‐converted timestamp and the metric - x1 = ts_int.values - y1 = dfi[field].astype(float).values - xv = cpu_df["utime"].values - # numpy interpolation - gpu_df[field] = np.interp(xv, x1, y1) - - # 7) Rename the GPU pct, memory pct, and power columns with the device index - ren = { - "gpu_index": f"gpu_index_{gpu_cnt}", - "utilization_gpu_pct": f"gpu_util_{gpu_cnt}", - "utilization_memory_pct":f"gpu_mempct_{gpu_cnt}", - "memory_free_MiB": f"gpu_memfree_{gpu_cnt}", - "memory_used_MiB": f"gpu_memused_{gpu_cnt}", - "temperature_gpu": f"gpu_temp_{gpu_cnt}", - 
"temperature_memory": f"gpu_memtemp_{gpu_cnt}", - "power_draw_W": f"gpu_power_{gpu_cnt}", - } - gpu_df.rename(columns=ren, inplace=True) - - return gpu_df, gpu_cnt + 1 - -def load_data(local_dataset_path, **kwargs): - """ - Load MIT Supercloud job traces. - Expects: - local_dataset_path/ - metadata/ - file_list.csv - job_user_date_full.csv - 202201/ # hard-coded for now; change as needed - cpu/...-timeseries.csv - gpu/...-timeseries.csv - slurm-log.csv - Returns: - jobs_list, sim_start_time, sim_end_time - """ - # 1) Unpack list if necessary - if isinstance(local_dataset_path, list): - if len(local_dataset_path) != 1: - raise ValueError("MIT Supercloud loader accepts exactly one path") - local_dataset_path = local_dataset_path[0] - - # 2) Read metadata - meta_dir = os.path.join(local_dataset_path, "metadata") - file_list_df = pd.read_csv(os.path.join(meta_dir, "file_list.csv"), sep="\t") - job_index_df = pd.read_csv(os.path.join(meta_dir, "job_user_date_full.csv")) - - # 3) Date filtering settings - start_date_str = kwargs.get("start_date", "21052021") - end_date_str = kwargs.get("end_date", "22052021") - jid = kwargs.get("jid", "*") - # determine whether this is the CPU or GPU partition - part = kwargs.get("partition", "").lower() - cpu_only = ("cpu" in part) and ("gpu" not in part) - gpu_only = ("gpu" in part) and ("cpu" not in part) - - start_ts = int(datetime.strptime(start_date_str, "%d%m%Y").timestamp()) - end_ts = int(datetime.strptime(end_date_str, "%d%m%Y").timestamp()) - requested_duration = end_ts - start_ts - - # 4) Select jobs in time window - selected_df = job_index_df[ - (job_index_df.start > start_ts) & - (job_index_df.start < end_ts) - ].copy() - -###### - data_subdir = "202201" # hard-coded folder name - print(local_dataset_path, data_subdir) - - # --- 1) Load and filter Slurm log for GPU jobs in [start_ts, end_ts) --- - slurm_path = os.path.join(local_dataset_path, data_subdir, "slurm-log.csv") - slurm_df = pd.read_csv(slurm_path) - - # Keep only rows within your date window - sl = slurm_df[ - (slurm_df.time_submit >= start_ts) & - (slurm_df.time_submit < end_ts) - ] - - # Filter to those that actually used GPUs - def row_uses_gpu(r): - return ("gpu" in str(r.get("gres_used","")).lower() - or "1001=" in str(r.get("tres_alloc","")) - or "1002=" in str(r.get("tres_alloc",""))) - gpu_sl = sl[sl.apply(row_uses_gpu, axis=1)] - - gpu_job_ids = set(gpu_sl.id_job.unique()) - print(f"→ Found {len(gpu_job_ids)} GPU‐using jobs in your date range") - - # --- 2) Pull their GPU timeseries paths from file_list.csv --- - #gpu_entries = file_list_df[ - # file_list_df["File Name"].str.contains("/gpu/") - #].copy() - - # should match both "gpu/..." 
at the start _and_ anywhere else - #gpu_entries = file_list_df[ - # file_list_df["File Name"].str.contains(r"(^|/)gpu/") - #].copy() - - # Option 2: simple substring match (matches anywhere “gpu/” appears) - gpu_entries = file_list_df[ - file_list_df["File Name"].str.contains("gpu/") - ].copy() - - gpu_entries["job_id"] = ( - gpu_entries["File Name"] - .str.extract(r"/(\d+)-", expand=False) - .astype(int) - ) - gpu_sel = gpu_entries[gpu_entries["job_id"].isin(gpu_job_ids)] - gpu_files = gpu_sel["File Name"].tolist() - print(f"→ Will process {len(gpu_files)} GPU files") - - # --- 3) Combine with your CPU list and dedupe --- - cpu_files = [ - fn.replace("-summary","-timeseries") - for fn in selected_df["filename"] - ] - trace_files = list(set(cpu_files + gpu_files)) - - # filter by partition - if cpu_only: - trace_files = cpu_files - elif gpu_only: - trace_files = gpu_files - # else leave both - trace_files = list(set(trace_files)) - - print(f"Total files to load: {len(trace_files)} (CPU: {len(cpu_files)}, GPU: {len(gpu_files)})") - - # 7) Read SLURM log - slurm_log = next( - ( - os.path.join(r, "slurm-log.csv") - for r, _, fs in os.walk(os.path.join(local_dataset_path, data_subdir)) - if "slurm-log.csv" in fs - ), - None - ) - if not slurm_log: - return [], 0, 0 - slurm_df = pd.read_csv(slurm_log) - - # 8) Process each file, populating data_dict - data_dict = {} - for rel_path in tqdm(trace_files, desc="Processing trace files"): - fpath = os.path.join(local_dataset_path, data_subdir, rel_path) - if not os.path.exists(fpath): - print(f"Warning: missing {fpath}") - continue - - tqdm.write(f"Reading {rel_path}") - dfi = pd.read_csv(fpath, dtype={0: str}) - jobid = int(os.path.basename(rel_path).split("-")[0]) - data_dict.setdefault(jobid, {}) - - # CPU timeseries - if rel_path.endswith("-timeseries.csv") and "cpu" not in data_dict[jobid]: - data_dict[jobid]["cpu"] = proc_cpu_series(dfi) - - # GPU timeseries - elif "gpu_index" in dfi.columns: - cpu_df = data_dict[jobid].get("cpu") - if cpu_df is None: - continue - gpu_cnt = data_dict[jobid].get("gpu_cnt", 0) - prev_gpu = data_dict[jobid].get("gpu") - gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) - if prev_gpu is None: - data_dict[jobid]["gpu"] = gpu_ser - else: - data_dict[jobid]["gpu"] = pd.merge(prev_gpu, gpu_ser, on="utime") - data_dict[jobid]["gpu_cnt"] = gpu_cnt - - # 9) Merge SLURM metadata for each job - for jobid in list(data_dict): - matches = slurm_df[slurm_df["id_job"] == jobid] - if len(matches) == 1: - data_dict[jobid].update(matches.iloc[0].to_dict()) - - # 10) Compute overall time bounds - cpu_utimes = [d["cpu"]["utime"] for d in data_dict.values() if "cpu" in d] - - # 11) Build the final list of job_dicts - jobs_list = [] - for jobid, data in data_dict.items(): - # skip any job that never loaded a CPU trace - cpu_ser = data.get("cpu") - if cpu_ser is None: - print(f"Warning: skipping job {jobid} (no CPU trace)") - continue - cpu_trace = cpu_ser["cpu_utilisation"] - cpu_trace = cpu_trace.tolist() if isinstance(cpu_trace, pd.Series) else cpu_trace - gpu_df = data.get("gpu") - gpu_trace_list = gpu_df.values.tolist() if isinstance(gpu_df, pd.DataFrame) else 0 - - submit_time = data.get("time_submit") - start_ts - job_start = data["cpu"]["utime"].min() - start_ts - job_end = data["cpu"]["utime"].max() - start_ts - wall_time = max(0, job_end - job_start) - nodes_req = data.get("nodes_alloc") - if nodes_req > 1 and cpu_trace: - cpu_trace = [x / nodes_req for x in cpu_trace] - - jobs_list.append(job_dict( - 
nodes_required=nodes_req, - name=data.get("name_job", "unknown"), - account=data.get("id_user", "unknown"), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace_list, - ntx_trace=[], - nrx_trace=[], - end_state=data.get("state_end", "UNKNOWN"), - id=jobid, - priority=data.get("priority", 0), - submit_time=submit_time, - time_limit=data.get("time_limit", 0), - start_time=job_start, - end_time=job_end, - wall_time=wall_time, - trace_time=len(cpu_trace) * 10.0, - trace_start_time=0, - trace_end_time=len(cpu_trace) * 10.0 - )) - - return jobs_list, 0, requested_duration diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py new file mode 120000 index 0000000..bb7213d --- /dev/null +++ b/raps/dataloaders/mit_supercloud.py @@ -0,0 +1 @@ +mit_supercloud5.py \ No newline at end of file -- GitLab From c87762ba4b8145847fa79e70b129da896ec872d7 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 10 Jul 2025 00:13:35 -0400 Subject: [PATCH 151/388] Add support for multi-part-sim.py to read in the npz files rather than the dataloader --- README.md | 4 +++- multi-part-sim.py | 44 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 0d15520..f01b2d4 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,9 @@ For MIT Supercloud python main.py -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud - python multi-part-sim.py -x 'mit_supercloud/*' -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud --arrival poisson + python multi-part-sim.py -x 'mit_supercloud/*' -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud + + python multi-part-sim.py -x mit_supercloud/* -f part-*.npz --system mit_supercloud ## Perform Network Simulation diff --git a/multi-part-sim.py b/multi-part-sim.py index 7515002..c93be91 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -48,14 +48,41 @@ if args.replay: #for job in jobs: # job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0] + #jobs_by_partition = {} + #for ad in args_dicts: + # part = ad['partition'] + # td = Telemetry(**ad) + # print(f"[{part}] loading traces from {args.replay[0]} …") + # jobs_part, t0, t1 = td.load_data(args.replay) + # jobs_by_partition[part] = jobs_part + # td.save_snapshot(jobs_part, t0, t1, args, filename=part.split('/')[-1]) + jobs_by_partition = {} - for ad in args_dicts: - part = ad['partition'] - td = Telemetry(**ad) - print(f"[{part}] loading traces from {args.replay[0]} …") - jobs_part, t0, t1 = td.load_data(args.replay) - jobs_by_partition[part] = jobs_part - td.save_snapshot(jobs_part, t0, t1, args, filename=part.split('/')[-1]) + t0_by_partition = {} + t1_by_partition = {} + + if args.replay[0].endswith('.npz'): + # snapshot mode: pick the right .npz for each partition + snap_map = { os.path.basename(p): p for p in args.replay } + for ad in args_dicts: + part = ad['partition'] # e.g. 
From 4383d1221abe2d3938238b48bc1634b894b5ceb5 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 9 Jul 2025 22:16:24 -0700
Subject: [PATCH 152/388] Clean up multi-part-sim*.py

---
 multi-part-sim-mpi.py |  2 --
 multi-part-sim.py     | 23 -----------------------
 2 files changed, 25 deletions(-)

diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py
index efec8d3..7929680 100644
--- a/multi-part-sim-mpi.py
+++ b/multi-part-sim-mpi.py
@@ -68,8 +68,6 @@ def main():
             p_cfg = configs[partition_names.index(p_name)]
             job['requested_nodes'] = None
             job['submit_time'] = next_arrival(1 / p_cfg['JOB_ARRIVAL_TIME'])
-    elif args.arrival == 'prescribed':
-        raise NotImplementedError("‘prescribed’ arrival not implemented yet")

     # --- b) “SYNTHETIC WORKLOAD” branch:
     else:
diff --git a/multi-part-sim.py b/multi-part-sim.py
index c93be91..c36cbef 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -27,7 +27,6 @@ if '*' in args.partitions[0]:
     partition_names = [os.path.join(*p.split(os.sep)[-2:]) for p in paths]

 configs = [ConfigManager(system_name=partition).get_config() for partition in partition_names]
-#args_dicts = [{**vars(args), 'config': config} for config in configs]
 args_dicts = [
     {**vars(args), 'config': config, 'partition': partition_names[i]}
     for i, config in enumerate(configs)
@@ -36,27 +35,6 @@ args_dicts = [

 # Initialize Workload
 if args.replay:
-    # Currently this assumes that an .npz file has already been created
-    # e.g., python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet
-
-    #td = Telemetry(**args_dicts[0])
-    #print(f"Loading {args.replay[0]}...")
-    #jobs = td.load_snapshot(args.replay[0])
-    #available_nodes = [config['AVAILABLE_NODES'] for config in configs]
-    #print("available nodes:", available_nodes)
-    # Randomly assign partition
-    #for job in jobs:
-    #    job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0]
-
-    #jobs_by_partition = {}
-    #for ad in args_dicts:
-    #    part = ad['partition']
-    #    td = Telemetry(**ad)
-    #    print(f"[{part}] loading traces from {args.replay[0]} …")
-    #    jobs_part, t0, t1 = td.load_data(args.replay)
-    #    jobs_by_partition[part] = jobs_part
-    #    td.save_snapshot(jobs_part, t0, t1, args, filename=part.split('/')[-1])
-
     jobs_by_partition = {}
     t0_by_partition = {}
     t1_by_partition = {}
@@ -158,4 +136,3 @@ for timestep in range(timesteps):
     print(f"system power: {sys_power:.1f}kW")

 print("Simulation complete.")
-
--
GitLab

From 530ce75bfb78fb296d7b6f3af2e853d49b1f3f03 Mon Sep
17 00:00:00 2001 From: Wes Brewer Date: Thu, 10 Jul 2025 10:18:35 -0400 Subject: [PATCH 153/388] Delete mit_supercloud.py - issues with being symbolic link --- raps/dataloaders/mit_supercloud.py | 1 - 1 file changed, 1 deletion(-) delete mode 120000 raps/dataloaders/mit_supercloud.py diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py deleted file mode 120000 index bb7213d..0000000 --- a/raps/dataloaders/mit_supercloud.py +++ /dev/null @@ -1 +0,0 @@ -mit_supercloud5.py \ No newline at end of file -- GitLab From 50a56f3a753d28ef67bf17ed44a1aaf9e1cd7dee Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 10 Jul 2025 10:19:59 -0400 Subject: [PATCH 154/388] Re-add mit_supercloud.py --- raps/dataloaders/mit_supercloud.py | 371 +++++++++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 raps/dataloaders/mit_supercloud.py diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py new file mode 100644 index 0000000..7c7c3b9 --- /dev/null +++ b/raps/dataloaders/mit_supercloud.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +MIT Supercloud job trace processing module with load_data function. +""" + +import os +import shutil +import sys +from datetime import datetime + +import numpy as np +import pandas as pd +from scipy.sparse import csr_matrix as csr +from tqdm import tqdm + +from raps.job import job_dict + +def proc_cpu_series(dfi): + dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() + dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 + + t = pd.to_datetime(dfi.EpochTime, unit='s') + start_time = t.min() + dfi['t'] = ((t - start_time).dt.total_seconds() // 10).astype(int) + dfi['sid'] = pd.factorize(dfi.Step)[0] + + useries = dfi.Series.unique() + inds = np.arange(dfi.t.max() + 1) + df = pd.DataFrame({'t': inds}) + Xm, Xrss, Xvm, Xreadmb, Xwritemb = (np.zeros((len(useries), len(inds))) for _ in range(5)) + + for cnt, i in enumerate(useries): + sift = dfi.Series == i + M, N = len(inds), dfi.sid[sift].max() + 1 + + for metric, arr, name in zip( + ['CPUUtilization', 'RSS', 'VMSize', 'ReadMB', 'WriteMB'], + [Xm, Xrss, Xvm, Xreadmb, Xwritemb], + ['cpu', 'rss', 'vm', 'readmb', 'writemb'] + ): + X = csr((dfi.loc[sift, metric], (dfi.loc[sift, 't'], dfi.loc[sift, 'sid'])), shape=(M, N)) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df[f'{name}_{i}'] = mm + arr[cnt, :] = mm + + df['cpu_utilisation'] = Xm.mean(axis=0) + df['rss'] = Xrss.sum(axis=0) + df['vm'] = Xvm.sum(axis=0) + df['readmb'] = Xreadmb.sum(axis=0) + df['writemb'] = Xwritemb.sum(axis=0) + df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') + df['utime'] = df['timestamp'].astype('int64') // 10**9 + + return df + +def proc_gpu_series(cpu_df, dfi, gpu_cnt): + # 1) Build CPU time range + t_cpu_start = int(cpu_df.utime.min()) + t_cpu_end = int(cpu_df.utime.max()) + t_cpu = np.array([t_cpu_start, t_cpu_end, t_cpu_end - t_cpu_start]) + + # 2) Safely convert the GPU timestamps to integer seconds + # (this handles strings like "1621607266.426") + ts = pd.to_numeric(dfi["timestamp"], errors="coerce") # float64 or NaN + ts_int = ts.ffill().astype(float).astype(int) + t0, t1 = ts_int.min(), ts_int.max() + t_gpu = np.array([t0, t1, t1 - t0]) + + # 3) Sanity‐check the durations match within 10% + per_diff = ((t_cpu[1] - t_cpu[0]) - (t_gpu[1] - t_gpu[0])) / (t_gpu[1] - t_gpu[0]) * 100 + if abs(per_diff) > 10: + # warn and proceed — GPU trace may be trimmed or misaligned + tqdm.write(f"Warning: GPU‐CPU time 
mismatch {per_diff:.1f}% exceeds 10%; continuing anyway") + + # 4) Align GPU times onto CPU utime grid + # Use our integer‐second Series rather than the raw column + dfi["t_fixed"] = ts_int - ts_int.min() + t_cpu_start + + # 5) Prepare output DataFrame with a utime column + ugpus = dfi.gpu_index.unique() + gpu_df = pd.DataFrame({"utime": cpu_df["utime"].values}) + + # 6) Interpolate each GPU field onto the CPU utime grid + fields = [ + "utilization_gpu_pct", + "utilization_memory_pct", + "memory_free_MiB", + "memory_used_MiB", + "temperature_gpu", + "temperature_memory", + "power_draw_W", + ] + for field in fields: + # grab the float‐converted timestamp and the metric + x1 = ts_int.values + y1 = dfi[field].astype(float).values + xv = cpu_df["utime"].values + # numpy interpolation + gpu_df[field] = np.interp(xv, x1, y1) + + # 7) Rename the GPU pct, memory pct, and power columns with the device index + ren = { + "gpu_index": f"gpu_index_{gpu_cnt}", + "utilization_gpu_pct": f"gpu_util_{gpu_cnt}", + "utilization_memory_pct":f"gpu_mempct_{gpu_cnt}", + "memory_free_MiB": f"gpu_memfree_{gpu_cnt}", + "memory_used_MiB": f"gpu_memused_{gpu_cnt}", + "temperature_gpu": f"gpu_temp_{gpu_cnt}", + "temperature_memory": f"gpu_memtemp_{gpu_cnt}", + "power_draw_W": f"gpu_power_{gpu_cnt}", + } + gpu_df.rename(columns=ren, inplace=True) + + return gpu_df, gpu_cnt + 1 + +def load_data(local_dataset_path, **kwargs): + """ + Load MIT Supercloud job traces **without** any metadata files. + Expects under: + local_dataset_path/ + 202201/ + cpu/...-timeseries.csv + gpu/...-timeseries.csv + slurm-log.csv + Returns: + jobs_list, sim_start_time, sim_end_time + """ + debug = kwargs.get("debug") + # unpack + if isinstance(local_dataset_path, list): + if len(local_dataset_path) != 1: + raise ValueError("Expect exactly one path") + local_dataset_path = local_dataset_path[0] + + # 1) slurm log → DataFrame + sub = "202201" + slurm_path = os.path.join(local_dataset_path, sub, "slurm-log.csv") + sl = pd.read_csv(slurm_path) + + # 2) date window + start_ts = int(datetime.strptime(kwargs.get("start_date","21052021"), "%d%m%Y").timestamp()) + end_ts = int(datetime.strptime(kwargs.get("end_date", "22052021"), "%d%m%Y").timestamp()) + duration = end_ts - start_ts + + sl = sl[(sl.time_submit >= start_ts) & (sl.time_submit < end_ts)] + + # 3) detect GPU‐using jobs + gres = sl.gres_used.fillna("").astype(str) + tres = sl.tres_alloc.fillna("").astype(str) + + gpu_jobs = set(sl.loc[ + gres.str.contains("gpu", case=False) | + tres.str.contains(r"(?:1001|1002)=", regex=True), + "id_job" + ]) + + # 4) partition mode + part = kwargs.get("partition","").split("/")[-1].lower() + cpu_only = (part=="part-cpu") + mixed = (part=="part-gpu") + + if cpu_only: + job_ids = set(sl.id_job) - gpu_jobs + elif mixed: + job_ids = gpu_jobs & set(sl.id_job) + else: + job_ids = set(sl.id_job) + + print(f"→ mode={part}, jobs: {len(job_ids)}") + + # 5) find trace files by walking directories + cpu_files = [] + cpu_root = os.path.join(local_dataset_path, sub, "cpu") + for R,_,fs in os.walk(cpu_root): + for f in fs: + if not f.endswith("-timeseries.csv"): + continue + jid = int(f.split("-",1)[0]) + if jid in job_ids: + rel = os.path.relpath(os.path.join(R,f), os.path.join(local_dataset_path,sub)) + cpu_files.append(rel) + + gpu_files = [] + gpu_root = os.path.join(local_dataset_path, sub, "gpu") + for R,_,fs in os.walk(gpu_root): + for f in fs: + #if not f.endswith("-timeseries.csv"): + if not f.endswith(".csv"): + continue + jid = int(f.split("-",1)[0]) + if jid 
in job_ids: + rel = os.path.relpath(os.path.join(R,f), os.path.join(local_dataset_path,sub)) + gpu_files.append(rel) + + # 6) select final trace list + if cpu_only: + traces = cpu_files + elif mixed: + traces = list(set(cpu_files + gpu_files)) + + ### check overlap + cpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in cpu_files} + gpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in gpu_files} + + if debug: + print(f"[DEBUG] CPU IDs: {len(cpu_ids)} GPU IDs: {len(gpu_ids)} OVERLAP: {len(cpu_ids & gpu_ids)}") + if cpu_ids & gpu_ids: + print(" example overlap:", list(cpu_ids & gpu_ids)[:5]) + else: + print(" → **No overlap**! That means none of your GPU job IDs ever had a CPU file in `cpu_files`.") + + else: + traces = list(set(cpu_files + gpu_files)) + + print(f"→ {len(cpu_files)} CPU files, {len(gpu_files)} GPU files → total {len(traces)}") + + data = {} + + # 8a) CPU first + for rel in tqdm(cpu_files, desc="Loading CPU traces"): + fp = os.path.join(local_dataset_path, sub, rel) + df = pd.read_csv(fp, dtype={0: str}) + jid = int(os.path.basename(rel).split("-", 1)[0]) + rec = data.setdefault(jid, {}) + tqdm.write(f"Reading CPU {rel}") + rec["cpu"] = proc_cpu_series(df) + + print(f"GPU candidate files ({len(gpu_files)}):") + for p in gpu_files[:10]: + print(" ", p) + + for rel in tqdm(gpu_files, desc="Loading GPU traces"): + fp = os.path.join(local_dataset_path, sub, rel) + if debug: + print(f"\n[DEBUG] attempting {rel!r}") + print(" full path exists:", os.path.exists(fp), fp) + if not os.path.exists(fp): + continue + + tqdm.write(f"Reading GPU {rel}") + dfi = pd.read_csv(fp, dtype={0: str}) + if debug: + print(" loaded dataframe, columns:", dfi.columns.tolist()) + if "gpu_index" not in dfi.columns: + tqdm.write(" → no gpu_index column! SKIPPING") + continue + + jid = int(os.path.basename(rel).split("-", 1)[0]) + rec = data.setdefault(jid, {}) + cpu_df = rec.get("cpu") + if cpu_df is None: + tqdm.write(f"Warning: no CPU trace for job {jid}, skipping GPU") + continue + + gpu_cnt = rec.get("gpu_cnt", 0) + gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) + + gpu_cnt = data[jid].get("gpu_cnt", 0) + prev_gpu = data[jid].get("gpu") # ← define prev_gpu here + gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) + if prev_gpu is None: + data[jid]["gpu"] = gpu_ser + else: + data[jid]["gpu"] = pd.merge(prev_gpu, gpu_ser, on="utime") + data[jid]["gpu_cnt"] = gpu_cnt + + if debug: + print(f"[DEBUG] proc_gpu_series returned {len(gpu_ser)} rows (gpu_cnt={gpu_cnt})") + + if "gpu" in rec: + rec["gpu"] = pd.merge(rec["gpu"], gpu_ser, on="utime", how="outer") + else: + rec["gpu"] = gpu_ser + rec["gpu_cnt"] = gpu_cnt + + gpu_df = rec["gpu"] + + # 1) grab all the gpu‐util columns + util_cols = [c for c in gpu_df.columns if c.startswith("gpu_util_")] + + if not util_cols: + # no gpu utilization columns? zero out + rec["gpu_trace"] = [] + else: + # 2) as floats in [0,1] + raw = gpu_df[util_cols].astype(float).div(100) + + # 3) average (or sum) across devices + # if you want to SUM instead, use .sum(axis=1) + avg_util = raw.mean(axis=1) + + # 4) scale by number of nodes requested + nodes = rec.get("nodes_alloc", 1) + rec["gpu_trace"] = (avg_util * nodes).tolist() + + if debug: + print(f"[DEBUG] data[{jid}].keys() now:", list(rec.keys())) + + # quick check: did any jobs pick up a GPU trace? 
+ print("→ data_dict contents sample:") + for jid, rec in list(data.items())[:5]: + print(f" job {jid}: cpu={'yes' if 'cpu' in rec else 'no'} gpu={'yes' if 'gpu' in rec else 'no'}") + print(f"→ total jobs seen = {len(data)}") + + got = [jid for jid, rec in data.items() if "gpu" in rec] + miss = [jid for jid, rec in data.items() if "cpu" in rec and "gpu" not in rec] + print(f"→ of {len(data)} total jobs seen, {len(got)} got GPU data, {len(miss)} have only CPU") + if miss: + print(" jobs missing GPU despite being in gpu_files:", miss[:10]) + + # 8) merge slurm metadata + for _, row in sl.iterrows(): + jid = row.id_job + if jid in data and jid not in data[jid]: + data[jid].update(row.to_dict()) + + # 9) build final job_dicts + jobs_list = [] + for jid, rec in data.items(): + cpu = rec.get("cpu") + gpu = rec.get("gpu_trace") + + if cpu_only: + if cpu is None: + print("cpu None: skipping this one (a)") + continue + cpu_tr = cpu.cpu_utilisation.tolist() + gpu_tr = 0 + t0, t1 = cpu.utime.min(), cpu.utime.max() + elif mixed: + if cpu is None: + print("cpu None: skipping this one (b)") + continue + if gpu is None: + print("gpu None: skipping this one") + continue + cpu_tr = cpu.cpu_utilisation.tolist() + gpu_tr = gpu + t0, t1 = cpu.utime.min(), cpu.utime.max() + else: + print("skipping") + continue + + st = rec.get("time_submit",t0) - start_ts + nr = rec.get("nodes_alloc",1) + if nr>1: + cpu_tr = [x/nr for x in cpu_tr] + + jobs_list.append(job_dict( + nodes_required = nr, + name = rec.get("name_job","unknown"), + account = rec.get("id_user","unknown"), + cpu_trace = cpu_tr, + gpu_trace = gpu_tr, + ntx_trace = [], + nrx_trace = [], + end_state = rec.get("state_end","UNKNOWN"), + id = jid, + priority = rec.get("priority",0), + submit_time = st, + time_limit = rec.get("time_limit",0), + start_time = t0 - start_ts, + end_time = t1 - start_ts, + wall_time = max(0, t1-t0), + trace_time = len(cpu_tr)*10.0, + trace_start_time = 0, + trace_end_time = len(cpu_tr)*10.0 + )) + + return jobs_list, 0, duration -- GitLab From ba9d2eaee44450f2aa5c5893d91d5d6dfd6159c2 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 10 Jul 2025 16:39:17 -0400 Subject: [PATCH 155/388] Resolving merge issues --- main.py | 9 ++++++--- multi-part-sim.py | 4 ++-- raps/dataloaders/lassen.py | 2 ++ raps/engine.py | 6 +++--- raps/network.py | 4 ++-- raps/ui.py | 4 ++-- 6 files changed, 17 insertions(+), 12 deletions(-) diff --git a/main.py b/main.py index 286958a..f38f52e 100644 --- a/main.py +++ b/main.py @@ -141,19 +141,22 @@ layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_en engine_stats = get_engine_stats(sc) job_stats = get_job_stats(sc) scheduler_stats = get_scheduler_stats(sc) -network_stats = get_network_stats(sc) +if args.simulate_network: + network_stats = get_network_stats(sc) # Following b/c we get the following error when we use PM100 telemetry dataset # TypeError: Object of type int64 is not JSON serializable try: print(json.dumps(engine_stats, indent=4)) print(json.dumps(job_stats, indent=4)) print(json.dumps(scheduler_stats, indent=4)) - print(json.dumps(network_stats, indent=4)) + if args.simulate_network: + print(json.dumps(network_stats, indent=4)) except: print(engine_stats) print(job_stats) print(scheduler_stats) - print(network_stats) + if args.simulate_network: + print(network_stats) if args.plot: diff --git a/multi-part-sim.py b/multi-part-sim.py index 0bda804..ee5b415 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -97,11 +97,11 @@ for job in jobs: # Initialize 
layout managers for each partition layout_managers = {} -for i, config in enumerate(configs): +for i, (config,ad) in enumerate(zip(configs,args_dicts)): pm = PowerManager(compute_node_power, **configs[i]) fm = FLOPSManager(**args_dicts[i]) sc = Engine(power_manager=pm, flops_manager=fm, cooling_model=None, **args_dicts[i]) - layout_managers[config['system_name']] = LayoutManager(args.layout, engine=sc, debug=args.debug, **config) + layout_managers[config['system_name']] = LayoutManager(args.layout, engine=sc, debug=args.debug, args_dict=ad, **config) # Set simulation timesteps if args.fastforward: diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 6784ae8..2a64624 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -204,6 +204,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): time_limit = row['time_limit'] + trace_quanta = config['TRACE_QUANTA'] trace_time = wall_time trace_start_time = start_time trace_end_time = end_time @@ -236,6 +237,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, + trace_quanta=trace_quanta, trace_missing_values=trace_missing_values) job = Job(job_info) job_list.append(job) diff --git a/raps/engine.py b/raps/engine.py index 996ee8d..89c69c1 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -322,11 +322,11 @@ class Engine: net_rx_list=net_rx_list, slowdown_factors=slowdown_factors ) + self.record_network_stats(avg_tx=avg_tx, + avg_rx=avg_rx, + avg_net=avg_net) else: avg_tx, avg_rx, avg_net = None,None,None - self.record_network_stats(avg_tx=avg_tx, - avg_rx=avg_rx, - avg_net=avg_net) # Continue with System Simulation tick_data = TickData( diff --git a/raps/network.py b/raps/network.py index 956dd73..fef4cc4 100644 --- a/raps/network.py +++ b/raps/network.py @@ -32,8 +32,8 @@ class NetworkModel: def simulate_network_utilization(self, *, job, debug=False): net_util = 0 net_cong = 0 - ntx_util = 0 - nrx_util = 0 + net_tx = 0 + net_rx = 0 max_throughput = self.max_link_bw * job.trace_quanta # self.config.get('TRACE_QUANTA') # Why? What should this be? 
if job.nodes_required <= 1: diff --git a/raps/ui.py b/raps/ui.py index da421bd..e315398 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -123,7 +123,8 @@ class LayoutManager: if show_nodes: columns.append("NODELIST") - #columns.append("TIME") + + columns.append("TIME") # Create table with bold magenta headers table = Table(title="Job Queue", header_style="bold magenta", expand=True) @@ -229,7 +230,6 @@ class LayoutManager: str(free_nodes), str(len(down_nodes)) ] - print(f"self.simulate_network: {self.simulate_network}") if self.simulate_network: row.append(f"{avg_net_util * 100:.0f}%") row.append(f"{slowdown:.1f}x") -- GitLab From 7deecb750bebf0235a81921eac1d540e240f226d Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 10 Jul 2025 18:51:55 -0700 Subject: [PATCH 156/388] Initial working version of multitenancy --- config/mit_supercloud/part-cpu/system.json | 3 +- config/mit_supercloud/part-gpu/system.json | 1 + multi-part-sim.py | 4 +- raps/dataloaders/mit_supercloud.py | 32 ++++- raps/engine.py | 50 ++++++-- raps/job.py | 14 +++ raps/resmgr.py | 137 ++++++++++++++------- raps/schedulers/default.py | 112 +++++++++-------- 8 files changed, 238 insertions(+), 115 deletions(-) diff --git a/config/mit_supercloud/part-cpu/system.json b/config/mit_supercloud/part-cpu/system.json index de2fcbc..548c484 100644 --- a/config/mit_supercloud/part-cpu/system.json +++ b/config/mit_supercloud/part-cpu/system.json @@ -11,7 +11,8 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [], - "CPUS_PER_NODE": 48, + "CPUS_PER_NODE": 2, + "CORES_PER_CPU": 24, "GPUS_PER_NODE": 0, "CPU_PEAK_FLOPS": 2.9952E12, "GPU_PEAK_FLOPS": 0, diff --git a/config/mit_supercloud/part-gpu/system.json b/config/mit_supercloud/part-gpu/system.json index 3c38e53..9d3eb00 100644 --- a/config/mit_supercloud/part-gpu/system.json +++ b/config/mit_supercloud/part-gpu/system.json @@ -12,6 +12,7 @@ "MISSING_RACKS": [], "DOWN_NODES": [], "CPUS_PER_NODE": 2, + "CORES_PER_CPU": 20, "GPUS_PER_NODE": 2, "CPU_PEAK_FLOPS": 1.248E12, "GPU_PEAK_FLOPS": 7.8E12, diff --git a/multi-part-sim.py b/multi-part-sim.py index c36cbef..91e1122 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -58,9 +58,9 @@ if args.replay: part = ad['partition'] td = Telemetry(**ad) print(f"[{part}] loading traces from {args.replay[0]} …") - jobs_part, t0, t1 = td.load_data(args.replay) + jobs_part, t0, t1, args_from_file = td.load_data(args.replay) jobs_by_partition[part] = jobs_part - td.save_snapshot(jobs_part, t0, t1, args, filename=part.split('/')[-1]) + td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1]) # --- report how many jobs per partition --- for part, jl in jobs_by_partition.items(): diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py index 7c7c3b9..58ec96d 100644 --- a/raps/dataloaders/mit_supercloud.py +++ b/raps/dataloaders/mit_supercloud.py @@ -8,6 +8,8 @@ import os import shutil import sys from datetime import datetime +import math +from types import SimpleNamespace import numpy as np import pandas as pd @@ -317,16 +319,26 @@ def load_data(local_dataset_path, **kwargs): # 9) build final job_dicts jobs_list = [] + + # Get CPUS_PER_NODE and GPUS_PER_NODE from config + config = kwargs.get('config', {}) + cpus_per_node = config.get('CPUS_PER_NODE', 2) # Default to 2 if not found + gpus_per_node = config.get('GPUS_PER_NODE', 0) # Default to 0 if not found + for jid, rec in data.items(): cpu = rec.get("cpu") gpu = rec.get("gpu_trace") + cpu_tr = [] + gpu_tr = [] + t0, t1 = 0, 0 + if 
cpu_only: if cpu is None: print("cpu None: skipping this one (a)") continue cpu_tr = cpu.cpu_utilisation.tolist() - gpu_tr = 0 + gpu_tr = [0] # Ensure gpu_tr is a list for max() operation t0, t1 = cpu.utime.min(), cpu.utime.max() elif mixed: if cpu is None: @@ -347,8 +359,14 @@ def load_data(local_dataset_path, **kwargs): if nr>1: cpu_tr = [x/nr for x in cpu_tr] + # Calculate cpu_cores_required and gpu_units_required + cpu_cores_req = math.ceil(max(cpu_tr) * cpus_per_node) if cpu_tr else 0 + gpu_units_req = math.ceil(max(gpu_tr) * gpus_per_node) if gpu_tr else 0 + jobs_list.append(job_dict( nodes_required = nr, + cpu_cores_required = cpu_cores_req, + gpu_units_required = gpu_units_req, name = rec.get("name_job","unknown"), account = rec.get("id_user","unknown"), cpu_trace = cpu_tr, @@ -368,4 +386,14 @@ def load_data(local_dataset_path, **kwargs): trace_end_time = len(cpu_tr)*10.0 )) - return jobs_list, 0, duration + # Calculate min_overall_utime and max_overall_utime + min_overall_utime = int(sl.time_submit.min()) + max_overall_utime = int(sl.time_submit.max()) + + args_namespace = SimpleNamespace( + fastforward=min_overall_utime, + system='mit_supercloud', + time=max_overall_utime + ) + + return jobs_list, min_overall_utime, max_overall_utime, args_namespace diff --git a/raps/engine.py b/raps/engine.py index ae33fdc..c9a8105 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -51,7 +51,8 @@ class Engine: self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) self.resource_manager = ResourceManager( total_nodes=self.config['TOTAL_NODES'], - down_nodes=self.config['DOWN_NODES'] + down_nodes=self.config['DOWN_NODES'], + config=self.config ) # Initialize running and queue, etc. self.running = [] @@ -124,16 +125,26 @@ class Engine: Adds running jobs to the queue, and removes them from the jobs_to_submit jobs_to_submit still holds the jobs that need be submitted in the future. """ + if self.debug: + print(f"[DEBUG] add_running_jobs_to_queue: current_time={self.current_time}") # Build a list of jobs whose start_time is <= current_time. eligible = [job for job in jobs_to_submit if job['start_time'] < self.current_time] + if self.debug: + print(f"[DEBUG] add_running_jobs_to_queue: Found {len(eligible)} eligible jobs.") # Remove those jobs from jobs_to_submit: jobs_to_submit[:] = [job for job in jobs_to_submit if job['start_time'] >= self.current_time] + if self.debug: + print(f"[DEBUG] add_running_jobs_to_queue: {len(jobs_to_submit)} jobs remaining in jobs_to_submit.") # Convert them to Job instances and build list of eligible jobs. eligible_jobs_list = [] for job_data in eligible: job_instance = Job(job_data) + job_instance.cpu_cores_required = job_data.get('cpu_cores_required', 0) + job_instance.gpu_units_required = job_data.get('gpu_units_required', 0) eligible_jobs_list.append(job_instance) self.queue += eligible_jobs_list + if self.debug: + print(f"[DEBUG] add_running_jobs_to_queue: self.queue now has {len(self.queue)} jobs.") def add_eligible_jobs_to_queue(self, jobs_to_submit: List): """ @@ -142,16 +153,26 @@ class Engine: Adds eligible jobs to the queue, and removes them from the jobs_to_submit jobs_to_submit still holds the jobs that need be submitted in the future. """ + if self.debug: + print(f"[DEBUG] add_eligible_jobs_to_queue: current_time={self.current_time}") # Build a list of jobs whose submit_time is <= current_time. 
eligible = [job for job in jobs_to_submit if job['submit_time'] <= self.current_time] + if self.debug: + print(f"[DEBUG] add_eligible_jobs_to_queue: Found {len(eligible)} eligible jobs.") # Remove those jobs from jobs_to_submit: jobs_to_submit[:] = [job for job in jobs_to_submit if job['submit_time'] > self.current_time] + if self.debug: + print(f"[DEBUG] add_eligible_jobs_to_queue: {len(jobs_to_submit)} jobs remaining in jobs_to_submit.") # Convert them to Job instances and build list of eligible jobs. eligible_jobs_list = [] for job_data in eligible: job_instance = Job(job_data) + job_instance.cpu_cores_required = job_data.get('cpu_cores_required', 0) + job_instance.gpu_units_required = job_data.get('gpu_units_required', 0) eligible_jobs_list.append(job_instance) self.queue += eligible_jobs_list + if self.debug: + print(f"[DEBUG] add_eligible_jobs_to_queue: self.queue now has {len(self.queue)} jobs.") if eligible_jobs_list != []: return True else: @@ -181,11 +202,17 @@ class Engine: else: newly_downed_nodes = [] - # Update active/free nodes - self.num_free_nodes = len(self.resource_manager.available_nodes) - self.num_active_nodes = self.config['TOTAL_NODES'] \ - - len(self.resource_manager.available_nodes) \ - - len(self.resource_manager.down_nodes) + # Update active/free nodes based on core/GPU utilization + total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes) + total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes) + available_cpu_cores = sum(node['available_cpu_cores'] for node in self.resource_manager.nodes) + available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes) + + self.num_free_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and node['available_cpu_cores'] == node['total_cpu_cores'] and node['available_gpu_units'] == node['total_gpu_units']]) + self.num_active_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and (node['available_cpu_cores'] < node['total_cpu_cores'] or node['available_gpu_units'] < node['total_gpu_units'])]) + + # Update system utilization history + self.resource_manager.update_system_utilization(self.current_time, self.running) return completed_jobs, newly_downed_nodes @@ -201,8 +228,8 @@ class Engine: net_utils = [] net_tx_list = [] net_rx_list = [] - if self.debug: - print(f"Current Time: {self.current_time}") + #if self.debug: + # print(f"Current Time: {self.current_time}") slowdown_factors = [] @@ -365,7 +392,7 @@ class Engine: rack_loss = rect_losses + sivoc_losses # Update system utilization - system_util = self.num_active_nodes / self.config['AVAILABLE_NODES'] * 100 + system_util = self.resource_manager.sys_util_history[-1][1] if self.resource_manager.sys_util_history else 0.0 self.sys_util_history.append((self.current_time, system_util)) self.scheduler_queue_history.append(len(self.running)) @@ -486,6 +513,11 @@ class Engine: else: replay = False + if self.debug: + print(f"[DEBUG] run_simulation: Initial jobs count: {len(jobs)}") + if jobs: + print(f"[DEBUG] run_simulation: First job submit_time: {jobs[0]['submit_time']}, start_time: {jobs[0]['start_time']}") + # Place jobs that are currently running, onto the system. 
self.prepare_system_state(jobs, timestep_start, timestep_end, replay) diff --git a/raps/job.py b/raps/job.py index 1be41e0..54ed084 100644 --- a/raps/job.py +++ b/raps/job.py @@ -15,6 +15,8 @@ Implementing such using something like: def job_dict(*, nodes_required, name, account, cpu_trace, gpu_trace, ntx_trace, nrx_trace, + cpu_cores_required=0, gpu_units_required=0, + allocated_cpu_cores=0, allocated_gpu_units=0, end_state, scheduled_nodes=None, id, priority=0, partition=0, submit_time=0, time_limit=0, start_time=0, end_time=0, wall_time=0, trace_time=0, trace_start_time=0, trace_end_time=0, @@ -22,6 +24,10 @@ def job_dict(*, nodes_required, name, account, """ Return job info dictionary """ return { 'nodes_required': nodes_required, + 'cpu_cores_required': cpu_cores_required, + 'gpu_units_required': gpu_units_required, + 'allocated_cpu_cores': allocated_cpu_cores, + 'allocated_gpu_units': allocated_gpu_units, 'name': name, 'account': account, 'cpu_trace': cpu_trace, @@ -101,6 +107,10 @@ class Job: self.power = 0 self.scheduled_nodes = [] # Explicit list of requested nodes self.nodes_required = 0 # If scheduled_nodes is set this can be derived. + self.cpu_cores_required = 0 + self.gpu_units_required = 0 + self.allocated_cpu_cores = 0 + self.allocated_gpu_units = 0 self.power_history = [] self._state = state self.account = account @@ -129,6 +139,10 @@ class Job: """Return a string representation of the job.""" return (f"Job(id={self.id}, name={self.name}, account={self.account}, " f"nodes_required={self.nodes_required}, " + f"cpu_cores_required={self.cpu_cores_required}, " + f"gpu_units_required={self.gpu_units_required}, " + f"allocated_cpu_cores={self.allocated_cpu_cores}, " + f"allocated_gpu_units={self.allocated_gpu_units}, " f"cpu_trace={self.cpu_trace}, gpu_trace={self.gpu_trace}, " f"end_state={self.end_state}, requested_nodes={self.requested_nodes}, " f"submit_time={self.submit_time}, time_limit={self.time_limit}, " diff --git a/raps/resmgr.py b/raps/resmgr.py index 6a3ffda..829ab3b 100644 --- a/raps/resmgr.py +++ b/raps/resmgr.py @@ -4,76 +4,119 @@ from scipy.stats import weibull_min class ResourceManager: - def __init__(self, total_nodes, down_nodes): + def __init__(self, total_nodes, down_nodes, config): self.total_nodes = total_nodes - # Maintain a set for down nodes (e.g., nodes that are offline) + self.config = config self.down_nodes = set(down_nodes) - # Available nodes are those that are not down - self.available_nodes = sorted(set(range(total_nodes)) - self.down_nodes) - # You can track system utilization history here - self.sys_util_history = [] # list of (time, utilization) tuples - - def assign_nodes_to_job(self, job, current_time): - """Assigns nodes to a job and updates the available nodes.""" - if len(self.available_nodes) < job.nodes_required: - raise ValueError(f"Not enough available nodes to schedule job {job.id}") - - if job.requested_nodes: # Telemetry replay case - job.scheduled_nodes = job.requested_nodes - self.available_nodes = [n for n in self.available_nodes if n not in job.scheduled_nodes] - else: # Synthetic or case using modified/poisson arrival times - job.scheduled_nodes = self.available_nodes[:job.nodes_required] - self.available_nodes = self.available_nodes[job.nodes_required:] + self.nodes = [] + node_id_counter = 0 + + total_cpu_cores_per_node = self.config['CPUS_PER_NODE'] * self.config['CORES_PER_CPU'] + total_gpu_units_per_node = self.config['GPUS_PER_NODE'] + + for i in range(self.total_nodes): + is_down = i in self.down_nodes + 
self.nodes.append({ + 'id': i, + 'total_cpu_cores': total_cpu_cores_per_node, + 'available_cpu_cores': 0 if is_down else total_cpu_cores_per_node, + 'total_gpu_units': total_gpu_units_per_node, + 'available_gpu_units': 0 if is_down else total_gpu_units_per_node, + 'is_down': is_down + }) + node_id_counter += 1 + + # Available nodes are now tracked by their available resources + self.available_nodes = [node['id'] for node in self.nodes if not node['is_down']] + self.sys_util_history = [] + + def assign_nodes_to_job(self, job, current_time, node_id): + """Assigns resources (cores, GPUs) to a job and updates the available resources.""" + # For multitenancy, a job is assigned to a single node. + # We need to find a node that can satisfy the job's resource requirements. + + found_node = None + # Use the provided node_id directly + if node_id is not None and node_id < len(self.nodes) and not self.nodes[node_id]['is_down']: + node = self.nodes[node_id] + if (node['available_cpu_cores'] >= job.cpu_cores_required and + node['available_gpu_units'] >= job.gpu_units_required): + found_node = node + + if found_node is None: + raise ValueError(f"Not enough available resources to schedule job {job.id} on node {node_id}.") + + # Allocate resources on the found node + found_node['available_cpu_cores'] -= job.cpu_cores_required + found_node['available_gpu_units'] -= job.gpu_units_required + + # Assign the node and allocated resources to the job + job.scheduled_nodes = [found_node['id']] + job.allocated_cpu_cores = job.cpu_cores_required + job.allocated_gpu_units = job.gpu_units_required # Set job start and end times according to simulation - # This overrides actual times from telemetry and set state to 'running' job.start_time = current_time job.end_time = current_time + job.wall_time job.state = JobState.RUNNING # Mark job as running def free_nodes_from_job(self, job): - """Frees the nodes that were allocated to a completed job.""" - if hasattr(job, "scheduled_nodes"): - self.available_nodes.extend(job.scheduled_nodes) - # Remove duplicates and sort the list for consistency - self.available_nodes = sorted(set(self.available_nodes)) + """Frees the resources (cores, GPUs) that were allocated to a completed job.""" + if hasattr(job, "scheduled_nodes") and job.scheduled_nodes: + node_id = job.scheduled_nodes[0] # Assuming a job is scheduled on a single node + if node_id < len(self.nodes): + node = self.nodes[node_id] + node['available_cpu_cores'] += job.allocated_cpu_cores + node['available_gpu_units'] += job.allocated_gpu_units + else: + print(f"Warning: Job {job.id} scheduled on non-existent node {node_id}. Cannot free resources.") else: # If job has no scheduled nodes, there is nothing to free. pass - def update_system_utilization(self, current_time, num_active_nodes): + def update_system_utilization(self, current_time, running_jobs): """ - Computes and records the system utilization. - For example, utilization could be defined as the ratio of active nodes to the total non-down nodes. + Computes and records the system utilization based on allocated CPU cores and GPU units. 
""" - # Number of nodes that are not down: - total_operational = self.total_nodes - len(self.down_nodes) - # Compute utilization as a percentage: - utilization = (num_active_nodes / total_operational) * 100 if total_operational else 0 - self.sys_util_history.append((current_time, utilization)) - return utilization + total_cpu_cores = sum(node['total_cpu_cores'] for node in self.nodes) + total_gpu_units = sum(node['total_gpu_units'] for node in self.nodes) + + allocated_cpu_cores = sum(job.allocated_cpu_cores for job in running_jobs) + allocated_gpu_units = sum(job.allocated_gpu_units for job in running_jobs) + + cpu_utilization = (allocated_cpu_cores / total_cpu_cores) * 100 if total_cpu_cores else 0 + gpu_utilization = (allocated_gpu_units / total_gpu_units) * 100 if total_gpu_units else 0 + + # For now, we'll just use CPU utilization as the primary system utilization metric + # You might want to combine these or choose a different primary metric + self.sys_util_history.append((current_time, cpu_utilization)) + return cpu_utilization def node_failure(self, mtbf): - return [] """Simulate node failure using Weibull distribution.""" shape_parameter = 1.5 scale_parameter = mtbf * 3600 # Convert to seconds - # Create a NumPy array of node indices, excluding down nodes - all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes))) + # Create a NumPy array of node indices, excluding already down nodes + operational_node_ids = np.array([node['id'] for node in self.nodes if not node['is_down']]) + + if len(operational_node_ids) == 0: + return [] # No operational nodes to fail - # Sample the Weibull distribution for all nodes at once - random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size) + # Sample the Weibull distribution for all operational nodes at once + random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=len(operational_node_ids)) - # Identify nodes that have failed - failure_threshold = 0.1 + # Identify nodes that have failed (using a threshold for demonstration) + failure_threshold = 0.001 # This threshold might need tuning failed_nodes_mask = random_values < failure_threshold - newly_downed_nodes = all_nodes[failed_nodes_mask] + newly_downed_node_ids = operational_node_ids[failed_nodes_mask] - # Update available and down nodes - for node_index in newly_downed_nodes: - if node_index in self.available_nodes: - self.available_nodes.remove(node_index) - self.down_nodes.add(str(node_index)) + # Update the state of the newly downed nodes in self.nodes + for node_id in newly_downed_node_ids: + node = self.nodes[node_id] + node['is_down'] = True + node['available_cpu_cores'] = 0 + node['available_gpu_units'] = 0 + self.down_nodes.add(node_id) # Add to the set of down node IDs - return newly_downed_nodes.tolist() + return newly_downed_node_ids.tolist() diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index 9953087..539605e 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -39,6 +39,8 @@ class Scheduler: # Iterate over a copy of the queue since we might remove items for job in queue[:]: + if self.debug: + print(f"[DEBUG] Scheduler: Considering job {job.id} (CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required})") if self.policy == PolicyType.REPLAY: if job.start_time > current_time: continue # Replay: Job didn't start yet. Next! 
@@ -49,11 +51,13 @@

             nodes_available = self.check_available_nodes(job)

-            if nodes_available:
-                self.place_job_and_manage_queues(job, queue, running, current_time)
+            if nodes_available is not None:
+                self.place_job_and_manage_queues(job, queue, running, current_time, nodes_available)
             else:  # In case the job was not placed, see how we should continue:
                 if self.bfpolicy is not None:
-                    self.backfill(queue, running, current_time)
+                    backfill_job, node_id = self.backfill(queue, running, current_time)
+                    if backfill_job and node_id is not None:
+                        self.place_job_and_manage_queues(backfill_job, queue, running, current_time, node_id)

                 # After backfill, decide whether to continue processing the queue or wait; continuing may result in fairness issues.
                 if self.policy in [PolicyType.REPLAY]:
@@ -95,42 +99,42 @@ class Scheduler:
         else:
             return jobs_to_submit

-    def place_job_and_manage_queues(self, job, queue,running, current_time):
-        self.resource_manager.assign_nodes_to_job(job, current_time)
+    def place_job_and_manage_queues(self, job, queue, running, current_time, node_id):
+        self.resource_manager.assign_nodes_to_job(job, current_time, node_id)
         running.append(job)
         queue.remove(job)
         if self.debug:
             scheduled_nodes = summarize_ranges(job.scheduled_nodes)
             print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}")

-    def check_available_nodes(self,job):
-        nodes_available = False
-        if job.requested_nodes:  # nodes specified, i.e., telemetry replay
-            if len(job.requested_nodes) <= len(self.resource_manager.available_nodes):
-                if self.policy == PolicyType.REPLAY:  # Check if exact set is available:
-                    nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes))
-                else:
-                    # Sufficiently large number of nodes available
-                    # but no exact set is required!
-                    nodes_available = True
-                    # remove the request for specific nodes and ask for n nodes
-                    job.nodes_required = len(job.requested_nodes)
-                    job.requested_nodes = []
-            else:
-                pass
-        else:  # Exact nodes not specified (e.g. synthetic jobs dont have nodes assigned)
-            nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required
-
-        return nodes_available
+    def check_available_nodes(self, job):
+        """Checks if there are available resources (CPU cores, GPU units) for the job on any node."""
+        # Iterate through all nodes managed by the ResourceManager
+        for node in self.resource_manager.nodes:
+            if self.debug:
+                print(f"[DEBUG] Checking node {node['id']}: Available CPU: {node['available_cpu_cores']}, Available GPU: {node['available_gpu_units']}. Job needs CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required}")
+            # Skip if the node is down
+            if node['is_down']:
+                continue
+
+            # Check if the node has enough available CPU cores and GPU units
+            if (node['available_cpu_cores'] >= job.cpu_cores_required and
+                node['available_gpu_units'] >= job.gpu_units_required):
+                # If a suitable node is found, return its ID
+                return node['id']
+        # If no suitable node is found, return None
+        return None

     def backfill(self,queue:List, running:List, current_time):
         # Try to find a backfill candidate from the entire queue.
         while queue:
-            backfill_job = self.find_backfill_job(queue, running, current_time)
-            if backfill_job:
-                self.place_job_and_manage_queues(backfill_job, queue, running, current_time)
+            backfill_job, node_id = self.find_backfill_job(queue, running, current_time)
+            if backfill_job is not None and node_id is not None:
+                # Instead of placing here, return the job and node_id to the caller
+                return backfill_job, node_id
             else:
                 break
+        return None, None

     def find_backfill_job(self, queue, running, current_time):
         """Finds a backfill job based on available nodes and estimated completion times.
         scheduler for slurm resource manager.' Procedia computer science 66 (2015): 661-669.
         """
         if not queue:
-            return None
+            return None, None

         # Identify when the next job in the queue could run, as a time limit:
         first_job = queue[0]
-        nodes_required = 0
-        if first_job.requested_nodes:
-            nodes_required = len(first_job.requested_nodes)
-        else:
-            nodes_required = first_job.nodes_required
-
-        sorted_running = sorted(running, key=lambda job: job.end_time)
+        # For multitenancy, we need to check if the first job can fit on any node
+        # based on its core/GPU requirements, not just nodes_required.
+        # This is a simplification; a more complex backfill might consider
+        # if the job can fit by combining resources from multiple nodes.
+        # For now, we assume it needs to fit on a single node.
+
+        # We need to know the total available resources if all running jobs finish by shadow_time_end
+        # This is complex with multitenancy, so for now, we'll simplify the backfill logic
+        # to just check if a job can fit on *any* node, not necessarily the one
+        # that will be freed up by the first job in line.
+
+        # The original logic for shadow_time_end and shadow_nodes_avail is based on whole nodes.
+        # With multitenancy, this needs a more sophisticated resource projection.
+        # For now, we will make `time_limit` effectively infinite for backfill candidates
+        # if the job can fit on *any* node, and rely on `check_available_nodes`.
+
+        # Revert to a simpler time_limit for now, or remove it if not applicable
+        # For now, let's assume time_limit is not strictly tied to node availability
+        # in the same way as before, and focus on resource availability.
+        time_limit = float('inf')  # Effectively no time limit for backfill candidates

-        # Identify when we have enough nodes therefore the start time of the first_job in line
-        shadow_time_end = 0
-        shadow_nodes_avail = len(self.resource_manager.available_nodes)
-        for job in sorted_running:
-            if shadow_nodes_avail >= nodes_required:
-                break
-            else:
-                shadow_nodes_avail += job.nodes_required
-                shadow_time_end = job.end_time
-
-        time_limit = shadow_time_end - current_time

         # We now have the time_limit after which no backfilled job should end
         # as the next job in line has the necessary resources after this time limit.
@@ -181,15 +187,13 @@ class Scheduler:
                 raise NotImplementedError(f"{self.bfpolicy} not implemented!
Please implement!") else: raise NotImplementedError(f"{self.bfpolicy} not implemented.") + return None, None def return_first_fit(self, queue, time_limit): for job in queue: - if job.time_limit <= time_limit: - nodes_available = self.check_available_nodes(job) - if nodes_available: - return job - else: - continue - else: - continue - return None + # Check if the job can fit on any node based on its resource requirements + node_id = self.check_available_nodes(job) + if node_id is not None: + # If a suitable node is found, return the job and the node_id + return job, node_id + return None, None -- GitLab From 5683eef23b65d7c89e9f70b2a5844c56bb11303d Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 10 Jul 2025 19:21:05 -0700 Subject: [PATCH 157/388] Print out stats at end of sim - ignore network stats for now --- main.py | 24 ++++++++-------------- multi-part-sim.py | 18 +++++++++++++--- raps/engine.py | 52 ++++++++++++++++++++++++++++++++++++++++++----- raps/resmgr.py | 4 +--- 4 files changed, 71 insertions(+), 27 deletions(-) diff --git a/main.py b/main.py index 19a76d8..d7b81cc 100644 --- a/main.py +++ b/main.py @@ -175,22 +175,14 @@ print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds') layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config) layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end) -engine_stats = get_engine_stats(sc) -job_stats = get_job_stats(sc) -scheduler_stats = get_scheduler_stats(sc) -network_stats = get_network_stats(sc) -# Following b/c we get the following error when we use PM100 telemetry dataset -# TypeError: Object of type int64 is not JSON serializable -try: - print(json.dumps(engine_stats, indent=4)) - print(json.dumps(job_stats, indent=4)) - print(json.dumps(scheduler_stats, indent=4)) - print(json.dumps(network_stats, indent=4)) -except: - print(engine_stats) - print(job_stats) - print(scheduler_stats) - print(network_stats) + # Get comprehensive simulation statistics + simulation_stats = sc.get_stats() + + # Print a formatted report + print("\n--- Simulation Report ---") + for key, value in simulation_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print("-------------------------") if args.plot: diff --git a/multi-part-sim.py b/multi-part-sim.py index 91e1122..6428ce7 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -16,6 +16,7 @@ from raps.power import PowerManager, compute_node_power from raps.telemetry import Telemetry from raps.workload import Workload from raps.utils import create_casename, convert_to_seconds, next_arrival +from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats from tqdm import tqdm # Load configurations for each partition @@ -67,11 +68,14 @@ if args.replay: print(f"[INFO] Partition '{part}': {len(jl)} jobs loaded") # now flatten into a single job list (or keep separate for your engine) - jobs = [] + all_jobs_flat = [] for part in partition_names: for job in jobs_by_partition[part]: job['partition'] = part - jobs.append(job) + all_jobs_flat.append(job) + + total_initial_jobs = len(all_jobs_flat) + jobs = all_jobs_flat if args.scale: for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): @@ -101,7 +105,7 @@ layout_managers = {} for i, config in enumerate(configs): pm = PowerManager(compute_node_power, **configs[i]) fm = FLOPSManager(**args_dicts[i]) - sc = Engine(power_manager=pm, flops_manager=fm, cooling_model=None, **args_dicts[i]) + sc = 
Engine(power_manager=pm, flops_manager=fm, cooling_model=None, jobs=jobs_by_partition[config['system_name']], total_initial_jobs=total_initial_jobs, **args_dicts[i]) layout_managers[config['system_name']] = LayoutManager(args.layout, engine=sc, debug=args.debug, **config) # Set simulation timesteps @@ -136,3 +140,11 @@ for timestep in range(timesteps): print(f"system power: {sys_power:.1f}kW") print("Simulation complete.") + +# Print statistics for each partition +for name, lm in layout_managers.items(): + print(f"\n--- Simulation Report for Partition: {name} ---") + simulation_stats = lm.engine.get_stats() + for key, value in simulation_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print("--------------------------------------------------") diff --git a/raps/engine.py b/raps/engine.py index c9a8105..5cbae39 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -41,12 +41,13 @@ class TickData: avg_net_rx: float avg_net_util: float slowdown_per_job: float + node_occupancy: dict[int, int] class Engine: """Job scheduling simulation engine.""" - def __init__(self, *, power_manager, flops_manager, cooling_model=None, config, jobs=None, **kwargs): + def __init__(self, *, power_manager, flops_manager, cooling_model=None, config, jobs=None, total_initial_jobs=0, **kwargs): self.config = config self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) self.resource_manager = ResourceManager( @@ -60,6 +61,7 @@ class Engine: self.accounts = None self.job_history_dict = [] self.jobs_completed = 0 + self.total_initial_jobs = total_initial_jobs self.current_time = 0 self.cooling_model = cooling_model self.sys_power = 0 @@ -76,6 +78,7 @@ class Engine: self.net_util_history = [] self.avg_slowdown_history = [] self.max_slowdown_history = [] + self.node_occupancy_history = [] # Get scheduler type from command-line args or default scheduler_type = kwargs.get('scheduler', 'default') @@ -455,6 +458,15 @@ class Engine: self.avg_net_rx.append(avg_rx) self.net_util_history.append(avg_net) + # Calculate node occupancy + node_occupancy = {node['id']: 0 for node in self.resource_manager.nodes} # Initialize even if no running jobs + for job in self.running: + if job.scheduled_nodes: + node_id = job.scheduled_nodes[0] # Assuming one node per job for multitenancy + node_occupancy[node_id] += 1 + + self.node_occupancy_history.append(node_occupancy) + tick_data = TickData( current_time=self.current_time, completed=None, @@ -464,7 +476,7 @@ class Engine: power_df=power_df, p_flops=pflops, g_flops_w=gflop_per_watt, - system_util=self.num_active_nodes / self.config['AVAILABLE_NODES'] * 100, + system_util=system_util, fmu_inputs=cooling_inputs, fmu_outputs=cooling_outputs, num_active_nodes=self.num_active_nodes, @@ -472,7 +484,8 @@ class Engine: avg_net_tx=avg_tx, avg_net_rx=avg_rx, avg_net_util=avg_net, - slowdown_per_job=0 + slowdown_per_job=0, + node_occupancy=node_occupancy ) self.current_time += 1 @@ -594,8 +607,37 @@ class Engine: 'total cost': f'${total_cost:.2f}' } - network_stats = get_network_stats() - stats.update(network_stats) + # Multitenancy Stats + total_jobs_loaded = self.total_initial_jobs # Assuming this is passed to __init__ + stats['total jobs loaded'] = total_jobs_loaded + stats['jobs completed percentage'] = f"{(self.jobs_completed / total_jobs_loaded * 100):.2f}%" + + if self.node_occupancy_history: + # Calculate average concurrent jobs per node + total_occupancy_sum = 0 + max_concurrent_jobs_per_node = 0 + num_timesteps_with_jobs = 0 + + for occupancy_dict in 
self.node_occupancy_history:
+                current_timestep_total_occupancy = sum(occupancy_dict.values())
+                if current_timestep_total_occupancy > 0:
+                    total_occupancy_sum += current_timestep_total_occupancy
+                    num_timesteps_with_jobs += 1
+
+                # Find max concurrent jobs on any single node for this timestep
+                if occupancy_dict:
+                    max_concurrent_jobs_per_node = max(max_concurrent_jobs_per_node, max(occupancy_dict.values()))
+
+            avg_concurrent_jobs_per_node = (total_occupancy_sum / num_timesteps_with_jobs) if num_timesteps_with_jobs > 0 else 0
+
+            stats['avg concurrent jobs per node'] = f"{avg_concurrent_jobs_per_node:.2f}"
+            stats['max concurrent jobs per node'] = max_concurrent_jobs_per_node
+        else:
+            stats['avg concurrent jobs per node'] = "N/A"
+            stats['max concurrent jobs per node'] = "N/A"
+
+        #network_stats = get_network_stats()
+        #stats.update(network_stats)

         if self.net_util_history:
             mean_net_util = sum(self.net_util_history) / len(self.net_util_history)
diff --git a/raps/resmgr.py b/raps/resmgr.py
index 829ab3b..6e4d3be 100644
--- a/raps/resmgr.py
+++ b/raps/resmgr.py
@@ -9,8 +9,7 @@ class ResourceManager:
         self.config = config
         self.down_nodes = set(down_nodes)
         self.nodes = []
-        node_id_counter = 0
-
+        # Initialize nodes based on config parameters
         total_cpu_cores_per_node = self.config['CPUS_PER_NODE'] * self.config['CORES_PER_CPU']
         total_gpu_units_per_node = self.config['GPUS_PER_NODE']

@@ -24,7 +23,6 @@ class ResourceManager:
                 'available_gpu_units': 0 if is_down else total_gpu_units_per_node,
                 'is_down': is_down
             })
-            node_id_counter += 1

         # Available nodes are now tracked by their available resources
         self.available_nodes = [node['id'] for node in self.nodes if not node['is_down']]
--
GitLab

From f5be72e2f5f825257a63e2ec79321ef07739dacd Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Thu, 10 Jul 2025 19:57:52 -0700
Subject: [PATCH 158/388] Some improvements on concurrent jobs per node stats

---
 raps/engine.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index 5cbae39..84d3a9b 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -613,24 +613,31 @@ class Engine:
         stats['jobs completed percentage'] = f"{(self.jobs_completed / total_jobs_loaded * 100):.2f}%"

         if self.node_occupancy_history:
-            # Calculate average concurrent jobs per node
-            total_occupancy_sum = 0
+            # Calculate average concurrent jobs per node (average density across all nodes and timesteps)
+            total_jobs_running_timesteps = 0
             max_concurrent_jobs_per_node = 0
-            num_timesteps_with_jobs = 0
+            sum_jobs_per_active_node = 0  # New: Sum of (jobs / active_nodes) for each timestep
+            count_active_timesteps_for_avg_active = 0  # New: Count of timesteps with active nodes

             for occupancy_dict in self.node_occupancy_history:
                 current_timestep_total_occupancy = sum(occupancy_dict.values())
-                if current_timestep_total_occupancy > 0:
-                    total_occupancy_sum += current_timestep_total_occupancy
-                    num_timesteps_with_jobs += 1
+                total_jobs_running_timesteps += current_timestep_total_occupancy

                 # Find max concurrent jobs on any single node for this timestep
                 if occupancy_dict:
                     max_concurrent_jobs_per_node = max(max_concurrent_jobs_per_node, max(occupancy_dict.values()))

-            avg_concurrent_jobs_per_node = (total_occupancy_sum / num_timesteps_with_jobs) if num_timesteps_with_jobs > 0 else 0
+                # New: Calculate average jobs per *active* node for this timestep
+                active_nodes_in_timestep = [count for count in occupancy_dict.values() if count > 0]
+                if active_nodes_in_timestep:
+                    sum_jobs_per_active_node += sum(active_nodes_in_timestep) / len(active_nodes_in_timestep)
+                    count_active_timesteps_for_avg_active += 1

-            stats['avg concurrent jobs per node'] = f"{avg_concurrent_jobs_per_node:.2f}"
+            # Average jobs per *active* node (occupancy density over busy nodes only)
+            avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \
+                if count_active_timesteps_for_avg_active > 0 else 0
+
+            stats['avg concurrent jobs per active node'] = f"{avg_jobs_per_active_node:.2f}"
             stats['max concurrent jobs per node'] = max_concurrent_jobs_per_node
         else:
             stats['avg concurrent jobs per node'] = "N/A"
--
GitLab
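Worked on a toy history, the two occupancy metrics reported above behave as follows. This is a sketch mirroring, but not reusing, the Engine code; each timestep maps node id to the number of co-resident jobs:

    history = [
        {0: 2, 1: 0, 2: 1},   # node 0 hosts two jobs, node 2 one, node 1 idle
        {0: 3, 1: 1, 2: 0},
    ]

    # max concurrent jobs on any single node across the run
    max_per_node = max(max(occ.values()) for occ in history if occ)  # 3

    # average jobs per *active* node, averaged over timesteps with activity
    densities = []
    for occ in history:
        active = [n for n in occ.values() if n > 0]
        if active:
            densities.append(sum(active) / len(active))
    avg_per_active_node = sum(densities) / len(densities)  # (1.5 + 2.0) / 2 = 1.75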
From c0f763e6e21be1f8eb4bf7da363edd534b6bc97b Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Fri, 11 Jul 2025 16:35:21 -0700
Subject: [PATCH 159/388] Add more outputs for diagnostics

---
 multi-part-sim.py |  5 +++--
 raps/resmgr.py    | 19 +++++++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/multi-part-sim.py b/multi-part-sim.py
index 6428ce7..182638d 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -134,8 +134,9 @@ for timestep in range(timesteps):
     if timestep % configs[0]['UI_UPDATE_FREQ'] == 0:  # Assuming same frequency for all partitions
         sys_power = 0
         for name, lm in layout_managers.items():
-            sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else 0.0
-            print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} - Utilization: {sys_util[1]:.2f}% - Power: {lm.engine.sys_power:.1f}kW")
+            sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else (0, 0.0)
+            allocated_cores = lm.engine.resource_manager.allocated_cpu_cores
+            print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} - Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - Power: {lm.engine.sys_power:.1f}kW")
             sys_power += lm.engine.sys_power

     print(f"system power: {sys_power:.1f}kW")
diff --git a/raps/resmgr.py b/raps/resmgr.py
index 6e4d3be..e8ffc52 100644
--- a/raps/resmgr.py
+++ b/raps/resmgr.py
@@ -27,6 +27,7 @@ class ResourceManager:
         # Available nodes are now tracked by their available resources
         self.available_nodes = [node['id'] for node in self.nodes if not node['is_down']]
         self.sys_util_history = []
+        self.allocated_cpu_cores = 0

     def assign_nodes_to_job(self, job, current_time, node_id):
         """Assigns resources (cores, GPUs) to a job and updates the available resources."""
@@ -79,16 +80,22 @@ class ResourceManager:
         total_cpu_cores = sum(node['total_cpu_cores'] for node in self.nodes)
         total_gpu_units = sum(node['total_gpu_units'] for node in self.nodes)

-        allocated_cpu_cores = sum(job.allocated_cpu_cores for job in running_jobs)
+        self.allocated_cpu_cores = sum(job.allocated_cpu_cores for job in running_jobs)
         allocated_gpu_units = sum(job.allocated_gpu_units for job in running_jobs)

-        cpu_utilization = (allocated_cpu_cores / total_cpu_cores) * 100 if total_cpu_cores else 0
+        cpu_utilization = (self.allocated_cpu_cores / total_cpu_cores) * 100 if total_cpu_cores else 0
         gpu_utilization = (allocated_gpu_units / total_gpu_units) * 100 if total_gpu_units else 0

-        # For now, we'll just use CPU utilization as the primary system utilization metric
-        # You might want to combine these or choose a different primary metric
-        self.sys_util_history.append((current_time, cpu_utilization))
-        return cpu_utilization
+        # Determine utilization based on partition type (has GPUs or not)
+        if self.config.get('GPUS_PER_NODE', 0) > 0:
+            # This is a GPU partition, use GPU utilization
+            utilization =
From 8cd42d19180920173d0f58cb8847a53bcec06d4d Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 14 Jul 2025 11:20:11 -0400
Subject: [PATCH 160/388] Restore non-net UI

---
 raps/ui.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/raps/ui.py b/raps/ui.py
index e315398..6ac4ebe 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -114,15 +114,15 @@ class LayoutManager:
         show_slowdown = (self.topology in ("fat-tree", "dragonfly", "capacity"))
 
         # Build the column headers
-        columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST"]
-        #columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"]
+        #columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST"]
+        columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"]
         if show_slowdown:
             columns.append("SLOW DOWN")
         else:
             columns.append("NODE SEGMENTS")
 
-        if show_nodes:
-            columns.append("NODELIST")
+        #if show_nodes:
+        #    columns.append("NODELIST")
 
         columns.append("TIME")
 
@@ -162,6 +162,9 @@ class LayoutManager:
             else:
                 nodes_display = ", ".join(node_segments)
                 col_nodelist = nodes_display
+            else:
+                col_nodelist = col_slow # This logic is a bit flawed...
+                nodes_display = col_nodelist
 
             # Build the row
             row = [
@@ -172,13 +175,11 @@ class LayoutManager:
                 job.state.value,
                 str(job.nodes_required),
             ]
-            #if self.simulate_network:
-            #    row.append(nodes_display)
-            #    row.append(convert_seconds_to_hhmm(job.running_time))
+            row.append(nodes_display)
 
-            if show_nodes:
-                # Insert NODELIST immediately after col_slow (whether NODELIST or SLOWDOWN)
-                row.append(col_nodelist)
+            #if show_nodes:
+            #    # Insert NODELIST immediately after col_slow (whether NODELIST or SLOWDOWN)
+            #    row.append(col_nodelist)
 
             # Finally, append the running‐time column
             row.append(convert_seconds_to_hhmm(job.running_time))
-- 
GitLab

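The restore works because the header list and each job row must be appended in the same order; a condensed sketch of that invariant (build_table is hypothetical, and the dict keys are placeholders rather than the Job API):

    def build_table(jobs, show_slowdown):
        columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"]
        columns.append("SLOW DOWN" if show_slowdown else "NODE SEGMENTS")
        columns.append("TIME")
        rows = []
        for job in jobs:
            row = [job["id"], job["wall_time"], job["name"],
                   job["account"], job["state"], job["nodes"]]
            row.append(job["slowdown"] if show_slowdown else job["segments"])
            row.append(job["time"])
            rows.append(row)  # len(row) == len(columns) by construction
        return columns, rows
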
""" - nrows = 1E5 # None + nrows = None alloc_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_history_hashed.csv'), nrows=nrows, low_memory=False) node_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_node_history.csv'), nrows=nrows, low_memory=False) step_df = pd.read_csv(os.path.join(path[0], 'final_csm_step_history.csv'), nrows=nrows, low_memory=False) @@ -90,8 +90,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): simulation_end_timestamp = simulation_start_timestamp + time_to_simulate_timedelta # As these are >1.4M jobs, filtered to the simulated timestamps before creating the job structs. - #allocation_df = allocation_df[allocation_df['end_timestamp'] >= simulation_start_timestamp] # Job should not have ended before the simulation time - #allocation_df = allocation_df[allocation_df['job_submit_timestamp'] < simulation_end_timestamp] # Job has to have been submited before or during the simulaion time + allocation_df = allocation_df[allocation_df['end_timestamp'] >= simulation_start_timestamp] # Job should not have ended before the simulation time + allocation_df = allocation_df[allocation_df['job_submit_timestamp'] < simulation_end_timestamp] # Job has to have been submited before or during the simulaion time job_list = [] diff --git a/raps/plotting.py b/raps/plotting.py index fe1918b..80bc310 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -16,6 +16,10 @@ Plotter import itertools import matplotlib.pyplot as plt +import matplotlib.dates as md +import matplotlib.ticker as ticker +from matplotlib.ticker import MaxNLocator +import time import numpy as np from uncertainties import unumpy from rich.progress import track @@ -342,7 +346,7 @@ def plot_jobs_gantt(*,ax=None,jobs, bars_are_node_sized): ax.set_ylabel("Job ID") ##ax_b labels: ax.set_xlabel("time [hh:mm]") - minx_s = 0 + minx_s = min([x.submit_time for x in jobs]) maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] @@ -373,15 +377,21 @@ def plot_nodes_gantt(*,ax=None,jobs): ax.set_ylabel("Node ID") ##ax_b labels: ax.set_xlabel("time [hh:mm]") - minx_s = 0 + minx_s = min([x.submit_time for x in jobs]) maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) - x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)] - x_label_ticks = [n * 60 for n in x_label_mins[0::60]] - x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for - (x1,x2) in [(n // 60,n % 60) for - n in x_label_mins[0::60]]] + #ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M:%S')) - ax.set_xticks(x_label_ticks,x_label_str) + formatter = ticker.FuncFormatter(lambda s, x: time.strftime('%m-%d %H:%M:%S', time.gmtime(s))) + ax.xaxis.set_major_formatter(formatter) + ax.yaxis.set_major_locator(MaxNLocator(integer=True)) + + #x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)] + #x_label_ticks = [n * 60 for n in x_label_mins[0::60]] + #x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for + # (x1,x2) in [(n // 60,n % 60) for + # n in x_label_mins[0::60]]] + + #ax.set_xticks(x_label_ticks,x_label_str) ax.set_ylim(1,max(list(itertools.chain.from_iterable(nodeIDs)))) #ax.yaxis.set_inverted(True) return ax diff --git a/raps/telemetry.py b/raps/telemetry.py index e01b1e3..e232898 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -10,11 +10,13 @@ import re import sys import random 
diff --git a/raps/telemetry.py b/raps/telemetry.py
index e01b1e3..e232898 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -10,11 +10,13 @@
 import re
 import sys
 import random
 import argparse
-import itertools
+#import itertools
 import json
 import os.path
 
+
 if __name__ == "__main__":
+    #from raps.args import args,args_dict
     parser = argparse.ArgumentParser(description='Telemetry data validator')
     parser.add_argument('--jid', type=str, default='*', help='Replay job id')
     parser.add_argument('-f', '--replay', nargs='+', type=str,
@@ -42,6 +44,7 @@ from raps.job import Job, job_dict
 import matplotlib.pyplot as plt
 from raps.plotting import Plotter, plot_submit_times, plot_nodes_histogram, plot_jobs_gantt, plot_nodes_gantt, spaced_colors, plot_network_histogram
 from raps.utils import next_arrival_byconfargs, create_casename, convert_to_seconds
+#from raps.args import args, args_dict
 
 
 class Telemetry:
@@ -79,9 +82,18 @@ class Telemetry:
         list_of_job_dicts = data['jobs'].tolist()
         for job_info in list_of_job_dicts:
             jobs.append(Job(job_info))
-        timestep_start = int(data['timestep_start'])
-        timestep_end = int(data['timestep_end'])
-        args_from_file = data['args'].tolist()
+        if 'timestep_start' in data.files:
+            timestep_start = int(data['timestep_start'])
+        else:
+            timestep_start = 0
+        if 'timestep_end' in data.files:
+            timestep_end = int(data['timestep_end'])
+        else:
+            timestep_end = np.inf
+        if 'args' in data.files:
+            args_from_file = data['args'].tolist()
+        else:
+            args_from_file = None
 
         return jobs, \
                timestep_start, \
@@ -124,12 +136,12 @@ class Telemetry:
             )
             job = Job(job_info)
             jobs.append(job)
-        if hasattr(data,'args'):
-            args_from_file = data["args"].item()  # This should be empty as csv contains no args.
-        else:
-            args_from_file = None
+        #if 'args' in data.files:
+        #    args_from_file = data["args"].item() # This should be empty as csv contains no args.
+        #else:
+        #    args_from_file = None
 
-        return jobs, time_start, time_end, args_from_file
+        return jobs, time_start, time_end, None
 
     def load_data(self, files):
         """Load telemetry data using custom data loaders."""
@@ -200,15 +212,18 @@ class Telemetry:
             elif file.endswith(".npz"): # Replay .npz file
                 print(f"Loading {file}...")
                 jobs_from_file, timestep_start_from_file, timestep_end_from_file, args_from_file = self.load_snapshot(file)
-                if not hasattr(args_from_file,'fastforward') or args_from_file.fastforward is None:
-                    args_from_file.fastforward = 0
-                print("File was generated with:" +\
-                      f"\n--system {args_from_file.system} " +\
-                      f"-ff {args_from_file.fastforward} " +\
-                      f"-t {args_from_file.time}\n" +\
-                      f"All Args:\n{args_from_file}" +\
-                      "To use these set them from the commandline!"
-                      )
+                if args_from_file is not None:
+                    print("File was generated with:" +\
+                          f"\n--system {args_from_file.system} " +\
+                          f"-ff {args_from_file.fastforward} " +\
+                          f"-t {args_from_file.time}\n" +\
+                          f"All Args:\n{args_from_file}" +\
+                          "To use these, set them from the command line!"
+                          )
+                else:
+                    print("No generation arguments extracted from input file!")
+                    # Args are extracted to tell users how to reproduce results;
+                    # they are not parsed and re-applied automatically.
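+                    #
+                    # For reference, a standalone sketch of this fallback read;
+                    # membership is tested against data.files because np.load
+                    # results (NpzFile) do not expose array names as attributes:
+                    #
+                    #     data = np.load(path, allow_pickle=True)
+                    #     start = int(data['timestep_start']) if 'timestep_start' in data.files else 0
+                    #     end = int(data['timestep_end']) if 'timestep_end' in data.files else np.inf
+                    #     meta = data['args'].tolist() if 'args' in data.files else None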
                 jobs.extend(jobs_from_file)
                 timestep_start = min(timestep_start,timestep_start_from_file)
                 timestep_end = max(timestep_end, timestep_end_from_file)
@@ -303,15 +318,17 @@ if __name__ == "__main__":
     # ——— compute avg network traces ———
     ntx_means = []
     nrx_means = []
-    for job_vec in jobs:
-        ntx = np.array(job_vec.get('ntx_trace', []))
-        nrx = np.array(job_vec.get('nrx_trace', []))
-
+    for job in jobs:
+        job_vec = job.__dict__
         # only if there’s at least one valid sample
-        if ntx.size > 0 and not np.all(np.isnan(ntx)):
-            ntx_means.append(np.nanmean(ntx))
-        if nrx.size > 0 and not np.all(np.isnan(nrx)):
-            nrx_means.append(np.nanmean(nrx))
+        if 'ntx_trace' in job_vec:
+            ntx = np.array(job_vec.get('ntx_trace', []))
+            if ntx.size > 0 and not np.all(np.isnan(ntx)):
+                ntx_means.append(np.nanmean(ntx))
+        if 'nrx_trace' in job_vec:
+            nrx = np.array(job_vec.get('nrx_trace', []))
+            if nrx.size > 0 and not np.all(np.isnan(nrx)):
+                nrx_means.append(np.nanmean(nrx))
 
     if ntx_means:
         print(f'Average ntx_trace per job: {np.mean(ntx_means):.2f}')
diff --git a/raps/ui.py b/raps/ui.py
index 6ac4ebe..c93fdf8 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -111,7 +111,8 @@ class LayoutManager:
         """
 
         # Decide whether to show "SLOWDOWN" (if real topology) or "NODE SEGMENTS" (if capacity/none)
-        show_slowdown = (self.topology in ("fat-tree", "dragonfly", "capacity"))
+        #show_slowdown = (self.topology in ("fat-tree", "dragonfly", "capacity"))
+        show_slowdown = self.simulate_network
 
         # Build the column headers
         #columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST"]
diff --git a/raps/workload.py b/raps/workload.py
index 184bfae..968f8c2 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -592,11 +592,11 @@ def plot_job_hist(jobs,config=None,dist_split=None):
         axs[1][0].scatter(x, y,zorder=3)
 
     cpu_util = [x.cpu_trace for x in jobs]
-    if isinstance(cpu_util[0],np.ndarray):
-        cpu_util = np.concatenate(cpu_util).ravel()
+    if isinstance(cpu_util[0],(np.ndarray, list)):
+        cpu_util = [sum(part) / len(part) for part in cpu_util]
     gpu_util = [x.gpu_trace for x in jobs]
-    if isinstance(gpu_util[0],np.ndarray):
-        gpu_util = np.concatenate(gpu_util).ravel()
+    if isinstance(gpu_util[0],(np.ndarray, list)):
+        gpu_util = [sum(part) / len(part) for part in gpu_util]
     if not all([x == 0 for x in gpu_util]):
         axs[0][1].scatter(cpu_util,gpu_util,zorder=2,marker='.',s=0.2)
         axs[0][1].hist(gpu_util,bins=100,orientation='horizontal',zorder=1, density=True,color='tab:purple')
@@ -756,7 +756,7 @@ def check_workload_args(args):
 
 
 if __name__ == "__main__":
-    from args import args, args_dict
+    from raps.args import args, args_dict
     from raps.config import ConfigManager
     config = ConfigManager(system_name=args.system).get_config()
     if args.replay:
-- 
GitLab


From 6580de0100869e1c5fa517fc6237209dac038cf3 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Fri, 18 Jul 2025 12:17:29 -0400
Subject: [PATCH 162/388] Few more improvements to MIT supercloud data loader for robustness and improved output

---
 raps/dataloaders/mit_supercloud.py | 93 +++++++++++++++++++-----------
 1 file changed, 60 insertions(+), 33 deletions(-)

diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud.py
index 58ec96d..b75a9be 100644
--- a/raps/dataloaders/mit_supercloud.py
+++ b/raps/dataloaders/mit_supercloud.py
@@ -121,10 +121,10 @@ def load_data(local_dataset_path, **kwargs):
     Load MIT Supercloud job traces **without** any metadata files.
Expects under: local_dataset_path/ - 202201/ + [.../] + slurm-log.csv cpu/...-timeseries.csv gpu/...-timeseries.csv - slurm-log.csv Returns: jobs_list, sim_start_time, sim_end_time """ @@ -135,9 +135,17 @@ def load_data(local_dataset_path, **kwargs): raise ValueError("Expect exactly one path") local_dataset_path = local_dataset_path[0] - # 1) slurm log → DataFrame - sub = "202201" - slurm_path = os.path.join(local_dataset_path, sub, "slurm-log.csv") + # 1) slurm log -> DataFrame + slurm_path = None + for root, _, files in os.walk(local_dataset_path): + if "slurm-log.csv" in files: + slurm_path = os.path.join(root, "slurm-log.csv") + break + + if not slurm_path: + raise FileNotFoundError(f"Could not find slurm-log.csv under {local_dataset_path}") + + data_root = os.path.dirname(slurm_path) sl = pd.read_csv(slurm_path) # 2) date window @@ -173,27 +181,32 @@ def load_data(local_dataset_path, **kwargs): # 5) find trace files by walking directories cpu_files = [] - cpu_root = os.path.join(local_dataset_path, sub, "cpu") - for R,_,fs in os.walk(cpu_root): - for f in fs: - if not f.endswith("-timeseries.csv"): - continue - jid = int(f.split("-",1)[0]) - if jid in job_ids: - rel = os.path.relpath(os.path.join(R,f), os.path.join(local_dataset_path,sub)) - cpu_files.append(rel) + cpu_root = os.path.join(data_root, "cpu") + if os.path.exists(cpu_root): + for R,_,fs in os.walk(cpu_root): + for f in fs: + if not f.endswith("-timeseries.csv"): + continue + try: + jid = int(f.split("-",1)[0]) + if jid in job_ids: + cpu_files.append(os.path.join(R,f)) + except (ValueError, IndexError): + continue gpu_files = [] - gpu_root = os.path.join(local_dataset_path, sub, "gpu") - for R,_,fs in os.walk(gpu_root): - for f in fs: - #if not f.endswith("-timeseries.csv"): - if not f.endswith(".csv"): - continue - jid = int(f.split("-",1)[0]) - if jid in job_ids: - rel = os.path.relpath(os.path.join(R,f), os.path.join(local_dataset_path,sub)) - gpu_files.append(rel) + gpu_root = os.path.join(data_root, "gpu") + if os.path.exists(gpu_root): + for R,_,fs in os.walk(gpu_root): + for f in fs: + if not f.endswith(".csv"): + continue + try: + jid = int(f.split("-",1)[0]) + if jid in job_ids: + gpu_files.append(os.path.join(R,f)) + except (ValueError, IndexError): + continue # 6) select final trace list if cpu_only: @@ -220,27 +233,41 @@ def load_data(local_dataset_path, **kwargs): data = {} # 8a) CPU first - for rel in tqdm(cpu_files, desc="Loading CPU traces"): - fp = os.path.join(local_dataset_path, sub, rel) + for fp in tqdm(cpu_files, desc="Loading CPU traces"): df = pd.read_csv(fp, dtype={0: str}) - jid = int(os.path.basename(rel).split("-", 1)[0]) + jid = int(os.path.basename(fp).split("-", 1)[0]) rec = data.setdefault(jid, {}) - tqdm.write(f"Reading CPU {rel}") + + # Find job info in slurm log and print details + job_info = sl[sl.id_job == jid] + if not job_info.empty: + job_row = job_info.iloc[0] + start_time = job_row.get('time_start', 'N/A') + wall_time = job_row.get('time_limit', 'N/A') + tres_alloc = job_row.get('tres_alloc', 'N/A') + gres_used = job_row.get('gres_used', 'N/A') + + tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid}") + tqdm.write(f" Start Time: {start_time}, Wall Time: {wall_time}s") + tqdm.write(f" TRES Alloc: {tres_alloc}") + tqdm.write(f" GRES Used: {gres_used}") + else: + tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") + rec["cpu"] = proc_cpu_series(df) print(f"GPU candidate files ({len(gpu_files)}):") for p in gpu_files[:10]: print(" ", 
p) - for rel in tqdm(gpu_files, desc="Loading GPU traces"): - fp = os.path.join(local_dataset_path, sub, rel) + for fp in tqdm(gpu_files, desc="Loading GPU traces"): if debug: - print(f"\n[DEBUG] attempting {rel!r}") + print(f"\n[DEBUG] attempting {fp!r}") print(" full path exists:", os.path.exists(fp), fp) if not os.path.exists(fp): continue - tqdm.write(f"Reading GPU {rel}") + tqdm.write(f"Reading GPU {os.path.basename(fp)}") dfi = pd.read_csv(fp, dtype={0: str}) if debug: print(" loaded dataframe, columns:", dfi.columns.tolist()) @@ -248,7 +275,7 @@ def load_data(local_dataset_path, **kwargs): tqdm.write(" → no gpu_index column! SKIPPING") continue - jid = int(os.path.basename(rel).split("-", 1)[0]) + jid = int(os.path.basename(fp).split("-", 1)[0]) rec = data.setdefault(jid, {}) cpu_df = rec.get("cpu") if cpu_df is None: -- GitLab From 651c4ee6e947f378c46c2f85f8066c5cc45ea720 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 18 Jul 2025 14:02:30 -0400 Subject: [PATCH 163/388] Move both data loader and data downloader under single directory under raps/dataloaders/mit_supercloud --- README.md | 5 +- raps/dataloaders/mit_supercloud/README.md | 12 - raps/dataloaders/mit_supercloud/__init__.py | 3 + raps/dataloaders/mit_supercloud/cli.py | 33 ++ .../mit_supercloud/create_trace.py | 248 ------------ .../mit_supercloud/dist/anal_data.py | 36 -- .../mit_supercloud/dist/create_trace.py | 382 ------------------ .../mit_supercloud/dist/download_data.py | 156 ------- .../mit_supercloud/dist/node_data_anal.py | 41 -- .../mit_supercloud/dist/parse_mit_data.py | 81 ---- .../mit_supercloud/dist/readme.txt | 21 - raps/dataloaders/mit_supercloud/dist/setup.py | 172 -------- raps/dataloaders/mit_supercloud/download.py | 209 ++++++++++ .../mit_supercloud/generate_local_metadata.py | 126 ------ .../loader.py} | 0 raps/dataloaders/mit_supercloud/setup.py | 191 --------- raps/dataloaders/mit_supercloud/utils.py | 117 ++++++ 17 files changed, 366 insertions(+), 1467 deletions(-) delete mode 100644 raps/dataloaders/mit_supercloud/README.md create mode 100644 raps/dataloaders/mit_supercloud/__init__.py create mode 100644 raps/dataloaders/mit_supercloud/cli.py delete mode 100644 raps/dataloaders/mit_supercloud/create_trace.py delete mode 100644 raps/dataloaders/mit_supercloud/dist/anal_data.py delete mode 100644 raps/dataloaders/mit_supercloud/dist/create_trace.py delete mode 100644 raps/dataloaders/mit_supercloud/dist/download_data.py delete mode 100644 raps/dataloaders/mit_supercloud/dist/node_data_anal.py delete mode 100644 raps/dataloaders/mit_supercloud/dist/parse_mit_data.py delete mode 100644 raps/dataloaders/mit_supercloud/dist/readme.txt delete mode 100644 raps/dataloaders/mit_supercloud/dist/setup.py create mode 100644 raps/dataloaders/mit_supercloud/download.py delete mode 100644 raps/dataloaders/mit_supercloud/generate_local_metadata.py rename raps/dataloaders/{mit_supercloud.py => mit_supercloud/loader.py} (100%) delete mode 100644 raps/dataloaders/mit_supercloud/setup.py create mode 100644 raps/dataloaders/mit_supercloud/utils.py diff --git a/README.md b/README.md index f01b2d4..f1fd108 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,10 @@ For Google cluster trace v2 For MIT Supercloud - python main.py -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud + + # download the dataset + python -m raps.dataloaders.mit_supercloud.cli download --start 21052021 --end 22052021 \ + --outdir /path/to/mit python multi-part-sim.py -x 'mit_supercloud/*' -f 
/path/to/mit_supercloud/datacenter-challenge --system mit_supercloud diff --git a/raps/dataloaders/mit_supercloud/README.md b/raps/dataloaders/mit_supercloud/README.md deleted file mode 100644 index f69189c..0000000 --- a/raps/dataloaders/mit_supercloud/README.md +++ /dev/null @@ -1,12 +0,0 @@ -To generate file indices needed for Damien's reader from full installation of MIT Supercloud dataset: -This will generate: `file_list.csv`, `job_user_data.csv`, and `job_user_date_full.csv` - - python generate_local_metadata.py /lustre/orion/proj-shared/gen150/exadigit/mit_supercloud/datacenter-challenge/202201 - -To create the npz file that RAPS can use: - - python create_trace.py /lustre/orion/proj-shared/gen150/exadigit/mit_supercloud/datacenter-challenge/202201 - -Then to run: - - python main.py -f raps/dataloaders/mit_supercloud/data/mit_supercloud_jobs_21_05_2021__22_05_2021.npz --system mit_supercloud diff --git a/raps/dataloaders/mit_supercloud/__init__.py b/raps/dataloaders/mit_supercloud/__init__.py new file mode 100644 index 0000000..eff43fa --- /dev/null +++ b/raps/dataloaders/mit_supercloud/__init__.py @@ -0,0 +1,3 @@ +from .loader import load_data + +__all__ = ["load_data"] diff --git a/raps/dataloaders/mit_supercloud/cli.py b/raps/dataloaders/mit_supercloud/cli.py new file mode 100644 index 0000000..cbeadf1 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/cli.py @@ -0,0 +1,33 @@ +import argparse +from .download import download +from .loader import load_data + +def main(): + p = argparse.ArgumentParser(prog="mit_supercloud") + subs = p.add_subparsers(dest="cmd", required=True) + + common = argparse.ArgumentParser(add_help=False) + common.add_argument("--start", default="21052021") + common.add_argument("--end", default="22052021") + common.add_argument("--partition", choices=["all","part-cpu","part-gpu"], default="all") + common.add_argument("--outdir", default="source_data") + common.add_argument("--bucket", default="mit-supercloud-dataset") + common.add_argument("--prefix", default="datacenter-challenge/202201/") + common.add_argument("--max-jobs", type=int) + common.add_argument("--dry-run", action="store_true") + + pd = subs.add_parser("download", parents=[common], help="Fetch data from S3") + pd.set_defaults(func=download) + + pl = subs.add_parser("load", parents=[common], help="Load local data into RAPS") + pl.add_argument("path", help="Local data root") + pl.set_defaults(func=lambda args: load_data(args.path, + start_date=args.start, + end_date=args.end, + partition=args.partition)) + + args = p.parse_args() + return args.func(args) + +if __name__ == "__main__": + main() diff --git a/raps/dataloaders/mit_supercloud/create_trace.py b/raps/dataloaders/mit_supercloud/create_trace.py deleted file mode 100644 index 27714ba..0000000 --- a/raps/dataloaders/mit_supercloud/create_trace.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Simplified and PEP-8 compliant refactor of the original script. 
- -Original script created on Fri Sep 20 10:14:23 2024 by Damien Fay (HPE) -""" - -import argparse -import os -import shutil -import sys -from datetime import datetime -from types import SimpleNamespace - -import numpy as np -import pandas as pd -from scipy.sparse import csr_matrix as csr -from tqdm import tqdm - -# Add the raps project root to the Python path -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) - -from raps.job import job_dict - -def proc_cpu_series(dfi): - dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() - dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 - - t = pd.to_datetime(dfi.EpochTime, unit='s') - start_time = t.min() - dfi['t'] = ((t - start_time).dt.total_seconds() // 10).astype(int) - dfi['sid'] = pd.factorize(dfi.Step)[0] - - useries = dfi.Series.unique() - inds = np.arange(dfi.t.max() + 1) - df = pd.DataFrame({'t': inds}) - Xm, Xrss, Xvm, Xreadmb, Xwritemb = (np.zeros((len(useries), len(inds))) for _ in range(5)) - - for cnt, i in enumerate(useries): - sift = dfi.Series == i - M, N = len(inds), dfi.sid[sift].max() + 1 - - for metric, arr, name in zip( - ['CPUUtilization', 'RSS', 'VMSize', 'ReadMB', 'WriteMB'], - [Xm, Xrss, Xvm, Xreadmb, Xwritemb], - ['cpu', 'rss', 'vm', 'readmb', 'writemb'] - ): - X = csr((dfi.loc[sift, metric], (dfi.loc[sift, 't'], dfi.loc[sift, 'sid'])), shape=(M, N)) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df[f'{name}_{i}'] = mm - arr[cnt, :] = mm - - df['cpu_utilisation'] = Xm.mean(axis=0) - df['rss'] = Xrss.sum(axis=0) - df['vm'] = Xvm.sum(axis=0) - df['readmb'] = Xreadmb.sum(axis=0) - df['writemb'] = Xwritemb.sum(axis=0) - df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') - df['utime'] = df['timestamp'].astype('int64') // 10**9 - - return df - -def proc_gpu_series(cpu_df, dfi, gpu_cnt): - t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max()]) - t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max()]) - - t_cpu_range = t_cpu[1] - t_cpu[0] - t_gpu_range = t_gpu[1] - t_gpu[0] - per_diff = (t_cpu_range - t_gpu_range) / t_gpu_range * 100 - - if abs(per_diff) > 10: - raise ValueError("Time mismatch between CPU and GPU series exceeds 10%") - - dfi['t_fixed'] = dfi.timestamp - dfi.timestamp.min() + t_cpu[0] - ugpus = dfi.gpu_index.unique() - gpu_df = pd.DataFrame({'utime': cpu_df['utime'].values}) - - for u in ugpus: - dfg = dfi[dfi.gpu_index == u].copy() - fields = ['gpu_index', 'utilization_gpu_pct', 'utilization_memory_pct', 'memory_free_MiB', - 'memory_used_MiB', 'temperature_gpu', 'temperature_memory', 'power_draw_W'] - - for field in fields: - x1, y1 = dfg['t_fixed'].values, dfg[field].values - xv = cpu_df['utime'].values - yv = np.interp(xv, x1, y1) - gpu_df[field] = yv - - rename = { - 'utilization_gpu_pct': f'gpu_{gpu_cnt}', - 'utilization_memory_pct': f'gpu_mem_{gpu_cnt}', - 'temperature_gpu': f'gpu_temp_{gpu_cnt}', - 'power_draw_W': f'gpu_p_{gpu_cnt}' - } - gpu_df.rename(columns=rename, inplace=True) - gpu_cnt += 1 - - return gpu_df, gpu_cnt - -def main(local_dataset_path, start_date_str, end_date_str): - mit_dir = os.path.dirname(os.path.abspath(__file__)) - tracedir = os.path.join(mit_dir, 'data', 'trace') - os.makedirs(tracedir, exist_ok=True) - - start_ts = int(datetime.strptime(start_date_str, '%d%m%Y').timestamp()) - end_ts = int(datetime.strptime(end_date_str, '%d%m%Y').timestamp()) - - file_df = pd.read_csv(os.path.join(mit_dir, 'source_data', 'file_list.csv'), sep='\t') - gpu_df = 
file_df[file_df['File Name'].str.contains('/gpu/')].copy() - gpu_df['jobid'] = gpu_df['File Name'].str.extract(r'/([^/]+?)-').astype(int) - - job_df = pd.read_csv(os.path.join(mit_dir, 'source_data', 'job_user_date_full.csv')) - selected_df = job_df[(job_df.start > start_ts) & (job_df.start < end_ts)].copy() - - files_to_copy = [row['filename'].replace('-summary', '-timeseries') for _, row in selected_df.iterrows()] - files_to_copy += gpu_df[gpu_df.jobid.isin(selected_df.job_id)]['File Name'].tolist() - files_to_copy = list(set(files_to_copy)) - - for rel_path in tqdm(files_to_copy, desc="Copying trace files"): - src = os.path.join(local_dataset_path, rel_path) - dst = os.path.join(tracedir, os.path.basename(rel_path)) - if not os.path.exists(src): - print(f"Missing: {src}") - continue - if os.path.exists(dst) and os.path.getsize(src) == os.path.getsize(dst): - continue - shutil.copy2(src, dst) - - slurm_log = None - for root, _, files in os.walk(local_dataset_path): - if 'slurm-log.csv' in files: - slurm_log = os.path.join(root, 'slurm-log.csv') - break - if not slurm_log: - print(f"slurm-log.csv not found in {local_dataset_path}.") - return - - slurm_df = pd.read_csv(slurm_log) - traced_files = sorted(f for f in os.listdir(tracedir) if 'lock' not in f) - print(f"Processing {len(traced_files)} trace files.") - - data_dict = {} - for idx, s in enumerate(traced_files): - if idx % 100 == 0: - print(f"processing file {idx} of {len(traced_files)}") - fpath = os.path.join(tracedir, s) - dfi = pd.read_csv(fpath, dtype={0: str}) - jobid = int(s.split('-')[0]) - - if jobid not in data_dict: - data_dict[jobid] = {} - slurm_idx = np.where(slurm_df['id_job'] == jobid)[0] - if slurm_idx.shape[0] != 1: - continue - data_dict[jobid] = slurm_df.iloc[slurm_idx[0]].to_dict() - - if 'timeseries' in s: - if 'cpu' in data_dict[jobid]: - continue - cpu_ser = proc_cpu_series(dfi) - data_dict[jobid]['cpu'] = cpu_ser - - elif 'gpu_index' in dfi.columns: - rack = s.split('-')[1] - node = s.split('-')[2].split('.csv')[0] - cpu_df = data_dict[jobid].get('cpu') - if cpu_df is None: - continue - - gpu_cnt = data_dict[jobid].get('gpu_cnt', 0) - gpu_df = data_dict[jobid].get('gpu') - gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) - - if gpu_df is None: - data_dict[jobid]['gpu'] = gpu_ser - data_dict[jobid]['grack'] = [rack] - data_dict[jobid]['gnode'] = [node] - else: - data_dict[jobid]['gpu'] = pd.merge(gpu_df, gpu_ser, on='utime') - data_dict[jobid]['grack'].append(rack) - data_dict[jobid]['gnode'].append(node) - - data_dict[jobid]['gpu_cnt'] = gpu_cnt - - print("determining start time...") - min_utime = min(data['cpu']['utime'].min() for data in data_dict.values() if 'cpu' in data) - max_utime = max(data['cpu']['utime'].max() for data in data_dict.values() if 'cpu' in data) - total_sim_time = max_utime - min_utime - - jobs_list = [] - for jobid, data in data_dict.items(): - cpu_trace = data.get('cpu', {}).get('cpu_utilisation', []) - if isinstance(cpu_trace, pd.Series): - cpu_trace = cpu_trace.tolist() - - gpu_trace = data.get('gpu') - gpu_trace_list = gpu_trace.values.tolist() if isinstance(gpu_trace, pd.DataFrame) else 0 - - job_start_time = data['cpu']['utime'].min() - min_utime - job_end_time = data['cpu']['utime'].max() - min_utime - wall_time = max(0, job_end_time - job_start_time) - nodes_required = max(1, int(np.ceil(max(cpu_trace) / 2.0))) if cpu_trace else 1 - if nodes_required > 1 and cpu_trace: - cpu_trace = [x / nodes_required for x in cpu_trace] - - job = job_dict( - 
nodes_required=nodes_required, - name=data.get('name_job', 'unknown'), - account=data.get('name_account', 'unknown'), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace_list, - ntx_trace=[], - nrx_trace=[], - end_state=data.get('state_end', 'UNKNOWN'), - id=jobid, - submit_time=job_start_time, - time_limit=data.get('time_limit', 0), - start_time=job_start_time, - end_time=job_end_time, - wall_time=wall_time, - trace_time=len(cpu_trace) * 10.0, - trace_start_time=0, - trace_end_time=len(cpu_trace) * 10.0 - ) - jobs_list.append(job) - - tf1 = datetime.fromtimestamp(start_ts).strftime('%d_%m_%Y') - tf2 = datetime.fromtimestamp(end_ts).strftime('%d_%m_%Y') - save_path = os.path.join(mit_dir, 'data', f'mit_supercloud_jobs_{tf1}__{tf2}.npz') - os.makedirs(os.path.dirname(save_path), exist_ok=True) - - np.savez( - save_path, - jobs=np.array(jobs_list), - start_timestep=0, - end_timestep=total_sim_time, - args=SimpleNamespace(fastforward=None, system='mit_supercloud', time=total_sim_time) - ) - print(f"Saved {len(jobs_list)} jobs to {save_path}") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate job trace data from MIT Supercloud logs.") - parser.add_argument("local_dataset_path", type=str, help="Path to the dataset root.") - parser.add_argument("--start_date", default="21052021", help="Start date in DDMMYYYY format.") - parser.add_argument("--end_date", default="22052021", help="End date in DDMMYYYY format.") - args = parser.parse_args() - main(args.local_dataset_path, args.start_date, args.end_date) diff --git a/raps/dataloaders/mit_supercloud/dist/anal_data.py b/raps/dataloaders/mit_supercloud/dist/anal_data.py deleted file mode 100644 index 4ae8c70..0000000 --- a/raps/dataloaders/mit_supercloud/dist/anal_data.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Sep 25 15:49:04 2024 - -@author: daf -""" -import gzip -import pickle -import os - -# Get the directory of the current file -mit_dir = os.path.dirname(os.path.abspath(__file__)) - -# List the data files you want analysed into this list. It is assumed they live in /data/pkl -data_fyles = ['data_21_05_2021__22_05_2021.pkl.gz'] - -data = {} -# Combine the pickle files for comparison. 
-for s in data_fyles: - fyle = mit_dir+'/data/pkl/' + s - with gzip.open(fyle, 'rb') as file: - datai = pickle.load(file) - if data.keys is None: - data = datai - else: - # Check for common keys first - common_keys = list(data.keys() & datai.keys()) - if len(common_keys)>0: - print('Warning: there seems to be jobs overlapping in the data sets') - - # Combine - data = {**data, **datai} - -# Lets see how the job time series actually look - diff --git a/raps/dataloaders/mit_supercloud/dist/create_trace.py b/raps/dataloaders/mit_supercloud/dist/create_trace.py deleted file mode 100644 index a8af4a7..0000000 --- a/raps/dataloaders/mit_supercloud/dist/create_trace.py +++ /dev/null @@ -1,382 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri Sep 20 10:14:23 2024 - -@author: daf -""" - -# Given a start and end date identify those jobs that occur in this range and then download them -# from S3 into data/trace as a pcikle file (all traces will be in the same file) - -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -import os -import pandas as pd -import numpy as np -from io import StringIO -import pickle -from datetime import datetime -import shutil -import gzip -from scipy.sparse import csr_matrix as csr -import matplotlib.pyplot as plt - -def main(): - - # Get the directory of the current file - mit_dir = os.path.dirname(os.path.abspath(__file__)) - src_data_dir = mit_dir + '/source_data' - ################ Select correct files. - ################ CHANGE THESE 2 LINES ######################### - start_date = '21052021' # EU format day/month/year - end_date = '22052021' - - - - ################# Load index files to use to look up the correct files. - # MIT S3 bucket address. - bucket_name = 'mit-supercloud-dataset' - prefix = 'datacenter-challenge/202201/' - - # Load the s3 file index file - fyle = mit_dir + '/source_data/file_list.csv' - file_df = pd.read_csv(fyle,sep='\t') - gpu_file_df = file_df[file_df['File Name'].str.contains('/gpu/')].copy() - gpu_file_df['jobid'] = gpu_file_df['File Name'].str.extract(r'/([^/]+?)-') - gpu_file_df['jobid'] = gpu_file_df['jobid'].astype(int) - - # Load the index file - fyle = mit_dir + '/source_data/job_user_date_full.csv' - job_index_df = pd.read_csv(fyle) - - date_obj = datetime.fromtimestamp(job_index_df.start.min()) - date_min_str = date_obj.strftime('%d-%m-%Y') - date_obj = datetime.fromtimestamp(job_index_df.start.max()) - date_max_str = date_obj.strftime('%d-%m-%Y') - print('Data set contains data between: ' +date_min_str + ' and ' + date_max_str ) - - # Create and clear the trace directory. - tracedir = mit_dir + '/data/trace/' - if os.path.exists(tracedir): - pass # do nothing - might want to change this later - # shutil.rmtree(tracedir) # Remove everything in the folder - # os.makedirs(tracedir) # Recreate the folder after clearing it - else: - os.makedirs(tracedir) - - - st_date = datetime.strptime(start_date, '%d%m%Y') - st_date = int(st_date.timestamp()) - en_date = datetime.strptime(end_date, '%d%m%Y') - en_date = int(en_date.timestamp()) - - if st_date < job_index_df.start.min(): - print('Warning: start date (' + start_date + ') is before the start of the dataset (' + date_min_str + ') ') - if st_date > job_index_df.start.max(): - print('Error: start date (' + start_date + ') is after the end of the dataset (' + date_max_str + ') ') - crashhere - - # find the jobs that start between start and end dates. 
- sift = (job_index_df.start > st_date) & (job_index_df.start < en_date) - print('You have selected ' + str(sift.sum()) + ' fiies to download ') - - ##################### Download from S3 - - df = job_index_df[sift].copy() - df['target'] = df.filename.str.replace('-summary', '-timeseries') - # Go through each file and download it - # Set up S3 client. - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) - cnt=0 - - for s in df.target: - jobid = s.split('/')[-1].split('-')[0] - jobid=int(jobid) - fyle = os.path.join(mit_dir +'/data/trace/', s.split('/')[-1]) - - # download the data unless we already have it - if not os.path.exists(fyle): - s3.download_file(bucket_name, s, fyle) - # See if there are GPU files for this job. - sift_gpu = gpu_file_df.jobid == jobid - if sift_gpu.sum()>0: - gpu_fyles = gpu_file_df['File Name'][sift_gpu] - for ss in gpu_fyles: - gfyle = os.path.join(mit_dir +'/data/trace/', ss.split('/')[-1]) - if not os.path.exists(gfyle): - s3.download_file(bucket_name, ss, gfyle) - - else: - pass - cnt=cnt+1 - if cnt%50 ==0: - print('Downloaded ' + str(cnt) + ' of ' + str(sift.sum())) - - ##################### Process. - - # Load the slurm log to grab additional attributes. - slurm_df = pd.read_csv(src_data_dir+'/slurm-log.csv') - - dfiles_raw = os.listdir(tracedir) - # Sort so we process the cpu files first (we need the result for the gpu files) - dfiles = sorted(dfiles_raw, key=lambda x: 'timeseries' not in x) - dfiles = [file for file in dfiles if 'lock' not in file] - - print('Downloaded ' + str(len(dfiles)) + ' files. Processing ... ') - L = len(dfiles) - cnt = 0 - data_dict = {} - for s in dfiles: - if cnt%100==0: - print('processing file ' + str(cnt) + ' of ' + str(L)) - cnt = cnt+1 - fyle = os.path.join(mit_dir +'/data/trace/', s.split('/')[-1]) - dfi = pd.read_csv(fyle) - - jobid = int(s.split('-')[0]) - if jobid not in data_dict.keys(): - data_dict[jobid] = {} - # Add slurm data on creation - idx = np.where(slurm_df['id_job']==jobid)[0] - if idx.shape[0]!=1: - crashhere - else: - data_dict[jobid] = slurm_df.iloc[idx[0]].to_dict() - if ('timeseries' in s) and ('lock' not in s): - if 'cpu' in data_dict[jobid].keys(): - print('error a job cant have more than one cpu traces') - crashhere - else: - cpu_ser = proc_cpu_series(dfi) - data_dict[jobid]['cpu'] = cpu_ser - - elif 'gpu_index' in dfi.keys(): - mm = dfi.utilization_gpu_pct.max() - print('GPU max: ' + str(mm) ) - # Get the gpu node and rack - rack = s.split('-')[1] - node = s.split('-')[2].split('.csv')[0] - cpu_df = data_dict[jobid]['cpu'] - - - if 'gpu' not in data_dict[jobid].keys(): - data_dict[jobid]['gpu'] = {} - data_dict[jobid]['gpu_cnt']=0 - data_dict[jobid]['grack']=[rack] - data_dict[jobid]['gnode']=[node] - gpu_cnt = data_dict[jobid]['gpu_cnt'] - gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt) - data_dict[jobid]['gpu'] = gpu_ser - else: - data_dict[jobid]['grack'].append(rack) - data_dict[jobid]['gnode'].append(node) - gpu_df = data_dict[jobid]['gpu'] - gpu_cnt = data_dict[jobid]['gpu_cnt'] - gpu_ser,gpu_cnt = proc_gpu_series(cpu_df,dfi,gpu_cnt) - # combine with the existing df - df_merged = pd.merge(gpu_df, gpu_ser, on='utime') - asds - if df_merged.shape[0] != gpu_df.shape[0]: - crashhere - data_dict[jobid]['gpu'] = df_merged - data_dict[jobid]['gpu_cnt']= gpu_cnt - - asd - # save to pickle file. 
- pkldir = mit_dir + '/data/pkl/' - os.makedirs(pkldir, exist_ok=True) - t1 = datetime.fromtimestamp(st_date) - tf1 = t1.strftime('%d_%m_%Y') - t2 = datetime.fromtimestamp(en_date) - tf2 = t2.strftime('%d_%m_%Y') - fyle = 'data_' + tf1 + '__'+tf2 - fyle = pkldir+fyle +'.pkl.gz' - with gzip.open(fyle, 'wb') as file: - pickle.dump(data_dict, file) - - return - -def proc_gpu_series(cpu_df,dfi,gpu_cnt): - # Process GPU series by interpolating it to the same times as the cpu series. - - # time checks - t_cpu = np.array([cpu_df.utime.min(), cpu_df.utime.max() , 0]) - t_cpu[2]=t_cpu[1]-t_cpu[0] - t_gpu = np.array([dfi.timestamp.astype(int).min(), dfi.timestamp.astype(int).max(),0]) - t_gpu[2]=t_gpu[1]-t_gpu[0] - - dcpu = pd.to_datetime(t_cpu, unit='s') - dgpu = pd.to_datetime(t_gpu, unit='s') - t1 = (dcpu[1]-dcpu[0]).total_seconds() - t2 = (dgpu[1]-dgpu[0]).total_seconds() - per_dif = (t1-t2)/t2*100 - print(per_dif) - if abs(per_dif) > 10: - # More than 2% difference in the time taken, halt and look at it - crashhere - - # So move the GPU time to the CPU times. - dfi['t_fixed'] = dfi.timestamp-dfi.timestamp.min()+t_cpu[0] - - ugpus = dfi.gpu_index.unique() - gpu_df= pd.DataFrame({'utime': cpu_df['utime'].values}) - - - for u in ugpus: - dfg = dfi[dfi.gpu_index==u].copy() - - # Perform an interpolation - fylds = ['gpu_index', 'utilization_gpu_pct', - 'utilization_memory_pct', 'memory_free_MiB', 'memory_used_MiB', - 'temperature_gpu', 'temperature_memory', 'power_draw_W'] - - - - for ff in fylds: - x1 = dfg['t_fixed'].values - y1 = dfg[ff].values - xv = cpu_df['utime'].values - - # Interpolate using NumPy - yv = np.interp(xv, x1, y1) - - gpu_df[ff] = yv - ss = str(gpu_cnt) - ren = {'utilization_gpu_pct': 'gpu_' + ss, - 'utilization_memory_pct': 'gpu_mem_' + ss, - 'temperature_gpu': 'gpu_temp_' + ss, - 'power_draw_W':'gpu_p_'+ ss, - } - gpu_df.rename(columns=ren, inplace=True) - gpu_cnt = gpu_cnt + 1 - - return gpu_df,gpu_cnt - -def proc_cpu_series(dfi): - # This is the code that processes cpu data and performs the following steps: - # 1. Remove information from step [-1,-4] as these are empty. - # 2. give outliers their nearest neighbour values. There are spikes of outliers in the utilsation, I think thw whole row is rotten too. They are values like 40000 - # 3. For each series get the max cpu utilisation at each time step. - # Save these for the output. - # 4. Get the average cpu utilsation per series (maxed from step 3) - - # 1 Remove information from step [-1,-4] as these are empty. - sift = dfi.Step.isin([-1,-4,'-1','-4']) - if dfi.CPUUtilization[sift].sum() >0: - print('found a series that breaks the rule, check it') - # The -1 -4 indicators should be for non-events. IF the cpu utilisation has values something is up, might be a spike or something but the rule needs to be changed. - crashhere - # remove - dfi = dfi[~sift].copy() - - # Check for 1-1 series node correspondences and if not then there is an issue we need to clean up. - if False: - unode_series = dfi.groupby(['Node', 'Series']).size().reset_index(name='count') - unode = dfi.Node.unique() - for n in unode: - sift = dfi.Node == n - splits = dfi[sift].groupby('Series').size().reset_index(name='count') - splits = splits.sort_values(by='count', ascending=False) - for i in range(splits.shape[0]): - # Reassign the Series number back to the max for the node. - if i==0: - dest_ser = splits.iloc[i].Series - else: - # reassign the targets. 
- faulty_ser = splits.iloc[i].Series - sift_reas = sift & (dfi.Series ==faulty_ser ) - dfi.loc[sift,'Series'] = dest_ser - if sift_reas.sum()>40: - asd - print('Reassigning ' + str(sift_reas.sum()) + ' rows with faulty series values (from a total of ' + str(splits['count'][0])+ ' )') - t = pd.to_datetime(dfi.EpochTime, unit='s') - start_time = t.min() - steps = (t - start_time).dt.total_seconds() // 10 - # Convert to integer type if needed - steps = steps.astype(int) - dfi['t']= steps - - sid, uniques = pd.factorize(dfi.Step) - dfi['sid']= sid - - - - # 2. Outliers. - sift = (dfi.CPUUtilization > 500) & (dfi.CPUUtilization < 600) - # Clip these back to 500 - if sift.sum()>0: - asd - print('clipping ' + str(sift.sum()) + ' values' ) - dfi.loc[sift, 'CPUUtilization'] = 500 - - # select rows with >600 as outliers. - sift = dfi.CPUUtilization > 600 - if sum(sift)>0: - # Set to the nearest value less than 600. - dfi.loc[sift, 'CPUUtilization'] = dfi['CPUUtilization'].where(~sift).ffill().combine_first(dfi['CPUUtilization']).where(dfi['CPUUtilization'] <= 600) - - # 3. There are multiple series so we want to get the maximum (as only one series at a time is active) - useries = dfi.Series.unique() - inds = np.arange(dfi.t.max()+1) - # Create a data frame to hold the results. - df = pd.DataFrame({'t':inds}) - Xm = np.zeros((len(useries),inds.shape[0])) - Xrss = np.zeros((len(useries),inds.shape[0])) - Xvm = np.zeros((len(useries),inds.shape[0])) - Xreadmb = np.zeros((len(useries),inds.shape[0])) - Xwritemb = np.zeros((len(useries),inds.shape[0])) - - cnt=0 - for i in useries: - sift = dfi.Series == i - M = len(inds) - N = dfi.sid[sift].max()+1 - # create a #series x #time steps csr then max it to get the actual readings. - X = csr( (dfi.CPUUtilization[sift],(dfi.t[sift],dfi.sid[sift])),shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['cpu_' + str(i)] = mm - Xm[cnt,:] = mm - - # RSS - X = csr( (dfi.RSS[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['rss_' + str(i)] = mm - Xrss[cnt,:] = mm - - # VMsize - X = csr( (dfi.VMSize[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['vm_' + str(i)] = mm - Xvm[cnt,:] = mm - - # ReadMB - X = csr( (dfi.ReadMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['readmb_' + str(i)] = mm - Xreadmb[cnt,:] = mm - - # WriteMB - X = csr( (dfi.WriteMB[sift],(dfi.t[sift],dfi.sid[sift])), shape = (M,N) ) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df['writemb_' + str(i)] = mm - Xwritemb[cnt,:] = mm - - - cnt=cnt+1 - - df['cpu_utilisation'] = Xm.mean(axis=0) - df['rss'] = Xrss.sum(axis=0) - df['vm'] = Xvm.sum(axis=0) - df['readmb'] = Xreadmb.sum(axis=0) - df['writemb'] = Xwritemb.sum(axis=0) - - - df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') - df['utime'] = df['timestamp'].astype('int64') // 10**9 - return df - -if __name__ == "__main__": - main() - \ No newline at end of file diff --git a/raps/dataloaders/mit_supercloud/dist/download_data.py b/raps/dataloaders/mit_supercloud/dist/download_data.py deleted file mode 100644 index 7987ec4..0000000 --- a/raps/dataloaders/mit_supercloud/dist/download_data.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri Sep 20 10:14:23 2024 - -@author: daf -""" -# This script will look to see if you have certain files and if not it will 
create/download them (this avoids large unneccesary downloads) -# In addition it is used to download data for certain date ranges that you can specify (across all machines). -# To set the date ranges change start_date and end_date on lines - -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -import os -import pandas as pd -from io import StringIO - -# Get the directory of the current file -mit_dir = os.path.dirname(os.path.abspath(__file__)) - -start_date = '01012020' # EU format day/month/year -end_date = '01012020' -def list_s3_files_and_sizes(bucket_name, prefix=''): - # Initialize an S3 client with no signing - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) - - file_names = [] - file_sizes_gb = [] - - paginator = s3.get_paginator('list_objects_v2') - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - namm = obj['Key'] - file_names.append(namm) - file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to GB - file_sizes_gb.append(file_size_gb) - print(f"{namm}: {file_size_gb:.4f} MB") - - return file_names, file_sizes_gb - -def download_s3_bucket(bucket_name, prefix, datadir): - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) - paginator = s3.get_paginator('list_objects_v2') - - # Recursively download all files with the given prefix - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - # Get the file's S3 key - s3_key = obj['Key'] - s3_stem = s3_key[28:] - local_file_path = os.path.join(datadir, s3_stem) - - local_dir = os.path.dirname(local_file_path) - if not os.path.exists(local_dir): - os.makedirs(local_dir) - print(f"Downloading {s3_key} to {local_file_path}...") - s3.download_file(bucket_name, s3_key, local_file_path) - -def index_summary_file(bucket_name, prefix, datadir): - paginator = s3.get_paginator('list_objects_v2') - results = [] - # Check if the bucket contains any objects - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - key = obj['Key'] - # Check if the key ends with '-summary.csv' - if key.endswith('-summary.csv'): - # Read the CSV file from S3 into a DataFrame - csv_obj = s3.get_object(Bucket=bucket_name, Key=key) - body = csv_obj['Body'].read().decode('utf-8') # Decode bytes to string - - # Use StringIO to read the CSV data - df = pd.read_csv(StringIO(body)) - - # Get the maximum value from the 'epoch' column - st_time = df['Min_EpochTime'].min() - ed_time = df['Max_EpochTime'].max() - node_count = df.Node.unique().shape[0] - jobid = int(key.split('/')[-1].split('-')[0]) - # Append the results to the DataFrame - results.append({'job_id': jobid, 'filename': key, 'start': st_time, 'end' : ed_time, 'node_count': node_count}) - print(f"Processed: {key}") - - df = pd.DataFrame(results) - return df - -asd -# MIT S3 bucket address. 
-bucket_name = 'mit-supercloud-dataset' -prefix = 'datacenter-challenge/202201/' - - -# Get the list of S3 file names and sizes and save (unless its already there) -fyle = mit_dir + '/source_data/file_list.csv' - -# Check if file exists -if not os.path.exists(fyle): - file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) - # Open a file in write mode - with open(fyle, "w") as file: - # Write the header (optional) - file.write("File Name\tSize (MB)\n") - # Iterate over both lists and write each file name and its size - for name, size in zip(file_names, file_sizes_gb): - file.write(f"{name}\t{size:.2f} \n") - -# Download the following root dir files. -s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) -dfiles = ['LICENSE','README.md','labelled_job_stats.csv','labelled_jobids.csv' - ,'node-data.csv','slurm-log.csv','tres-mapping.txt'] - -for s in dfiles: - fyle = os.path.join(mit_dir +'/source_data', os.path.basename(s)) - if not os.path.exists(fyle): - s3.download_file(bucket_name, prefix + s, fyle) - -# download one cpu and 1 gpu of data. -bucket_name = 'mit-supercloud-dataset' -subfolder = 'datacenter-challenge/202201/cpu/0026/' -datadir = mit_dir + '/source_data' -#download_s3_bucket(bucket_name, subfolder, datadir) - -subfolder = 'datacenter-challenge/202201/gpu/0020/' -#download_s3_bucket(bucket_name, subfolder, datadir) - - -# Create the job-user-date index file if it doesnt exist already. -fyle = mit_dir + '/source_data/job_user_date.csv' -# Check if file exists -if not os.path.exists(fyle): - print('This can take about 24 hours to complete.') - job_index_df = index_summary_file(bucket_name, prefix, datadir) - job_index_df.to_csv(fyle, index=False) -else: - job_index_df = pd.read_csv(fyle) - -fyle = mit_dir + '/source_data/job_user_date_full.csv' -if not os.path.exists(fyle): - # Open the slurm log to get the user id for each job. - slurm_df = pd.read_csv(mit_dir + '/source_data/slurm-log.csv') - # Cut out all but the user job mapping - slurm_df = slurm_df[['id_job','id_user']] - - final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') - final_df.to_csv(fyle, index=False) - -print('Pre-processing to create an index linking jobs and users to dates is now complete and can be found in the file ') -print(fyle) - - - - diff --git a/raps/dataloaders/mit_supercloud/dist/node_data_anal.py b/raps/dataloaders/mit_supercloud/dist/node_data_anal.py deleted file mode 100644 index 8605f17..0000000 --- a/raps/dataloaders/mit_supercloud/dist/node_data_anal.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Mon Sep 23 11:46:42 2024 - -@author: daf -""" - -# Analyse the node data. - -import pandas as pd -import numpy as np -import os,subprocess - -# Get the directory of the current file -mit_dir = os.path.dirname(os.path.abspath(__file__)) - -node_fyle = mit_dir+'/source_data/node-data.csv' - -# Define a function to skip rows that are not multiples of 4 - - -# Calculate the total number of rows in the file (optional, to improve efficiency) -Nr = sum(1 for row in open(node_fyle)) # 34M rows. -K=100 # Reduction factor. 
-keep_rows = np.arange(3, Nr, K) - -temp_fyle = node_fyle[:-13] + 'temp.csv' -cmd = f"awk 'NR == 1 || NR % {K} == 0' \"{node_fyle}\" > \"{temp_fyle}\"" - - -# Run the awk command using subprocess -subprocess.run(cmd, shell=True, check=True) - - -# Read the CSV file, skipping rows that are not multiples of 4 -df = pd.read_csv(temp_fyle) -df['datetime'] = pd.to_datetime(df['Time'], unit='s') - -# Display the resulting DataFrame -print(df) \ No newline at end of file diff --git a/raps/dataloaders/mit_supercloud/dist/parse_mit_data.py b/raps/dataloaders/mit_supercloud/dist/parse_mit_data.py deleted file mode 100644 index 3b6b691..0000000 --- a/raps/dataloaders/mit_supercloud/dist/parse_mit_data.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri Sep 20 10:14:23 2024 - -@author: daf -""" - - -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -import os - -# Get the directory of the current file -mit_dir = os.path.dirname(os.path.abspath(__file__)) - - - -def list_s3_files_and_sizes(bucket_name, prefix=''): - # Initialize an S3 client with no signing - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) - - file_names = [] - file_sizes_gb = [] - - paginator = s3.get_paginator('list_objects_v2') - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - namm = obj['Key'] - file_names.append(namm) - file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to GB - file_sizes_gb.append(file_size_gb) - print(f"{namm}: {file_size_gb:.4f} MB") - - return file_names, file_sizes_gb - -def download_s3_bucket(bucket_name, prefix, datadir): - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) - paginator = s3.get_paginator('list_objects_v2') - - # Recursively download all files with the given prefix - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - # Get the file's S3 key - s3_key = obj['Key'] - local_file_path = os.path.join(datadir, s3_key) - local_dir = os.path.dirname(local_file_path) - if not os.path.exists(local_dir): - os.makedirs(local_dir) - print(f"Downloading {s3_key} to {local_file_path}...") - s3.download_file(bucket_name, s3_key, local_file_path) - -# Replace 'your-bucket-name' with the actual S3 bucket name -bucket_name = 'mit-supercloud-dataset' -prefix = 'datacenter-challenge/202201/' - -# Get the list of file names and sizes -file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) - - -# download one cpu and 1 gpu of data. -bucket_name = 'mit-supercloud-dataset' -subfolder = 'datacenter-challenge/202201/cpu/0026/' -datadir = mit_dir + '/source_data' -download_s3_bucket(bucket_name, subfolder, datadir) - -subfolder = 'datacenter-challenge/202201/gpu/0020/' -download_s3_bucket(bucket_name, subfolder, datadir) - - -# Output the results -print("Files in S3 bucket:") -for name, size in zip(file_names, file_sizes_gb): - print(f"{name}: {size:.2f} GB") - -# Example: You can use the lists for further processing -# file_names -> list of file paths -# file_sizes_gb -> list of file sizes in GB \ No newline at end of file diff --git a/raps/dataloaders/mit_supercloud/dist/readme.txt b/raps/dataloaders/mit_supercloud/dist/readme.txt deleted file mode 100644 index fa5e767..0000000 --- a/raps/dataloaders/mit_supercloud/dist/readme.txt +++ /dev/null @@ -1,21 +0,0 @@ -MIT supercloud data. 
https://dcc.mit.edu/dataconda - -To install S3 client. -sudo apt install awscli - -aws s3 ls s3://mit-supercloud-dataset/datacenter-challenge/202201/ --no-sign-request - - - -# Conda env creation: -conda create --name parser \ -boto3 numpy pandas spyder pyarrow fastparquet h5py matplotlib seaborn scikit-learn scipy requests beautifulsoup4 sqlalchemy openpyxl xlrd - - -conda activate parser - -spyder - -From within spyder you can access the data using parse_mit_data.py - - diff --git a/raps/dataloaders/mit_supercloud/dist/setup.py b/raps/dataloaders/mit_supercloud/dist/setup.py deleted file mode 100644 index 1d7cd9c..0000000 --- a/raps/dataloaders/mit_supercloud/dist/setup.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri Sep 20 11:18:26 2024 - -@author: daf -""" - -# Download the paper describing the data -import requests -import os -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -import os -import pandas as pd -from io import StringIO - - -############### Dir setup -# Get the directory of the current file -mit_dir = os.path.dirname(os.path.abspath(__file__)) - -# Create a local directory structure -dirs = ['source_data','papers'] -for s in dirs: - local_dir = mit_dir + '/'+s - if not os.path.exists(local_dir): - os.makedirs(local_dir) - -# URL of the PDF file -url = 'https://arxiv.org/pdf/2108.02037' - -# Send a GET request to the URL -response = requests.get(url) -# Check if the request was successful -if response.status_code == 200: - # Specify the local filename to save - pdf_filename = mit_dir + '/papers/2108.02037.pdf' - - - # Write the content to a local file - with open(pdf_filename, 'wb') as file: - file.write(response.content) - -# Download the summary data only from the server to get the dates for each trace. - -############### Create an index file to allow us to select jobs by date. 
- - -def list_s3_files_and_sizes(bucket_name, prefix=''): - # Initialize an S3 client with no signing - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) - - file_names = [] - file_sizes_gb = [] - - paginator = s3.get_paginator('list_objects_v2') - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - namm = obj['Key'] - file_names.append(namm) - file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to GB - file_sizes_gb.append(file_size_gb) - print(f"{namm}: {file_size_gb:.4f} MB") - - return file_names, file_sizes_gb - -def download_s3_bucket(bucket_name, prefix, datadir): - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) - paginator = s3.get_paginator('list_objects_v2') - - # Recursively download all files with the given prefix - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - # Get the file's S3 key - s3_key = obj['Key'] - s3_stem = s3_key[28:] - local_file_path = os.path.join(datadir, s3_stem) - - local_dir = os.path.dirname(local_file_path) - if not os.path.exists(local_dir): - os.makedirs(local_dir) - print(f"Downloading {s3_key} to {local_file_path}...") - s3.download_file(bucket_name, s3_key, local_file_path) - -def index_summary_file(bucket_name, prefix, datadir): - paginator = s3.get_paginator('list_objects_v2') - results = [] - # Check if the bucket contains any objects - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - key = obj['Key'] - # Check if the key ends with '-summary.csv' - if key.endswith('-summary.csv'): - # Read the CSV file from S3 into a DataFrame - csv_obj = s3.get_object(Bucket=bucket_name, Key=key) - body = csv_obj['Body'].read().decode('utf-8') # Decode bytes to string - - # Use StringIO to read the CSV data - df = pd.read_csv(StringIO(body)) - - # Get the maximum value from the 'epoch' column - st_time = df['Min_EpochTime'].min() - ed_time = df['Max_EpochTime'].max() - node_count = df.Node.unique().shape[0] - jobid = int(key.split('/')[-1].split('-')[0]) - # Append the results to the DataFrame - results.append({'job_id': jobid, 'filename': key, 'start': st_time, 'end' : ed_time, 'node_count': node_count}) - print(f"Processed: {key}") - - df = pd.DataFrame(results) - return df - -# MIT S3 bucket address. 
-bucket_name = 'mit-supercloud-dataset' -prefix = 'datacenter-challenge/202201/' - -# Get the list of S3 file names and sizes and save (unless its already there) -fyle = mit_dir + '/source_data/file_list.csv' - -# Create the file list if its not there already -if not os.path.exists(fyle): - file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) - # Open a file in write mode - with open(fyle, "w") as file: - # Write the header (optional) - file.write("File Name\tSize (MB)\n") - # Iterate over both lists and write each file name and its size - for name, size in zip(file_names, file_sizes_gb): - file.write(f"{name}\t{size:.2f} \n") - -# Download the following root dir files (always) -s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) -dfiles = ['LICENSE','README.md','labelled_job_stats.csv','labelled_jobids.csv' - ,'node-data.csv','slurm-log.csv','tres-mapping.txt'] - -for s in dfiles: - fyle = os.path.join(mit_dir +'/source_data', os.path.basename(s)) - if not os.path.exists(fyle): - s3.download_file(bucket_name, prefix + s, fyle) - -# Create the job-user-date index file if it doesnt exist already. -datadir = mit_dir + '/source_data' -fyle = mit_dir + '/source_data/job_user_date.csv' -# Check if file exists -if not os.path.exists(fyle): - print('This can take about 24 hours to complete.') - job_index_df = index_summary_file(bucket_name, prefix, datadir) - job_index_df.to_csv(fyle, index=False) -else: - job_index_df = pd.read_csv(fyle) - -fyle = mit_dir + '/source_data/job_user_date_full.csv' -if not os.path.exists(fyle): - # Open the slurm log to get the user id for each job. - slurm_df = pd.read_csv(mit_dir + '/source_data/slurm-log.csv') - # Cut out all but the user job mapping - slurm_df = slurm_df[['id_job','id_user']] - - final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') - final_df.to_csv(fyle, index=False) -print('Pre-processing to create an index linking jobs and users to dates is now complete and can be found in the file ') -print(fyle) - - -print('The MIT supercloud is now set up, the paper describing the dataset can be found in /papers') -print('The slurm-log and node data has been downloaded. However no cpu or gpu job traces have been downloaded. 
As there are 2TB of these we have created a script called create_trace.py to allow you to download and select a subset of the data dependent on time.') - diff --git a/raps/dataloaders/mit_supercloud/download.py b/raps/dataloaders/mit_supercloud/download.py new file mode 100644 index 0000000..5130849 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/download.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 +""" +Single‑script tool to: + 1) Ensure slurm-log.csv is present locally (download if missing) + 2) Filter jobs by submit date and classify CPU vs GPU by gres/tres + 3) Build or load a one‑time S3 manifest of trace keys (CPU & GPU) + 4) Filter that manifest by job IDs and download matching files + +Usage: + python download_data.py [--start DDMMYYYY] [--end DDMMYYYY] \ + [--partition all|part-cpu|part-gpu] \ + [--outdir PATH] [--max-jobs N] [--dry-run] + +Defaults: + --start 21052021 # 21 May 2021 (inclusive) + --end 22052021 # 22 May 2021 (exclusive) + +Flags: + --max-jobs N # Only process first N jobs (for testing) + --dry-run # List a sample of files without downloading +""" +# Suppress urllib3 InsecureRequestWarning +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +import os +import re +from datetime import datetime + +import pandas as pd +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +from tqdm import tqdm + +from .utils import ( + load_slurm_log, + build_or_load_manifest, + filter_keys_by_jobs +) + +# Default date window +DEFAULT_START = "21052021" +DEFAULT_END = "22052021" + + +def ensure_slurm_log(s3, bucket, key, dest): + if os.path.exists(dest): + print(f"Found existing slurm-log.csv at {dest}, skipping download.") + return + os.makedirs(os.path.dirname(dest), exist_ok=True) + print(f"Downloading slurm-log.csv → {dest}") + s3.download_file(bucket, key, dest) + print("Downloaded slurm-log.csv.") + + +def list_and_filter_jobs(csv_path, start, end, partition): + df = pd.read_csv(csv_path) + df['time_submit'] = pd.to_datetime(df['time_submit'], unit='s') + + dt0 = datetime.strptime(start, "%d%m%Y") + dt1 = datetime.strptime(end, "%d%m%Y") + window = df[(df['time_submit'] >= dt0) & (df['time_submit'] < dt1)] + + # Identify GPU-using jobs via gres_used or tres_alloc + gres = window['gres_used'].fillna("").astype(str) + tres = window['tres_alloc'].fillna("").astype(str) + gpu_jobs = set(window.loc[ + gres.str.contains("gpu", case=False) | + tres.str.contains(r"(?:1001|1002)=", regex=True), + 'id_job' + ]) + + if partition == 'part-gpu': + job_ids = sorted(gpu_jobs) + elif partition == 'part-cpu': + job_ids = sorted(set(window['id_job']) - gpu_jobs) + else: + job_ids = sorted(window['id_job'].unique()) + + print(f"Selected {len(job_ids)} jobs from {dt0.date()} to {dt1.date()} on {partition}.") + return job_ids + + +def build_manifest(s3, bucket, prefix, manifest_path): + """ + One-time listing of all CSV keys under cpu/ and gpu/ prefixes. + Writes each key to manifest_path. 
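+    Only keys ending in .csv are recorded, one key per line, so later runs
+    can reuse the manifest instead of re-listing the bucket.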
+ """ + cpu_pref = prefix + 'cpu/' + gpu_pref = prefix + 'gpu/' + paginator = s3.get_paginator('list_objects_v2') + os.makedirs(os.path.dirname(manifest_path), exist_ok=True) + with open(manifest_path, 'w') as mf: + # CPU + print("Building manifest: listing CPU keys...") + for page in tqdm(paginator.paginate(Bucket=bucket, Prefix=cpu_pref), desc="CPU pages", unit="page"): + for obj in page.get('Contents', []): + key = obj['Key'] + if key.lower().endswith('.csv'): + mf.write(key + '\n') + # GPU + print("Building manifest: listing GPU keys...") + for page in tqdm(paginator.paginate(Bucket=bucket, Prefix=gpu_pref), desc="GPU pages", unit="page"): + for obj in page.get('Contents', []): + key = obj['Key'] + if key.lower().endswith('.csv'): + mf.write(key + '\n') + print(f"Manifest written to {manifest_path}.") + + +#def load_manifest(manifest_path): +# with open(manifest_path) as f: +# return [line.strip() for line in f] + + +def filter_keys_by_jobs(keys, job_ids): + """ + Parse job ID from start of filename (-...) or via -r- in GPU names, + keep only keys matching job_ids. + """ + sel = [] + for key in keys: + fname = os.path.basename(key) + # Try CPU style: '-' + parts = fname.split('-', 1) + jid = None + try: + jid = int(parts[0]) + except ValueError: + # Try GPU style: '-r-' + m = re.search(r'-r(\d+)-', fname) + if m: + jid = int(m.group(1)) + if jid and jid in job_ids: + sel.append(key) + return sel + + +def download_traces(s3, bucket, prefix, outdir, keys, dry_run): + if dry_run: + print("Dry-run: sample of matching keys:") + for key in keys[:10]: print(" ", key) + return + for key in tqdm(keys, desc="Downloading traces"): + rel = key[len(prefix):] + dest = os.path.join(outdir, rel) + if os.path.exists(dest): + tqdm.write(f"Warning: {dest} exists, skipping.") + continue + os.makedirs(os.path.dirname(dest), exist_ok=True) + s3.download_file(bucket, key, dest) + print("All requested traces downloaded.") + + +def download(args): + """ + Subcommand entrypoint for 'mit_supercloud download'. + Downloads slurm-log.csv and all matching CPU/GPU trace files from S3. 
+ """ + # 1) Initialize anonymous S3 client with SSL verification disabled + s3 = boto3.client( + 's3', + config=Config(signature_version=UNSIGNED), + verify=False + ) + + # 2) Ensure local copy of slurm-log.csv + slurm_key = f"{args.prefix}slurm-log.csv" + slurm_path = os.path.join(args.outdir, 'slurm-log.csv') + ensure_slurm_log(s3, args.bucket, slurm_key, slurm_path) + + # 3) Load and filter SLURM log to determine CPU/GPU job sets + _, cpu_jobs, gpu_jobs = load_slurm_log( + slurm_path, + args.start, + args.end + ) + if args.partition == 'part-cpu': + job_ids = cpu_jobs + elif args.partition == 'part-gpu': + job_ids = gpu_jobs + else: + job_ids = cpu_jobs | gpu_jobs + if args.max_jobs: + job_ids = set(list(job_ids)[:args.max_jobs]) + print(f"Processing {len(job_ids)} jobs (partition={args.partition})") + + # 4) Build or load the one-time manifest of all trace keys + manifest_path = os.path.join(args.outdir, 'file_manifest.txt') + all_keys = build_or_load_manifest( + s3, args.bucket, args.prefix, manifest_path + ) + + # 5) Filter manifest to only the trace keys for our job IDs + trace_keys = filter_keys_by_jobs(all_keys, job_ids) + cpu_count = sum(1 for k in trace_keys if k.startswith(f"{args.prefix}cpu/")) + gpu_count = len(trace_keys) - cpu_count + print(f"Total matching trace files: {len(trace_keys)} (CPU: {cpu_count}, GPU: {gpu_count})") + + # 6) Download or dry-run + download_traces( + s3, + args.bucket, + args.prefix, + args.outdir, + trace_keys, + args.dry_run + ) diff --git a/raps/dataloaders/mit_supercloud/generate_local_metadata.py b/raps/dataloaders/mit_supercloud/generate_local_metadata.py deleted file mode 100644 index b699934..0000000 --- a/raps/dataloaders/mit_supercloud/generate_local_metadata.py +++ /dev/null @@ -1,126 +0,0 @@ -import csv -import os -import pandas as pd -import sys -from tqdm import tqdm - -source_dir = 'source_data' - -def generate_local_metadata(local_dataset_root_path): - mit_dir = os.path.dirname(os.path.abspath(__file__)) - source_data_dir = os.path.join(mit_dir, source_dir) - os.makedirs(source_data_dir, exist_ok=True) - - print(f"Generating metadata in: {source_data_dir}") - - # --- Generate file_list.csv --- - file_list_path = os.path.join(source_data_dir, 'file_list.csv') - print(f"Creating {file_list_path}...") - all_files = [] - for root, _, files in os.walk(local_dataset_root_path): - for file in files: - all_files.append(os.path.join(root, file)) - - with open(file_list_path, 'w', newline='') as f: - writer = csv.writer(f, delimiter=' ') - writer.writerow(["File Name", "Size (MB)"]) - for full_path in tqdm(all_files, desc="Generating file_list.csv"): - relative_path = os.path.relpath(full_path, local_dataset_root_path) - file_size_bytes = os.path.getsize(full_path) - file_size_mb = file_size_bytes / (1024 * 1024) - writer.writerow([relative_path, f"{file_size_mb:.2f}"]) - print(f"Finished creating {file_list_path}") - - # --- Generate job_user_date.csv --- - job_user_date_path = os.path.join(source_data_dir, 'job_user_date.csv') - print(f"Creating {job_user_date_path} (resumable)...") - - all_summary_files = [] - for root, _, files in os.walk(local_dataset_root_path): - for file in files: - if file.endswith('-summary.csv'): - all_summary_files.append(os.path.join(root, file)) - - processed_job_ids = set() - if os.path.exists(job_user_date_path): - try: - existing_df = pd.read_csv(job_user_date_path) - processed_job_ids = set(existing_df['job_id'].tolist()) - write_mode = 'a' - header = False - except pd.errors.EmptyDataError: - write_mode = 
'w' - header = True - else: - write_mode = 'w' - header = True - - with open(job_user_date_path, write_mode, newline='') as f: - writer = csv.writer(f) - if header: - writer.writerow(["job_id", "filename", "start", "end", "node_count"]) - - for full_summary_path in tqdm(all_summary_files, desc="Generating job_user_date.csv"): - file = os.path.basename(full_summary_path) - jobid = int(file.split('-')[0]) - - if jobid in processed_job_ids: - continue # Skip already processed - - try: - df = pd.read_csv(full_summary_path) - st_time = df['Min_EpochTime'].min() - ed_time = df['Max_EpochTime'].max() - node_count = df.Node.unique().shape[0] - relative_filename = os.path.relpath(full_summary_path, local_dataset_root_path) - writer.writerow([jobid, relative_filename, st_time, ed_time, node_count]) - processed_job_ids.add(jobid) - except Exception as e: - print(f"Error processing local summary file {full_summary_path}: {e}") - print(f"Finished creating {job_user_date_path}") - - # --- Generate job_user_date_full.csv --- - job_user_date_full_path = os.path.join(source_data_dir, 'job_user_date_full.csv') - - # Search for slurm-log.csv anywhere within the local dataset root - slurm_log_path = None - for root, _, files in os.walk(local_dataset_root_path): - if 'slurm-log.csv' in files: - slurm_log_path = os.path.join(root, 'slurm-log.csv') - break - - if slurm_log_path is None: - print(f"Warning: slurm-log.csv not found in {local_dataset_root_path}. Skipping job_user_date_full.csv generation.") - return - - if os.path.exists(job_user_date_path) and os.path.exists(slurm_log_path): - print(f"Creating {job_user_date_full_path}...") - try: - job_index_df = pd.read_csv(job_user_date_path) - slurm_df = pd.read_csv(slurm_log_path) - slurm_df = slurm_df[['id_job', 'id_user']] - final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') - final_df.to_csv(job_user_date_full_path, index=False) - print(f"Finished creating {job_user_date_full_path}") - except Exception as e: - print(f"Error creating {job_user_date_full_path}: {e}") - else: - print(f"Skipping {job_user_date_full_path}: one or both of {job_user_date_path} or {slurm_log_path} not found.") - -if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser(description="Generate local metadata files for MIT Supercloud dataset.") - parser.add_argument("local_dataset_path", type=str, - help="The root path to your locally downloaded MIT Supercloud dataset.") - args = parser.parse_args() - - if os.path.isdir(source_dir): - response = input(f"If you continue, files in '{source_dir}' will be overwritten.\nDo you want to continue? 
(y or n): ") - if response.lower() != 'y': - print("Operation cancelled.") - sys.exit(1) - - # Continue with the rest of your code here - print("Continuing with the operation...") - - generate_local_metadata(args.local_dataset_path) diff --git a/raps/dataloaders/mit_supercloud.py b/raps/dataloaders/mit_supercloud/loader.py similarity index 100% rename from raps/dataloaders/mit_supercloud.py rename to raps/dataloaders/mit_supercloud/loader.py diff --git a/raps/dataloaders/mit_supercloud/setup.py b/raps/dataloaders/mit_supercloud/setup.py deleted file mode 100644 index 7a66ece..0000000 --- a/raps/dataloaders/mit_supercloud/setup.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri Sep 20 11:18:26 2024 - -@author: daf -""" - -# Download the paper describing the data -import requests -import os -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -import os -import pandas as pd -from io import StringIO - - -############### Dir setup -# Get the directory of the current file -mit_dir = os.path.dirname(os.path.abspath(__file__)) - -# Create a local directory structure -dirs = ['source_data','papers'] -for s in dirs: - local_dir = mit_dir + '/'+s - if not os.path.exists(local_dir): - os.makedirs(local_dir) - -## URL of the PDF file -#url = 'https://arxiv.org/pdf/2108.02037' -# -## Send a GET request to the URL -#response = requests.get(url) -## Check if the request was successful -#if response.status_code == 200: -# # Specify the local filename to save -# pdf_filename = mit_dir + '/papers/2108.02037.pdf' -# -# -# # Write the content to a local file -# with open(pdf_filename, 'wb') as file: -# file.write(response.content) - -# Download the summary data only from the server to get the dates for each trace. - -############### Create an index file to allow us to select jobs by date. 
- - -def list_s3_files_and_sizes(bucket_name, prefix=''): - # Initialize an S3 client with no signing - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) - - file_names = [] - file_sizes_gb = [] - - paginator = s3.get_paginator('list_objects_v2') - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - namm = obj['Key'] - file_names.append(namm) - file_size_gb = obj['Size'] / (1024 ** 2) # Convert from bytes to GB - file_sizes_gb.append(file_size_gb) - print(f"{namm}: {file_size_gb:.4f} MB") - - return file_names, file_sizes_gb - -def download_s3_bucket(bucket_name, prefix, datadir): - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) - paginator = s3.get_paginator('list_objects_v2') - - # Recursively download all files with the given prefix - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - # Get the file's S3 key - s3_key = obj['Key'] - s3_stem = s3_key[28:] - local_file_path = os.path.join(datadir, s3_stem) - - local_dir = os.path.dirname(local_file_path) - if not os.path.exists(local_dir): - os.makedirs(local_dir) - print(f"Downloading {s3_key} to {local_file_path}...") - s3.download_file(bucket_name, s3_key, local_file_path) - -def index_summary_file(bucket_name, prefix, output_csv_path): - s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) - paginator = s3.get_paginator('list_objects_v2') - - processed_job_ids = set() - if os.path.exists(output_csv_path): - try: - existing_df = pd.read_csv(output_csv_path) - processed_job_ids = set(existing_df['job_id'].tolist()) - write_mode = 'a' - header = False - except pd.errors.EmptyDataError: - write_mode = 'w' - header = True - else: - write_mode = 'w' - header = True - - with open(output_csv_path, write_mode, newline='') as f: - if header: - f.write("job_id,filename,start,end,node_count\n") - - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - if 'Contents' in page: - for obj in page['Contents']: - key = obj['Key'] - if key.endswith('-summary.csv'): - jobid = int(key.split('/')[-1].split('-')[0]) - if jobid in processed_job_ids: - print(f"Skipping already processed: {key}") - continue - - try: - csv_obj = s3.get_object(Bucket=bucket_name, Key=key) - body = csv_obj['Body'].read().decode('utf-8') - df = pd.read_csv(StringIO(body)) - - st_time = df['Min_EpochTime'].min() - ed_time = df['Max_EpochTime'].max() - node_count = df.Node.unique().shape[0] - - f.write(f"{jobid},{key},{st_time},{ed_time},{node_count}\n") - processed_job_ids.add(jobid) - print(f"Processed and wrote: {key}") - except Exception as e: - print(f"Error processing {key}: {e}") - continue - print(f"Indexing complete. Data saved to {output_csv_path}") - -# MIT S3 bucket address. 
-bucket_name = 'mit-supercloud-dataset' -prefix = 'datacenter-challenge/202201/' - -# Get the list of S3 file names and sizes and save (unless its already there) -fyle = mit_dir + '/source_data/file_list.csv' - -# Create the file list if its not there already -if not os.path.exists(fyle): - file_names, file_sizes_gb = list_s3_files_and_sizes(bucket_name, prefix) - # Open a file in write mode - with open(fyle, "w") as file: - # Write the header (optional) - file.write("File Name\tSize (MB)\n") - # Iterate over both lists and write each file name and its size - for name, size in zip(file_names, file_sizes_gb): - file.write(f"{name}\t{size:.2f} \n") - -# Download the following root dir files (always) -s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False) -dfiles = ['LICENSE','README.md','labelled_job_stats.csv','labelled_jobids.csv' - ,'node-data.csv','slurm-log.csv','tres-mapping.txt'] - -for s in dfiles: - fyle = os.path.join(mit_dir +'/source_data', os.path.basename(s)) - if not os.path.exists(fyle): - s3.download_file(bucket_name, prefix + s, fyle) - -# Create the job-user-date index file if it doesnt exist already. -datadir = mit_dir + '/source_data' -fyle = mit_dir + '/source_data/job_user_date.csv' -# Check if file exists -if not os.path.exists(fyle): - print('This can take about 24 hours to complete.') - index_summary_file(bucket_name, prefix, fyle) -else: - job_index_df = pd.read_csv(fyle) - -fyle = mit_dir + '/source_data/job_user_date_full.csv' -if not os.path.exists(fyle): - # Open the slurm log to get the user id for each job. - slurm_df = pd.read_csv(mit_dir + '/source_data/slurm-log.csv') - # Cut out all but the user job mapping - slurm_df = slurm_df[['id_job','id_user']] - - final_df = job_index_df.merge(slurm_df, left_on='job_id', right_on='id_job', how='left') - final_df.to_csv(fyle, index=False) -print('Pre-processing to create an index linking jobs and users to dates is now complete and can be found in the file ') -print(fyle) - - -print('The MIT supercloud is now set up, the paper describing the dataset can be found in /papers') -print('The slurm-log and node data has been downloaded. However no cpu or gpu job traces have been downloaded. As there are 2TB of these we have created a script called create_trace.py to allow you to download and select a subset of the data dependent on time.') - diff --git a/raps/dataloaders/mit_supercloud/utils.py b/raps/dataloaders/mit_supercloud/utils.py new file mode 100644 index 0000000..fbddaeb --- /dev/null +++ b/raps/dataloaders/mit_supercloud/utils.py @@ -0,0 +1,117 @@ +import os +import re +from datetime import datetime +import pandas as pd +from tqdm import tqdm +import boto3 +from botocore import UNSIGNED +from botocore.client import Config + + +def load_slurm_log(slurm_path: str, start_date: str, end_date: str): + """ + Load Slurm log and filter jobs by submission window. 
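+
+    GPU-using jobs are identified from the gres_used and tres_alloc columns
+    (tres type IDs 1001/1002 are taken to denote GPU allocations).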
+ + Args: + slurm_path: Path to local slurm-log.csv + start_date: "DDMMYYYY" inclusive start + end_date: "DDMMYYYY" exclusive end + + Returns: + tuple( + pandas.DataFrame filtered on date window, + set of CPU-only job IDs, + set of GPU-using job IDs + ) + """ + df = pd.read_csv(slurm_path) + # Convert submit times + df['time_submit'] = pd.to_datetime(df['time_submit'], unit='s') + dt0 = datetime.strptime(start_date, "%d%m%Y") + dt1 = datetime.strptime(end_date, "%d%m%Y") + window = df[(df['time_submit'] >= dt0) & (df['time_submit'] < dt1)] + + # Detect GPU jobs via gres_used or tres_alloc + gres = window['gres_used'].fillna("").astype(str) + tres = window['tres_alloc'].fillna("").astype(str) + gpu_jobs = set( + window.loc[ + gres.str.contains("gpu", case=False) | + tres.str.contains(r"(?:1001|1002)=", regex=True), + 'id_job' + ] + ) + cpu_jobs = set(window['id_job']) - gpu_jobs + return window, cpu_jobs, gpu_jobs + + +def build_or_load_manifest(s3, bucket: str, prefix: str, manifest_path: str): + """ + Build a one-time manifest of all .csv keys under cpu/ and gpu/ in S3, + or load an existing manifest from disk. + + Args: + s3: boto3 S3 client + bucket: S3 bucket name + prefix: S3 dataset root prefix (e.g. "datacenter-challenge/202201/") + manifest_path: local path to cache the manifest + + Returns: + List[str]: all S3 keys ending in .csv under cpu/ and gpu/ + """ + if os.path.exists(manifest_path): + with open(manifest_path, 'r') as f: + return [line.strip() for line in f] + + # Otherwise build manifest + keys = [] + paginator = s3.get_paginator('list_objects_v2') + for kind in ('cpu', 'gpu'): + pfx = prefix + f"{kind}/" + for page in tqdm( + paginator.paginate(Bucket=bucket, Prefix=pfx), + desc=f"Listing {kind} pages", unit="page" + ): + for obj in page.get('Contents', []): + key = obj['Key'] + if key.lower().endswith('.csv'): + keys.append(key) + # Cache on disk + os.makedirs(os.path.dirname(manifest_path), exist_ok=True) + with open(manifest_path, 'w') as f: + for key in keys: + f.write(key + '\n') + return keys + + +def filter_keys_by_jobs(all_keys: list, job_ids: set): + """ + Filter a list of S3 keys to those belonging to specified job IDs. + + Args: + all_keys: list of S3 keys from manifest + job_ids: set of job IDs (int) + + Returns: + List[str] of keys matching CPU or GPU jobs + """ + selected = [] + gpu_pattern = re.compile(r'-r(\d+)-') + for key in all_keys: + # CPU keys: prefix/jobid-...-timeseries.csv or -summary.csv + if '/cpu/' in key: + fname = os.path.basename(key) + parts = fname.split('-', 1) + try: + jid = int(parts[0]) + except ValueError: + continue + if jid in job_ids: + selected.append(key) + # GPU keys: detect -r- in filename + elif '/gpu/' in key: + fname = os.path.basename(key) + m = gpu_pattern.search(fname) + if m and int(m.group(1)) in job_ids: + selected.append(key) + return selected -- GitLab From 8d24e5d69c47b32d3cdd978aff84b8dacc62143f Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 18 Jul 2025 15:20:39 -0400 Subject: [PATCH 164/388] Update start/end dates for MITSC to use ISO formats date+time, e.g. 
2021-05-21T13:30 --- README.md | 18 ++++++++++++++---- raps/dataloaders/mit_supercloud/cli.py | 13 +++++++++++-- raps/dataloaders/mit_supercloud/loader.py | 2 ++ raps/dataloaders/mit_supercloud/utils.py | 16 ++++++++++++++-- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f1fd108..72fe94c 100644 --- a/README.md +++ b/README.md @@ -51,15 +51,25 @@ For Google cluster trace v2 # analyze dataset python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v + + + + ++ # Re-run simulation using npz files (much faster load) + For MIT Supercloud + # Following is the directory that contains slurm-log.csv and cpu and gpu directories + DPATH=/path/to/mit/data - # download the dataset - python -m raps.dataloaders.mit_supercloud.cli download --start 21052021 --end 22052021 \ - --outdir /path/to/mit + # Download the dataset + python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:30 --end 2021-05-21T14:00 + # this will dump output to `source_data` directory, or can specify directory using `--outdir` - python multi-part-sim.py -x 'mit_supercloud/*' -f /path/to/mit_supercloud/datacenter-challenge --system mit_supercloud + # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files + python multi-part-sim.py -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud + # Re-run simulation using npz files (much faster load) python multi-part-sim.py -x mit_supercloud/* -f part-*.npz --system mit_supercloud ## Perform Network Simulation diff --git a/raps/dataloaders/mit_supercloud/cli.py b/raps/dataloaders/mit_supercloud/cli.py index cbeadf1..9daccf7 100644 --- a/raps/dataloaders/mit_supercloud/cli.py +++ b/raps/dataloaders/mit_supercloud/cli.py @@ -1,14 +1,23 @@ import argparse from .download import download from .loader import load_data +from .utils import DEFAULT_START, DEFAULT_END def main(): p = argparse.ArgumentParser(prog="mit_supercloud") subs = p.add_subparsers(dest="cmd", required=True) common = argparse.ArgumentParser(add_help=False) - common.add_argument("--start", default="21052021") - common.add_argument("--end", default="22052021") + common.add_argument( + '--start', '-s', + default=DEFAULT_START, + help="Start datetime, in ISO format (e.g. '2021-05-21T13:30'), default midnight." + ) + common.add_argument( + '--end', '-e', + default=DEFAULT_END, + help="End datetime, in ISO format (e.g. '2021-05-21T16:45')." 
+ ) common.add_argument("--partition", choices=["all","part-cpu","part-gpu"], default="all") common.add_argument("--outdir", default="source_data") common.add_argument("--bucket", default="mit-supercloud-dataset") diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index b75a9be..d4653f0 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -17,6 +17,8 @@ from scipy.sparse import csr_matrix as csr from tqdm import tqdm from raps.job import job_dict +from .utils import DEFAULT_START, DEFAULT_END + def proc_cpu_series(dfi): dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() diff --git a/raps/dataloaders/mit_supercloud/utils.py b/raps/dataloaders/mit_supercloud/utils.py index fbddaeb..7b1f4f6 100644 --- a/raps/dataloaders/mit_supercloud/utils.py +++ b/raps/dataloaders/mit_supercloud/utils.py @@ -7,6 +7,18 @@ import boto3 from botocore import UNSIGNED from botocore.client import Config +DEFAULT_START = "2021-05-21T00:00" +DEFAULT_END = "2021-05-22T00:00" + + +def _parse_dt(s: str) -> datetime: + try: + # handles 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM[:SS]' + return datetime.fromisoformat(s) + except ValueError: + # legacy support for DDMMYYYY → midnight + return datetime.strptime(s, "%d%m%Y") + def load_slurm_log(slurm_path: str, start_date: str, end_date: str): """ @@ -27,8 +39,8 @@ def load_slurm_log(slurm_path: str, start_date: str, end_date: str): df = pd.read_csv(slurm_path) # Convert submit times df['time_submit'] = pd.to_datetime(df['time_submit'], unit='s') - dt0 = datetime.strptime(start_date, "%d%m%Y") - dt1 = datetime.strptime(end_date, "%d%m%Y") + dt0 = _parse_dt(start_date) + dt1 = _parse_dt(end_date) window = df[(df['time_submit'] >= dt0) & (df['time_submit'] < dt1)] # Detect GPU jobs via gres_used or tres_alloc -- GitLab From d338ccb1b96c0c9c7d613c4b038684776ba6064d Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 18 Jul 2025 15:32:00 -0400 Subject: [PATCH 165/388] Some refactoring and cleanup - move proc_[cg]pu_series to utils.py --- README.md | 11 +-- raps/dataloaders/mit_supercloud/loader.py | 115 ++-------------------- raps/dataloaders/mit_supercloud/utils.py | 108 +++++++++++++++++++- 3 files changed, 113 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index 72fe94c..2c89308 100644 --- a/README.md +++ b/README.md @@ -51,20 +51,15 @@ For Google cluster trace v2 # analyze dataset python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v - - - - -+ # Re-run simulation using npz files (much faster load) - For MIT Supercloud # Following is the directory that contains slurm-log.csv and cpu and gpu directories DPATH=/path/to/mit/data - # Download the dataset + # Download the dataset - note the first time will build a file-manifest.txt file with all the files on S3 + # this will take some time, but subsequent calls should be much faster. 
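+    # (delete the cached manifest file to force a full re-listing)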
+ # Also, this command will dump output to `source_data` directory, or can specify directory using `--outdir` python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:30 --end 2021-05-21T14:00 - # this will dump output to `source_data` directory, or can specify directory using `--outdir` # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files python multi-part-sim.py -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index d4653f0..0a6a2ad 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -5,118 +5,15 @@ MIT Supercloud job trace processing module with load_data function. """ import os -import shutil -import sys -from datetime import datetime import math -from types import SimpleNamespace - -import numpy as np import pandas as pd -from scipy.sparse import csr_matrix as csr -from tqdm import tqdm +from datetime import datetime +from types import SimpleNamespace +from tqdm import tqdm from raps.job import job_dict -from .utils import DEFAULT_START, DEFAULT_END - - -def proc_cpu_series(dfi): - dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() - dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 - - t = pd.to_datetime(dfi.EpochTime, unit='s') - start_time = t.min() - dfi['t'] = ((t - start_time).dt.total_seconds() // 10).astype(int) - dfi['sid'] = pd.factorize(dfi.Step)[0] - - useries = dfi.Series.unique() - inds = np.arange(dfi.t.max() + 1) - df = pd.DataFrame({'t': inds}) - Xm, Xrss, Xvm, Xreadmb, Xwritemb = (np.zeros((len(useries), len(inds))) for _ in range(5)) - - for cnt, i in enumerate(useries): - sift = dfi.Series == i - M, N = len(inds), dfi.sid[sift].max() + 1 - - for metric, arr, name in zip( - ['CPUUtilization', 'RSS', 'VMSize', 'ReadMB', 'WriteMB'], - [Xm, Xrss, Xvm, Xreadmb, Xwritemb], - ['cpu', 'rss', 'vm', 'readmb', 'writemb'] - ): - X = csr((dfi.loc[sift, metric], (dfi.loc[sift, 't'], dfi.loc[sift, 'sid'])), shape=(M, N)) - mm = np.array(X.max(axis=1).todense()).reshape(-1,) - df[f'{name}_{i}'] = mm - arr[cnt, :] = mm - - df['cpu_utilisation'] = Xm.mean(axis=0) - df['rss'] = Xrss.sum(axis=0) - df['vm'] = Xvm.sum(axis=0) - df['readmb'] = Xreadmb.sum(axis=0) - df['writemb'] = Xwritemb.sum(axis=0) - df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') - df['utime'] = df['timestamp'].astype('int64') // 10**9 - - return df - -def proc_gpu_series(cpu_df, dfi, gpu_cnt): - # 1) Build CPU time range - t_cpu_start = int(cpu_df.utime.min()) - t_cpu_end = int(cpu_df.utime.max()) - t_cpu = np.array([t_cpu_start, t_cpu_end, t_cpu_end - t_cpu_start]) - - # 2) Safely convert the GPU timestamps to integer seconds - # (this handles strings like "1621607266.426") - ts = pd.to_numeric(dfi["timestamp"], errors="coerce") # float64 or NaN - ts_int = ts.ffill().astype(float).astype(int) - t0, t1 = ts_int.min(), ts_int.max() - t_gpu = np.array([t0, t1, t1 - t0]) - - # 3) Sanity‐check the durations match within 10% - per_diff = ((t_cpu[1] - t_cpu[0]) - (t_gpu[1] - t_gpu[0])) / (t_gpu[1] - t_gpu[0]) * 100 - if abs(per_diff) > 10: - # warn and proceed — GPU trace may be trimmed or misaligned - tqdm.write(f"Warning: GPU‐CPU time mismatch {per_diff:.1f}% exceeds 10%; continuing anyway") - - # 4) Align GPU times onto CPU utime grid - # Use our integer‐second Series rather than the raw column - dfi["t_fixed"] = ts_int - ts_int.min() + t_cpu_start - - # 5) Prepare output 
DataFrame with a utime column - ugpus = dfi.gpu_index.unique() - gpu_df = pd.DataFrame({"utime": cpu_df["utime"].values}) - - # 6) Interpolate each GPU field onto the CPU utime grid - fields = [ - "utilization_gpu_pct", - "utilization_memory_pct", - "memory_free_MiB", - "memory_used_MiB", - "temperature_gpu", - "temperature_memory", - "power_draw_W", - ] - for field in fields: - # grab the float‐converted timestamp and the metric - x1 = ts_int.values - y1 = dfi[field].astype(float).values - xv = cpu_df["utime"].values - # numpy interpolation - gpu_df[field] = np.interp(xv, x1, y1) - - # 7) Rename the GPU pct, memory pct, and power columns with the device index - ren = { - "gpu_index": f"gpu_index_{gpu_cnt}", - "utilization_gpu_pct": f"gpu_util_{gpu_cnt}", - "utilization_memory_pct":f"gpu_mempct_{gpu_cnt}", - "memory_free_MiB": f"gpu_memfree_{gpu_cnt}", - "memory_used_MiB": f"gpu_memused_{gpu_cnt}", - "temperature_gpu": f"gpu_temp_{gpu_cnt}", - "temperature_memory": f"gpu_memtemp_{gpu_cnt}", - "power_draw_W": f"gpu_power_{gpu_cnt}", - } - gpu_df.rename(columns=ren, inplace=True) - - return gpu_df, gpu_cnt + 1 +from .utils import proc_cpu_series, proc_gpu_series + def load_data(local_dataset_path, **kwargs): """ @@ -153,7 +50,7 @@ def load_data(local_dataset_path, **kwargs): # 2) date window start_ts = int(datetime.strptime(kwargs.get("start_date","21052021"), "%d%m%Y").timestamp()) end_ts = int(datetime.strptime(kwargs.get("end_date", "22052021"), "%d%m%Y").timestamp()) - duration = end_ts - start_ts + #duration = end_ts - start_ts sl = sl[(sl.time_submit >= start_ts) & (sl.time_submit < end_ts)] diff --git a/raps/dataloaders/mit_supercloud/utils.py b/raps/dataloaders/mit_supercloud/utils.py index 7b1f4f6..e6ba570 100644 --- a/raps/dataloaders/mit_supercloud/utils.py +++ b/raps/dataloaders/mit_supercloud/utils.py @@ -1,11 +1,11 @@ +import numpy as np import os import re -from datetime import datetime import pandas as pd + +from datetime import datetime +from scipy.sparse import csr_matrix as csr from tqdm import tqdm -import boto3 -from botocore import UNSIGNED -from botocore.client import Config DEFAULT_START = "2021-05-21T00:00" DEFAULT_END = "2021-05-22T00:00" @@ -127,3 +127,103 @@ def filter_keys_by_jobs(all_keys: list, job_ids: set): if m and int(m.group(1)) in job_ids: selected.append(key) return selected + + +def proc_cpu_series(dfi): + dfi = dfi[~dfi.Step.isin([-1, -4, '-1', '-4'])].copy() + dfi['CPUUtilization'] = dfi['CPUUtilization'].fillna(0) / 100.0 + + t = pd.to_datetime(dfi.EpochTime, unit='s') + start_time = t.min() + dfi['t'] = ((t - start_time).dt.total_seconds() // 10).astype(int) + dfi['sid'] = pd.factorize(dfi.Step)[0] + + useries = dfi.Series.unique() + inds = np.arange(dfi.t.max() + 1) + df = pd.DataFrame({'t': inds}) + Xm, Xrss, Xvm, Xreadmb, Xwritemb = (np.zeros((len(useries), len(inds))) for _ in range(5)) + + for cnt, i in enumerate(useries): + sift = dfi.Series == i + M, N = len(inds), dfi.sid[sift].max() + 1 + + for metric, arr, name in zip( + ['CPUUtilization', 'RSS', 'VMSize', 'ReadMB', 'WriteMB'], + [Xm, Xrss, Xvm, Xreadmb, Xwritemb], + ['cpu', 'rss', 'vm', 'readmb', 'writemb'] + ): + X = csr((dfi.loc[sift, metric], (dfi.loc[sift, 't'], dfi.loc[sift, 'sid'])), shape=(M, N)) + mm = np.array(X.max(axis=1).todense()).reshape(-1,) + df[f'{name}_{i}'] = mm + arr[cnt, :] = mm + + df['cpu_utilisation'] = Xm.mean(axis=0) + df['rss'] = Xrss.sum(axis=0) + df['vm'] = Xvm.sum(axis=0) + df['readmb'] = Xreadmb.sum(axis=0) + df['writemb'] = Xwritemb.sum(axis=0) + 
df['timestamp'] = start_time + pd.to_timedelta(df.t * 10, unit='s') + df['utime'] = df['timestamp'].astype('int64') // 10**9 + + return df + + +def proc_gpu_series(cpu_df, dfi, gpu_cnt): + # 1) Build CPU time range + t_cpu_start = int(cpu_df.utime.min()) + t_cpu_end = int(cpu_df.utime.max()) + t_cpu = np.array([t_cpu_start, t_cpu_end, t_cpu_end - t_cpu_start]) + + # 2) Safely convert the GPU timestamps to integer seconds + # (this handles strings like "1621607266.426") + ts = pd.to_numeric(dfi["timestamp"], errors="coerce") # float64 or NaN + ts_int = ts.ffill().astype(float).astype(int) + t0, t1 = ts_int.min(), ts_int.max() + t_gpu = np.array([t0, t1, t1 - t0]) + + # 3) Sanity‐check the durations match within 10% + per_diff = ((t_cpu[1] - t_cpu[0]) - (t_gpu[1] - t_gpu[0])) / (t_gpu[1] - t_gpu[0]) * 100 + if abs(per_diff) > 10: + # warn and proceed — GPU trace may be trimmed or misaligned + tqdm.write(f"Warning: GPU‐CPU time mismatch {per_diff:.1f}% exceeds 10%; continuing anyway") + + # 4) Align GPU times onto CPU utime grid + # Use our integer‐second Series rather than the raw column + dfi["t_fixed"] = ts_int - ts_int.min() + t_cpu_start + + # 5) Prepare output DataFrame with a utime column + #ugpus = dfi.gpu_index.unique() + gpu_df = pd.DataFrame({"utime": cpu_df["utime"].values}) + + # 6) Interpolate each GPU field onto the CPU utime grid + fields = [ + "utilization_gpu_pct", + "utilization_memory_pct", + "memory_free_MiB", + "memory_used_MiB", + "temperature_gpu", + "temperature_memory", + "power_draw_W", + ] + for field in fields: + # grab the float‐converted timestamp and the metric + x1 = ts_int.values + y1 = dfi[field].astype(float).values + xv = cpu_df["utime"].values + # numpy interpolation + gpu_df[field] = np.interp(xv, x1, y1) + + # 7) Rename the GPU pct, memory pct, and power columns with the device index + ren = { + "gpu_index": f"gpu_index_{gpu_cnt}", + "utilization_gpu_pct": f"gpu_util_{gpu_cnt}", + "utilization_memory_pct":f"gpu_mempct_{gpu_cnt}", + "memory_free_MiB": f"gpu_memfree_{gpu_cnt}", + "memory_used_MiB": f"gpu_memused_{gpu_cnt}", + "temperature_gpu": f"gpu_temp_{gpu_cnt}", + "temperature_memory": f"gpu_memtemp_{gpu_cnt}", + "power_draw_W": f"gpu_power_{gpu_cnt}", + } + gpu_df.rename(columns=ren, inplace=True) + + return gpu_df, gpu_cnt + 1 -- GitLab From fb024ad9401c0839e7ba52f9aeda28ab2a8a081f Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 18 Jul 2025 17:03:05 -0400 Subject: [PATCH 166/388] Improve interface when building file-manifest.txt in mit sc downloader --- raps/dataloaders/mit_supercloud/utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/utils.py b/raps/dataloaders/mit_supercloud/utils.py index e6ba570..9697c3d 100644 --- a/raps/dataloaders/mit_supercloud/utils.py +++ b/raps/dataloaders/mit_supercloud/utils.py @@ -78,16 +78,20 @@ def build_or_load_manifest(s3, bucket: str, prefix: str, manifest_path: str): # Otherwise build manifest keys = [] paginator = s3.get_paginator('list_objects_v2') + total_pages = {'cpu': 791, 'gpu': 110} + progress = tqdm(total=sum(total_pages.values()), desc="Building file-manifest.txt", unit="page") + for kind in ('cpu', 'gpu'): pfx = prefix + f"{kind}/" - for page in tqdm( - paginator.paginate(Bucket=bucket, Prefix=pfx), - desc=f"Listing {kind} pages", unit="page" - ): + for page in paginator.paginate(Bucket=bucket, Prefix=pfx): for obj in page.get('Contents', []): key = obj['Key'] if key.lower().endswith('.csv'): keys.append(key) + 
            progress.update(1)
+
+    progress.close()
+
     # Cache on disk
     os.makedirs(os.path.dirname(manifest_path), exist_ok=True)
     with open(manifest_path, 'w') as f:
--
GitLab


From f39287bda35d60030d27ec07ff8ac2193f231d99 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Fri, 18 Jul 2025 17:27:03 -0400
Subject: [PATCH 167/388] Allow specification of --start and --end when
 running simulation

---
 README.md                                 |  7 +++++--
 raps/dataloaders/mit_supercloud/loader.py | 15 +++++++++++----
 raps/dataloaders/mit_supercloud/utils.py  | 13 ++++++++++---
 3 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 2c89308..fe815bd 100644
--- a/README.md
+++ b/README.md
@@ -59,10 +59,13 @@ For MIT Supercloud
     # Download the dataset - note the first time will build a file-manifest.txt file with all the files on S3
     # this will take some time, but subsequent calls should be much faster.
     # Also, this command will dump output to `source_data` directory, or can specify directory using `--outdir`
-    python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:30 --end 2021-05-21T14:00
+    python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:00 --end 2021-05-21T14:00
 
     # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files
-    python multi-part-sim.py -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud
+    python multi-part-sim.py -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud \
+        --start 2021-05-21T13:00 --end 2021-05-21T14:00
+    # Note: if no start/end dates are provided, the run defaults to the 24 hours between
+    # 2021-05-21T00:00 and 2021-05-22T00:00, set by the defaults in raps/dataloaders/mit_supercloud/utils.py
 
     # Re-run simulation using npz files (much faster load)
     python multi-part-sim.py -x mit_supercloud/* -f part-*.npz --system mit_supercloud
diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py
index 0a6a2ad..4ebef2f 100644
--- a/raps/dataloaders/mit_supercloud/loader.py
+++ b/raps/dataloaders/mit_supercloud/loader.py
@@ -12,7 +12,8 @@ from datetime import datetime
 from types import SimpleNamespace
 from tqdm import tqdm
 from raps.job import job_dict
-from .utils import proc_cpu_series, proc_gpu_series
+from .utils import proc_cpu_series, proc_gpu_series, to_epoch
+from .utils import DEFAULT_START, DEFAULT_END
 
 
 def load_data(local_dataset_path, **kwargs):
@@ -48,11 +49,17 @@ def load_data(local_dataset_path, **kwargs):
     sl = pd.read_csv(slurm_path)
 
     # 2) date window
-    start_ts = int(datetime.strptime(kwargs.get("start_date","21052021"), "%d%m%Y").timestamp())
-    end_ts = int(datetime.strptime(kwargs.get("end_date", "22052021"), "%d%m%Y").timestamp())
+    start_ts = to_epoch(kwargs.get("start", DEFAULT_START))
+    end_ts = to_epoch(kwargs.get("end", DEFAULT_END))
     #duration = end_ts - start_ts
-
+    
     sl = sl[(sl.time_submit >= start_ts) & (sl.time_submit < end_ts)]
+    # —— ERROR CATCH: no jobs in this window? ——
+    if sl.empty:
+        raise ValueError(
+            f"No SLURM jobs found between {kwargs.get('start', DEFAULT_START)} and "
+            f"{kwargs.get('end', DEFAULT_END)}. Please pick a range covered by the dataset."
+        )
 
     # 3) detect GPU‐using jobs
     gres = sl.gres_used.fillna("").astype(str)
diff --git a/raps/dataloaders/mit_supercloud/utils.py b/raps/dataloaders/mit_supercloud/utils.py
index 9697c3d..49f455a 100644
--- a/raps/dataloaders/mit_supercloud/utils.py
+++ b/raps/dataloaders/mit_supercloud/utils.py
@@ -11,7 +11,14 @@ DEFAULT_START = "2021-05-21T00:00"
 DEFAULT_END = "2021-05-22T00:00"
 
 
-def _parse_dt(s: str) -> datetime:
+def to_epoch(s: str) -> int:
+    try:
+        return int(datetime.fromisoformat(s).timestamp())
+    except ValueError:
+        return int(datetime.strptime(s, "%d%m%Y").timestamp())
+
+
+def parse_dt(s: str) -> datetime:
     try:
         # handles 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM[:SS]'
         return datetime.fromisoformat(s)
@@ -39,8 +46,8 @@ def load_slurm_log(slurm_path: str, start_date: str, end_date: str):
     df = pd.read_csv(slurm_path)
     # Convert submit times
     df['time_submit'] = pd.to_datetime(df['time_submit'], unit='s')
-    dt0 = _parse_dt(start_date)
-    dt1 = _parse_dt(end_date)
+    dt0 = parse_dt(start_date)
+    dt1 = parse_dt(end_date)
     window = df[(df['time_submit'] >= dt0) & (df['time_submit'] < dt1)]
 
     # Detect GPU jobs via gres_used or tres_alloc
--
GitLab
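Note: the patch above introduces the to_epoch helper and renames _parse_dt to
parse_dt; both accept ISO-8601 datetimes and fall back to the legacy DDMMYYYY
form. A minimal sketch of the intended behavior (illustration only; epoch
values depend on the local timezone):

    from raps.dataloaders.mit_supercloud.utils import to_epoch, parse_dt

    to_epoch('2021-05-21T13:00')   # ISO form: seconds since the Unix epoch
    to_epoch('21052021')           # legacy DDMMYYYY form: midnight that day
    parse_dt('2021-05-21')         # datetime.datetime(2021, 5, 21, 0, 0)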
From 40abfcf5eb59a191bc725f84a47d91df9054700a Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Fri, 18 Jul 2025 18:40:21 -0400
Subject: [PATCH 169/388] Get RAPS working with and without multitenancy as
 specified by flag in scheduler.json (just starting)

---
 config/mit_supercloud/part-cpu/scheduler.json |  1 +
 config/mit_supercloud/part-gpu/scheduler.json |  1 +
 main.py                                       | 17 ++++++++---------
 raps/config.py                                |  3 +++
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/config/mit_supercloud/part-cpu/scheduler.json b/config/mit_supercloud/part-cpu/scheduler.json
index 3f081cc..2b9c850 100644
--- a/config/mit_supercloud/part-cpu/scheduler.json
+++ b/config/mit_supercloud/part-cpu/scheduler.json
@@ -1,4 +1,5 @@
 {
+    "multitenant": true,
     "SEED": 42,
     "JOB_ARRIVAL_TIME": 900,
     "MTBF": 11,
diff --git a/config/mit_supercloud/part-gpu/scheduler.json b/config/mit_supercloud/part-gpu/scheduler.json
index 937b71d..ee96c92 100644
--- a/config/mit_supercloud/part-gpu/scheduler.json
+++ b/config/mit_supercloud/part-gpu/scheduler.json
@@ -1,4 +1,5 @@
 {
+    "multitenant": true,
     "SEED": 42,
     "JOB_ARRIVAL_TIME": 900,
     "MTBF": 11,
diff --git a/main.py b/main.py
index d7b81cc..118fc48 100644
--- a/main.py
+++ b/main.py
@@ -175,15 +175,14 @@ print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds')
     layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config)
     layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end)
 
-    # Get comprehensive simulation statistics
-    simulation_stats = sc.get_stats()
-
-    # Print a formatted report
-    print("\n--- Simulation Report ---")
-    for key, value in simulation_stats.items():
-        print(f"{key.replace('_', ' ').title()}: {value}")
-    print("-------------------------")
-
+# Get comprehensive simulation statistics
+simulation_stats = sc.get_stats()
+
+# Print a formatted report
+print("\n--- Simulation Report ---")
+for key, value in simulation_stats.items():
+    print(f"{key.replace('_', ' ').title()}: {value}")
+print("-------------------------")
 
 if args.plot:
     if 'power' in args.plot:
diff --git a/raps/config.py b/raps/config.py
index 07bbdb7..1828c0b 100644
--- a/raps/config.py
+++ b/raps/config.py
@@ -65,6 +65,9 @@ class ConfigManager:
                 down_nodes.extend(range(start_node_id, end_node_id))
             self.config['DOWN_NODES'] = down_nodes
 
+        # Default multitenancy to False, unless
explicitly set to True + self.config['multitenant'] = bool(self.config.get("multitenant", False)) + self.config['AVAILABLE_NODES'] = self.config['TOTAL_NODES'] - len(down_nodes) def get(self, key: str) -> Any: -- GitLab From 8c5681c906c8387193c4a27a3b2ca20a4677c0a3 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 18 Jul 2025 19:35:17 -0400 Subject: [PATCH 170/388] Add CORES_PER_CPU option to each system.json. Modify resmgr.py to support both multitenancy and whole-node resource allocation --- config/40frontiers/system.json | 1 + config/adastraMI250/system.json | 1 + config/frontier/system.json | 1 + config/fugaku/system.json | 1 + config/gcloudv2/system.json | 1 + config/marconi100/system.json | 1 + config/setonix/part-cpu/system.json | 1 + config/setonix/part-gpu/system.json | 1 + config/summit/system.json | 1 + raps/resmgr.py | 253 ++++++++++++++++++---------- 10 files changed, 174 insertions(+), 88 deletions(-) diff --git a/config/40frontiers/system.json b/config/40frontiers/system.json index 51add94..dc19251 100644 --- a/config/40frontiers/system.json +++ b/config/40frontiers/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [41], "DOWN_NODES": [], + "CORES_PER_CPU": 64, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/adastraMI250/system.json b/config/adastraMI250/system.json index 36a689e..c5268f8 100644 --- a/config/adastraMI250/system.json +++ b/config/adastraMI250/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383], + "CORES_PER_CPU": 64, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 8, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/frontier/system.json b/config/frontier/system.json index b1b9d76..ced62d9 100644 --- a/config/frontier/system.json +++ b/config/frontier/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [41], "DOWN_NODES": [], + "CORES_PER_CPU": 64, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/fugaku/system.json b/config/fugaku/system.json index 6a0e63a..5310f2b 100644 --- a/config/fugaku/system.json +++ b/config/fugaku/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 48, "MISSING_RACKS": [], "DOWN_NODES": [], + "CORES_PER_CPU": 48, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 0, "CPU_PEAK_FLOPS": 3.379E12, diff --git a/config/gcloudv2/system.json b/config/gcloudv2/system.json index 4b6fc7b..229617e 100644 --- a/config/gcloudv2/system.json +++ b/config/gcloudv2/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [], + "CORES_PER_CPU": 20, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 0, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/marconi100/system.json b/config/marconi100/system.json index 435c87c..38a2057 100644 --- a/config/marconi100/system.json +++ b/config/marconi100/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [49, 50], "DOWN_NODES": [], + "CORES_PER_CPU": 24, "CPUS_PER_NODE": 2, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 396.8E9, diff --git a/config/setonix/part-cpu/system.json b/config/setonix/part-cpu/system.json index 79da14f..94442c1 100644 --- a/config/setonix/part-cpu/system.json +++ b/config/setonix/part-cpu/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 
1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791], + "CORES_PER_CPU": 64, "CPUS_PER_NODE": 2, "GPUS_PER_NODE": 0, "CPU_PEAK_FLOPS": 2.50944E12, diff --git a/config/setonix/part-gpu/system.json b/config/setonix/part-gpu/system.json index 6ffa5a4..f524cc2 100644 --- a/config/setonix/part-gpu/system.json +++ b/config/setonix/part-gpu/system.json @@ -11,6 +11,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255], + "CORES_PER_CPU": 64, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/summit/system.json b/config/summit/system.json index 74ba3b7..c3b6102 100644 --- a/config/summit/system.json +++ b/config/summit/system.json @@ -10,6 +10,7 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271], "DOWN_NODES": [], + "CORES_PER_CPU": 22, "CPUS_PER_NODE": 2, "GPUS_PER_NODE": 6, "CPU_PEAK_FLOPS": 436.2E9, diff --git a/raps/resmgr.py b/raps/resmgr.py index e8ffc52..df92975 100644 --- a/raps/resmgr.py +++ b/raps/resmgr.py @@ -5,123 +5,200 @@ from scipy.stats import weibull_min class ResourceManager: def __init__(self, total_nodes, down_nodes, config): - self.total_nodes = total_nodes - self.config = config - self.down_nodes = set(down_nodes) - self.nodes = [] - # Initialize nodes based on config parameters - total_cpu_cores_per_node = self.config['CPUS_PER_NODE'] * self.config['CORES_PER_CPU'] - total_gpu_units_per_node = self.config['GPUS_PER_NODE'] + self.total_nodes = total_nodes + self.config = config + self.multitenant = bool(self.config.get("multitenant", False)) + self.down_nodes = set(down_nodes) + + # Track allocated resources for querying + self.allocated_cpu_cores = 0 + self.allocated_gpu_units = 0 + self.sys_util_history = [] + # Compute per-node capacities: support multiple config styles + if 'CORES_PER_CPU' in config and 'CPUS_PER_NODE' in config: + # CPUS_PER_NODE = # sockets, CORES_PER_CPU = cores per socket + total_cpu = config['CPUS_PER_NODE'] * config['CORES_PER_CPU'] + else: + # Either flat CORES_PER_NODE or CPUS_PER_NODE used as total cores + total_cpu = config.get('CORES_PER_NODE', config.get('CPUS_PER_NODE', 0)) + total_gpu = config.get('GPUS_PER_NODE', 0) + + # Build a unified node list (always present) so 
engine can inspect it + self.nodes = [] for i in range(self.total_nodes): is_down = i in self.down_nodes self.nodes.append({ 'id': i, - 'total_cpu_cores': total_cpu_cores_per_node, - 'available_cpu_cores': 0 if is_down else total_cpu_cores_per_node, - 'total_gpu_units': total_gpu_units_per_node, - 'available_gpu_units': 0 if is_down else total_gpu_units_per_node, - 'is_down': is_down + 'total_cpu_cores': total_cpu, + 'available_cpu_cores': 0 if is_down else total_cpu, + 'total_gpu_units': total_gpu, + 'available_gpu_units': 0 if is_down else total_gpu, + 'is_down': is_down }) - # Available nodes are now tracked by their available resources - self.available_nodes = [node['id'] for node in self.nodes if not node['is_down']] - self.sys_util_history = [] - self.allocated_cpu_cores = 0 + # Legacy whole-node allocation tracking + if not self.multitenant: + self.available_nodes = [node['id'] for node in self.nodes if not node['is_down']] - def assign_nodes_to_job(self, job, current_time, node_id): - """Assigns resources (cores, GPUs) to a job and updates the available resources.""" - # For multitenancy, a job is assigned to a single node. - # We need to find a node that can satisfy the job's resource requirements. + def assign_nodes_to_job(self, job, current_time, node_id=None): + if not self.multitenant: + return self._assign_whole_node(job, current_time) + # Multitenant allocation path found_node = None - # Use the provided node_id directly - if node_id is not None and node_id < len(self.nodes) and not self.nodes[node_id]['is_down']: + # Try specific node_id if provided + if node_id is not None and 0 <= node_id < len(self.nodes): node = self.nodes[node_id] - if (node['available_cpu_cores'] >= job.cpu_cores_required and - node['available_gpu_units'] >= job.gpu_units_required): + if (not node['is_down'] and + node['available_cpu_cores'] >= job.cpu_cores_required and + node['available_gpu_units'] >= job.gpu_units_required): found_node = node + # Fallback: scan all nodes if found_node is None: - raise ValueError(f"Not enough available resources to schedule job {job.id} on node {node_id}.") + for node in self.nodes: + if (not node['is_down'] and + node['available_cpu_cores'] >= job.cpu_cores_required and + node['available_gpu_units'] >= job.gpu_units_required): + found_node = node + break - # Allocate resources on the found node + if found_node is None: + raise ValueError(f"Not enough available resources to schedule job {job.id}.") + + # Allocate resources found_node['available_cpu_cores'] -= job.cpu_cores_required found_node['available_gpu_units'] -= job.gpu_units_required + job.scheduled_nodes = [found_node['id']] + job.allocated_cpu_cores = job.cpu_cores_required + job.allocated_gpu_units = job.gpu_units_required + self.allocated_cpu_cores += job.cpu_cores_required + self.allocated_gpu_units += job.gpu_units_required - # Assign the node and allocated resources to the job - job.scheduled_nodes = [found_node['id']] - job.allocated_cpu_cores = job.cpu_cores_required - job.allocated_gpu_units = job.gpu_units_required - - # Set job start and end times according to simulation job.start_time = current_time - job.end_time = current_time + job.wall_time - job.state = JobState.RUNNING # Mark job as running + job.end_time = current_time + job.wall_time + job.state = JobState.RUNNING + + def _assign_whole_node(self, job, current_time): + # Legacy whole-node allocation supporting explicit list or count-based mode + # 1) If replaying specific nodes, use requested_nodes + if getattr(job, 'requested_nodes', 
None): + take = len(job.requested_nodes) + picks = job.requested_nodes + # 2) If the job carries a nodes_alloc attribute, honor it + elif hasattr(job, 'nodes_alloc'): + take = job.nodes_alloc + picks = self.available_nodes[:take] + # 3) Otherwise fall back to nodes_required + else: + take = job.nodes_required + picks = self.available_nodes[:take] + + # Ensure we have enough free nodes + if take > len(self.available_nodes): + raise ValueError(f"Not enough available nodes to schedule job {job.id}: " + f"needs {take}, only {len(self.available_nodes)} free") + + # Allocate + job.scheduled_nodes = picks + self.available_nodes = [n for n in self.available_nodes if n not in picks] + job.start_time = current_time + job.end_time = current_time + job.wall_time + job.state = JobState.RUNNING def free_nodes_from_job(self, job): - """Frees the resources (cores, GPUs) that were allocated to a completed job.""" + """Frees the resources (whole-node or multitenant) allocated to a completed job.""" + if not self.multitenant: + # DEBUG: show what we're freeing + print(f"[DEBUG free] Job {job.id} releasing nodes: {getattr(job, 'scheduled_nodes', None)}") + print(f"[DEBUG free] Available before release: {self.available_nodes}") + self._free_whole_nodes(job) + print(f"[DEBUG free] Available after release: {self.available_nodes}") + return + + # Multitenant release path if hasattr(job, "scheduled_nodes") and job.scheduled_nodes: - node_id = job.scheduled_nodes[0] # Assuming a job is scheduled on a single node - if node_id < len(self.nodes): - node = self.nodes[node_id] - node['available_cpu_cores'] += job.allocated_cpu_cores - node['available_gpu_units'] += job.allocated_gpu_units + node_id = job.scheduled_nodes[0] + print(f"[DEBUG free] Job {job.id} releasing multitenant node: {node_id}") + node = self.nodes[node_id] if 0 <= node_id < len(self.nodes) else None + if node: + before_cpu = node['available_cpu_cores'] + before_gpu = node['available_gpu_units'] + node['available_cpu_cores'] += getattr(job, 'allocated_cpu_cores', 0) + node['available_gpu_units'] += getattr(job, 'allocated_gpu_units', 0) + self.allocated_cpu_cores -= getattr(job, 'allocated_cpu_cores', 0) + self.allocated_gpu_units -= getattr(job, 'allocated_gpu_units', 0) + print(f"[DEBUG free] Node {node_id} before (cpu,gpu)=({before_cpu},{before_gpu}), after=({node['available_cpu_cores']},{node['available_gpu_units']})") else: - print(f"Warning: Job {job.id} scheduled on non-existent node {node_id}. Cannot free resources.") - else: - # If job has no scheduled nodes, there is nothing to free. - pass + print(f"Warning: Job {job.id} scheduled on invalid node {node_id}") + + def _free_whole_nodes(self, job): + # Legacy free whole nodes + if hasattr(job, "scheduled_nodes"): + for n in job.scheduled_nodes: + if n not in self.available_nodes: + self.available_nodes.append(n) + self.available_nodes = sorted(self.available_nodes) def update_system_utilization(self, current_time, running_jobs): """ - Computes and records the system utilization based on allocated CPU cores and GPU units. + Computes and records the system utilization. + If running in whole-node mode, uses node-based utilization; otherwise uses core/GPU utilization. 
""" - total_cpu_cores = sum(node['total_cpu_cores'] for node in self.nodes) - total_gpu_units = sum(node['total_gpu_units'] for node in self.nodes) - - self.allocated_cpu_cores = sum(job.allocated_cpu_cores for job in running_jobs) - allocated_gpu_units = sum(job.allocated_gpu_units for job in running_jobs) - - cpu_utilization = (self.allocated_cpu_cores / total_cpu_cores) * 100 if total_cpu_cores else 0 - gpu_utilization = (allocated_gpu_units / total_gpu_units) * 100 if total_gpu_units else 0 - - # Determine utilization based on partition type (has GPUs or not) - if self.config.get('GPUS_PER_NODE', 0) > 0: - # This is a GPU partition, use GPU utilization - utilization = gpu_utilization - else: - # This is a CPU-only partition, use CPU utilization - utilization = cpu_utilization - - self.sys_util_history.append((current_time, utilization)) - return utilization + if not self.multitenant: + # Whole-node utilization: percentage of active nodes + num_active = len(running_jobs) + return self._update_whole_node_util(current_time, num_active) + + # Multitenant utilization: based on CPU/GPU usage + total_cpu = sum(n['total_cpu_cores'] for n in self.nodes) + total_gpu = sum(n['total_gpu_units'] for n in self.nodes) + allocated_cpu = self.allocated_cpu_cores + allocated_gpu = self.allocated_gpu_units + cpu_util = (allocated_cpu / total_cpu) * 100 if total_cpu else 0 + gpu_util = (allocated_gpu / total_gpu) * 100 if total_gpu else 0 + # Choose GPU utilization if GPUs are present + util = gpu_util if self.config.get('GPUS_PER_NODE', 0) > 0 else cpu_util + self.sys_util_history.append((current_time, util)) + return util + + def _update_whole_node_util(self, current_time, num_active_nodes): + operational = self.total_nodes - len(self.down_nodes) + util = (num_active_nodes / operational) * 100 if operational else 0 + self.sys_util_history.append((current_time, util)) + return util def node_failure(self, mtbf): - """Simulate node failure using Weibull distribution.""" - shape_parameter = 1.5 - scale_parameter = mtbf * 3600 # Convert to seconds - - # Create a NumPy array of node indices, excluding already down nodes - operational_node_ids = np.array([node['id'] for node in self.nodes if not node['is_down']]) - - if len(operational_node_ids) == 0: - return [] # No operational nodes to fail - - # Sample the Weibull distribution for all operational nodes at once - random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=len(operational_node_ids)) - - # Identify nodes that have failed (using a threshold for demonstration) - failure_threshold = 0.001 # This threshold might need tuning - failed_nodes_mask = random_values < failure_threshold - newly_downed_node_ids = operational_node_ids[failed_nodes_mask] - - # Update the state of the newly downed nodes in self.nodes - for node_id in newly_downed_node_ids: + if not self.multitenant: + # Legacy node failure sampling on whole nodes + available = np.array([n for n in range(self.total_nodes) if n not in self.down_nodes]) + if available.size == 0: + return [] + shape_param = 1.5 + scale_param = mtbf * 3600 + random_vals = weibull_min.rvs(shape_param, scale=scale_param, size=available.size) + failure_threshold = 0.001 + failed = available[random_vals < failure_threshold] + for nid in failed: + if nid in self.available_nodes: + self.available_nodes.remove(nid) + self.down_nodes.add(nid) + return failed.tolist() + + # Multitenant node failure sampling + operational_ids = np.array([n['id'] for n in self.nodes if not n['is_down']]) + if 
operational_ids.size == 0: + return [] + shape_param = 1.5 + scale_param = mtbf * 3600 + random_vals = weibull_min.rvs(shape_param, scale=scale_param, size=operational_ids.size) + failure_threshold = 0.001 + failed = operational_ids[random_vals < failure_threshold] + for node_id in failed: node = self.nodes[node_id] - node['is_down'] = True + node['is_down'] = True node['available_cpu_cores'] = 0 node['available_gpu_units'] = 0 - self.down_nodes.add(node_id) # Add to the set of down node IDs - - return newly_downed_node_ids.tolist() + self.down_nodes.add(node_id) + return failed.tolist() -- GitLab From dcd3f3d3e10b3495b720cc630d48d83c057cd8d0 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 20 Jul 2025 16:18:54 -0400 Subject: [PATCH 171/388] Restructure resource manager to be a directory --- raps/resmgr.py | 204 ------------------------------------- raps/resmgr/__init__.py | 25 +++++ raps/resmgr/multitenant.py | 124 ++++++++++++++++++++++ raps/resmgr/whole_node.py | 92 +++++++++++++++++ 4 files changed, 241 insertions(+), 204 deletions(-) delete mode 100644 raps/resmgr.py create mode 100644 raps/resmgr/__init__.py create mode 100644 raps/resmgr/multitenant.py create mode 100644 raps/resmgr/whole_node.py diff --git a/raps/resmgr.py b/raps/resmgr.py deleted file mode 100644 index df92975..0000000 --- a/raps/resmgr.py +++ /dev/null @@ -1,204 +0,0 @@ -import numpy as np -from .job import JobState -from scipy.stats import weibull_min - - -class ResourceManager: - def __init__(self, total_nodes, down_nodes, config): - self.total_nodes = total_nodes - self.config = config - self.multitenant = bool(self.config.get("multitenant", False)) - self.down_nodes = set(down_nodes) - - # Track allocated resources for querying - self.allocated_cpu_cores = 0 - self.allocated_gpu_units = 0 - self.sys_util_history = [] - - # Compute per-node capacities: support multiple config styles - if 'CORES_PER_CPU' in config and 'CPUS_PER_NODE' in config: - # CPUS_PER_NODE = # sockets, CORES_PER_CPU = cores per socket - total_cpu = config['CPUS_PER_NODE'] * config['CORES_PER_CPU'] - else: - # Either flat CORES_PER_NODE or CPUS_PER_NODE used as total cores - total_cpu = config.get('CORES_PER_NODE', config.get('CPUS_PER_NODE', 0)) - total_gpu = config.get('GPUS_PER_NODE', 0) - - # Build a unified node list (always present) so engine can inspect it - self.nodes = [] - for i in range(self.total_nodes): - is_down = i in self.down_nodes - self.nodes.append({ - 'id': i, - 'total_cpu_cores': total_cpu, - 'available_cpu_cores': 0 if is_down else total_cpu, - 'total_gpu_units': total_gpu, - 'available_gpu_units': 0 if is_down else total_gpu, - 'is_down': is_down - }) - - # Legacy whole-node allocation tracking - if not self.multitenant: - self.available_nodes = [node['id'] for node in self.nodes if not node['is_down']] - - def assign_nodes_to_job(self, job, current_time, node_id=None): - if not self.multitenant: - return self._assign_whole_node(job, current_time) - - # Multitenant allocation path - found_node = None - # Try specific node_id if provided - if node_id is not None and 0 <= node_id < len(self.nodes): - node = self.nodes[node_id] - if (not node['is_down'] and - node['available_cpu_cores'] >= job.cpu_cores_required and - node['available_gpu_units'] >= job.gpu_units_required): - found_node = node - - # Fallback: scan all nodes - if found_node is None: - for node in self.nodes: - if (not node['is_down'] and - node['available_cpu_cores'] >= job.cpu_cores_required and - node['available_gpu_units'] >= 
job.gpu_units_required): - found_node = node - break - - if found_node is None: - raise ValueError(f"Not enough available resources to schedule job {job.id}.") - - # Allocate resources - found_node['available_cpu_cores'] -= job.cpu_cores_required - found_node['available_gpu_units'] -= job.gpu_units_required - job.scheduled_nodes = [found_node['id']] - job.allocated_cpu_cores = job.cpu_cores_required - job.allocated_gpu_units = job.gpu_units_required - self.allocated_cpu_cores += job.cpu_cores_required - self.allocated_gpu_units += job.gpu_units_required - - job.start_time = current_time - job.end_time = current_time + job.wall_time - job.state = JobState.RUNNING - - def _assign_whole_node(self, job, current_time): - # Legacy whole-node allocation supporting explicit list or count-based mode - # 1) If replaying specific nodes, use requested_nodes - if getattr(job, 'requested_nodes', None): - take = len(job.requested_nodes) - picks = job.requested_nodes - # 2) If the job carries a nodes_alloc attribute, honor it - elif hasattr(job, 'nodes_alloc'): - take = job.nodes_alloc - picks = self.available_nodes[:take] - # 3) Otherwise fall back to nodes_required - else: - take = job.nodes_required - picks = self.available_nodes[:take] - - # Ensure we have enough free nodes - if take > len(self.available_nodes): - raise ValueError(f"Not enough available nodes to schedule job {job.id}: " - f"needs {take}, only {len(self.available_nodes)} free") - - # Allocate - job.scheduled_nodes = picks - self.available_nodes = [n for n in self.available_nodes if n not in picks] - job.start_time = current_time - job.end_time = current_time + job.wall_time - job.state = JobState.RUNNING - - def free_nodes_from_job(self, job): - """Frees the resources (whole-node or multitenant) allocated to a completed job.""" - if not self.multitenant: - # DEBUG: show what we're freeing - print(f"[DEBUG free] Job {job.id} releasing nodes: {getattr(job, 'scheduled_nodes', None)}") - print(f"[DEBUG free] Available before release: {self.available_nodes}") - self._free_whole_nodes(job) - print(f"[DEBUG free] Available after release: {self.available_nodes}") - return - - # Multitenant release path - if hasattr(job, "scheduled_nodes") and job.scheduled_nodes: - node_id = job.scheduled_nodes[0] - print(f"[DEBUG free] Job {job.id} releasing multitenant node: {node_id}") - node = self.nodes[node_id] if 0 <= node_id < len(self.nodes) else None - if node: - before_cpu = node['available_cpu_cores'] - before_gpu = node['available_gpu_units'] - node['available_cpu_cores'] += getattr(job, 'allocated_cpu_cores', 0) - node['available_gpu_units'] += getattr(job, 'allocated_gpu_units', 0) - self.allocated_cpu_cores -= getattr(job, 'allocated_cpu_cores', 0) - self.allocated_gpu_units -= getattr(job, 'allocated_gpu_units', 0) - print(f"[DEBUG free] Node {node_id} before (cpu,gpu)=({before_cpu},{before_gpu}), after=({node['available_cpu_cores']},{node['available_gpu_units']})") - else: - print(f"Warning: Job {job.id} scheduled on invalid node {node_id}") - - def _free_whole_nodes(self, job): - # Legacy free whole nodes - if hasattr(job, "scheduled_nodes"): - for n in job.scheduled_nodes: - if n not in self.available_nodes: - self.available_nodes.append(n) - self.available_nodes = sorted(self.available_nodes) - - def update_system_utilization(self, current_time, running_jobs): - """ - Computes and records the system utilization. - If running in whole-node mode, uses node-based utilization; otherwise uses core/GPU utilization. 
- """ - if not self.multitenant: - # Whole-node utilization: percentage of active nodes - num_active = len(running_jobs) - return self._update_whole_node_util(current_time, num_active) - - # Multitenant utilization: based on CPU/GPU usage - total_cpu = sum(n['total_cpu_cores'] for n in self.nodes) - total_gpu = sum(n['total_gpu_units'] for n in self.nodes) - allocated_cpu = self.allocated_cpu_cores - allocated_gpu = self.allocated_gpu_units - cpu_util = (allocated_cpu / total_cpu) * 100 if total_cpu else 0 - gpu_util = (allocated_gpu / total_gpu) * 100 if total_gpu else 0 - # Choose GPU utilization if GPUs are present - util = gpu_util if self.config.get('GPUS_PER_NODE', 0) > 0 else cpu_util - self.sys_util_history.append((current_time, util)) - return util - - def _update_whole_node_util(self, current_time, num_active_nodes): - operational = self.total_nodes - len(self.down_nodes) - util = (num_active_nodes / operational) * 100 if operational else 0 - self.sys_util_history.append((current_time, util)) - return util - - def node_failure(self, mtbf): - if not self.multitenant: - # Legacy node failure sampling on whole nodes - available = np.array([n for n in range(self.total_nodes) if n not in self.down_nodes]) - if available.size == 0: - return [] - shape_param = 1.5 - scale_param = mtbf * 3600 - random_vals = weibull_min.rvs(shape_param, scale=scale_param, size=available.size) - failure_threshold = 0.001 - failed = available[random_vals < failure_threshold] - for nid in failed: - if nid in self.available_nodes: - self.available_nodes.remove(nid) - self.down_nodes.add(nid) - return failed.tolist() - - # Multitenant node failure sampling - operational_ids = np.array([n['id'] for n in self.nodes if not n['is_down']]) - if operational_ids.size == 0: - return [] - shape_param = 1.5 - scale_param = mtbf * 3600 - random_vals = weibull_min.rvs(shape_param, scale=scale_param, size=operational_ids.size) - failure_threshold = 0.001 - failed = operational_ids[random_vals < failure_threshold] - for node_id in failed: - node = self.nodes[node_id] - node['is_down'] = True - node['available_cpu_cores'] = 0 - node['available_gpu_units'] = 0 - self.down_nodes.add(node_id) - return failed.tolist() diff --git a/raps/resmgr/__init__.py b/raps/resmgr/__init__.py new file mode 100644 index 0000000..840a6dc --- /dev/null +++ b/raps/resmgr/__init__.py @@ -0,0 +1,25 @@ +""" +ResourceManager package initializer. +Exports a factory that returns the appropriate manager based on config. +""" +from .whole_node import WholeNodeResourceManager +from .multitenant import MultiTenantResourceManager + + +def make_resource_manager(total_nodes, down_nodes, config): + """ + Factory to choose between whole-node and multitenant managers. + """ + if config.get("multitenant", False): + return MultiTenantResourceManager(total_nodes, down_nodes, config) + return WholeNodeResourceManager(total_nodes, down_nodes, config) + +# Alias for backward compatibility +ResourceManager = make_resource_manager + +__all__ = [ + "make_resource_manager", + "ResourceManager", + "WholeNodeResourceManager", + "MultiTenantResourceManager" +] diff --git a/raps/resmgr/multitenant.py b/raps/resmgr/multitenant.py new file mode 100644 index 0000000..94c1fef --- /dev/null +++ b/raps/resmgr/multitenant.py @@ -0,0 +1,124 @@ +import numpy as np +from ..job import JobState +from scipy.stats import weibull_min + + +class MultiTenantResourceManager: + """ + Resource manager for per-node CPU/GPU multitenancy. 
+ """ + def __init__(self, total_nodes, down_nodes, config): + self.total_nodes = total_nodes + self.config = config + self.down_nodes = set(down_nodes) + self.nodes = [] + # Track total allocations for reporting + self.allocated_cpu_cores = 0 + self.allocated_gpu_units = 0 + self.sys_util_history = [] + + # Determine per-node capacities + total_cpu = self.config['CPUS_PER_NODE'] * self.config['CORES_PER_CPU'] + total_gpu = self.config.get('GPUS_PER_NODE', 0) + + # Initialize node state + for i in range(self.total_nodes): + is_down = i in self.down_nodes + self.nodes.append({ + 'id': i, + 'total_cpu_cores': total_cpu, + 'available_cpu_cores': 0 if is_down else total_cpu, + 'total_gpu_units': total_gpu, + 'available_gpu_units': 0 if is_down else total_gpu, + 'is_down': is_down + }) + + # List of up nodes for quick enumeration + self.available_nodes = [n['id'] for n in self.nodes if not n['is_down']] + + def assign_nodes_to_job(self, job, current_time, node_id=None): + """Assigns cores/GPUs to a job on one eligible node.""" + # Try preferred node + found = None + if node_id is not None and 0 <= node_id < len(self.nodes): + candidate = self.nodes[node_id] + if (not candidate['is_down'] and + candidate['available_cpu_cores'] >= job.cpu_cores_required and + candidate['available_gpu_units'] >= job.gpu_units_required): + found = candidate + + # Fallback: first-fit + if found is None: + for candidate in self.nodes: + if (not candidate['is_down'] and + candidate['available_cpu_cores'] >= job.cpu_cores_required and + candidate['available_gpu_units'] >= job.gpu_units_required): + found = candidate + break + + if found is None: + raise ValueError(f"Not enough available resources to schedule job {job.id}.") + + # Allocate resources + found['available_cpu_cores'] -= job.cpu_cores_required + found['available_gpu_units'] -= job.gpu_units_required + self.allocated_cpu_cores += job.cpu_cores_required + self.allocated_gpu_units += job.gpu_units_required + + # Record on job + job.scheduled_nodes = [found['id']] + job.allocated_cpu_cores = job.cpu_cores_required + job.allocated_gpu_units = job.gpu_units_required + job.start_time = current_time + job.end_time = current_time + job.wall_time + job.state = JobState.RUNNING + + def free_nodes_from_job(self, job): + """Releases cores/GPUs from a completed job.""" + if getattr(job, 'scheduled_nodes', None): + nid = job.scheduled_nodes[0] + if 0 <= nid < len(self.nodes): + node = self.nodes[nid] + node['available_cpu_cores'] += getattr(job, 'allocated_cpu_cores', 0) + node['available_gpu_units'] += getattr(job, 'allocated_gpu_units', 0) + self.allocated_cpu_cores -= getattr(job, 'allocated_cpu_cores', 0) + self.allocated_gpu_units -= getattr(job, 'allocated_gpu_units', 0) + else: + print(f"Warning: Job {job.id} had invalid node {nid} during free.") + + def update_system_utilization(self, current_time, running_jobs): + """ + Computes and records utilization based on allocated CPU/GPU across all nodes. 
+ """ + total_cpu = sum(n['total_cpu_cores'] for n in self.nodes) + total_gpu = sum(n['total_gpu_units'] for n in self.nodes) + used_cpu = self.allocated_cpu_cores + used_gpu = self.allocated_gpu_units + + cpu_util = (used_cpu / total_cpu) * 100 if total_cpu else 0 + gpu_util = (used_gpu / total_gpu) * 100 if total_gpu else 0 + + # Choose GPU util if GPUs exist, else CPU + util = gpu_util if self.config.get('GPUS_PER_NODE', 0) > 0 else cpu_util + self.sys_util_history.append((current_time, util)) + return util + + def node_failure(self, mtbf): + """ + Simulate random node failures via a Weibull distribution. + """ + shape = 1.5 + scale = mtbf * 3600 + ops = np.array([n['id'] for n in self.nodes if not n['is_down']]) + if ops.size == 0: + return [] + + vals = weibull_min.rvs(shape, scale=scale, size=ops.size) + failed = ops[vals < 0.001] + for nid in failed: + node = self.nodes[nid] + node['is_down'] = True + node['available_cpu_cores'] = 0 + node['available_gpu_units'] = 0 + self.down_nodes.add(nid) + return failed.tolist() diff --git a/raps/resmgr/whole_node.py b/raps/resmgr/whole_node.py new file mode 100644 index 0000000..a2a574a --- /dev/null +++ b/raps/resmgr/whole_node.py @@ -0,0 +1,92 @@ +from ..job import JobState + +class WholeNodeResourceManager: + """ + Legacy whole-node resource manager: allocates and frees full nodes. + """ + def __init__(self, total_nodes, down_nodes, config=None): + self.total_nodes = total_nodes + self.down_nodes = set(down_nodes) + self.config = config or {} + + # Determine per-node capacities + cfg = self.config + if 'CPUS_PER_NODE' in cfg and 'CORES_PER_CPU' in cfg: + total_cpu = cfg['CPUS_PER_NODE'] * cfg['CORES_PER_CPU'] + else: + total_cpu = cfg.get('CORES_PER_NODE', cfg.get('CPUS_PER_NODE', 1)) + total_gpu = cfg.get('GPUS_PER_NODE', 0) + + # Build unified node list so engine can inspect resource_manager.nodes + self.nodes = [] + for i in range(self.total_nodes): + is_down = i in self.down_nodes + self.nodes.append({ + 'id': i, + 'total_cpu_cores': total_cpu, + 'available_cpu_cores': 0 if is_down else total_cpu, + 'total_gpu_units': total_gpu, + 'available_gpu_units': 0 if is_down else total_gpu, + 'is_down': is_down + }) + + # Available nodes list for allocation/frees + self.available_nodes = [n['id'] for n in self.nodes if not n['is_down']] + # System utilization history (time, util%) + self.sys_util_history = [] + + def assign_nodes_to_job(self, job, current_time, node_id=None): + """Assigns full nodes to a job (replay or count-based).""" + # Ensure enough free nodes + if len(self.available_nodes) < job.nodes_required: + raise ValueError(f"Not enough available nodes to schedule job {job.id}") + + if getattr(job, 'requested_nodes', None): + # Telemetry replay: use the exact nodes + job.scheduled_nodes = job.requested_nodes + self.available_nodes = [n for n in self.available_nodes if n not in job.scheduled_nodes] + else: + # Count-based allocation: take the first N free nodes + job.scheduled_nodes = self.available_nodes[:job.nodes_required] + self.available_nodes = self.available_nodes[job.nodes_required:] + + # Mark job running + job.start_time = current_time + job.end_time = current_time + job.wall_time + job.state = JobState.RUNNING + + def free_nodes_from_job(self, job): + """Frees the full nodes previously allocated to a job.""" + if getattr(job, 'scheduled_nodes', None): + for n in job.scheduled_nodes: + if n not in self.available_nodes: + self.available_nodes.append(n) + self.available_nodes = sorted(self.available_nodes) + + def 
update_system_utilization(self, current_time, running_jobs): + """ + Computes system utilization as percentage of non-down nodes that are active. + + Parameters: + - current_time: simulation time + - running_jobs: list of currently running Job objects + """ + # Number of active nodes is length of running_jobs + num_active = len(running_jobs) + total_operational = self.total_nodes - len(self.down_nodes) + util = (num_active / total_operational) * 100 if total_operational else 0 + self.sys_util_history.append((current_time, util)) + return util + + def node_failure(self, mtbf): + """ + Legacy whole-node mode does not simulate failures; always return empty list. + """ + return [] -- GitLab From 2b18bdc6a3b2650872309484b9d1aed427a0b38b Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 20 Jul 2025 16:59:02 -0400 Subject: [PATCH 172/388] Get both whole-node scheduling and multitenancy scheduling working - add 'multitenant' scheduler
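This makes the scheduler choice config-driven. A minimal sketch of the selection rule added to the Engine below; the helper name pick_scheduler_type is illustrative only and not part of this patch:

    # Sketch, assuming 'config' is the merged scheduler.json dict used by RAPS.
    def pick_scheduler_type(config: dict, cli_scheduler: str = 'default') -> str:
        # A truthy 'multitenant' flag forces the multitenant scheduler;
        # otherwise the command-line choice wins.
        if config.get('multitenant', False):
            return 'multitenant'
        return cli_scheduler

    assert pick_scheduler_type({'multitenant': True}) == 'multitenant'
    assert pick_scheduler_type({'multitenant': False}) == 'default'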
--- raps/engine.py | 36 ++++-- raps/schedulers/default.py | 112 +++++++++---------- raps/schedulers/multitenant.py | 199 +++++++++++++++++++++++++++++++++ 3 files changed, 277 insertions(+), 70 deletions(-) create mode 100644 raps/schedulers/multitenant.py diff --git a/raps/engine.py b/raps/engine.py index 84d3a9b..b3a1db6 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -80,8 +80,12 @@ class Engine: self.max_slowdown_history = [] self.node_occupancy_history = [] - # Get scheduler type from command-line args or default - scheduler_type = kwargs.get('scheduler', 'default') + # Set scheduler type - either based on config or command-line args - defaults to 'default' + if self.config['multitenant']: + scheduler_type = 'multitenant' + else: + scheduler_type = kwargs.get('scheduler', 'default') + policy_type = kwargs.get('policy') backfill_type = kwargs.get('backfill') @@ -142,8 +146,9 @@ class Engine: eligible_jobs_list = [] for job_data in eligible: job_instance = Job(job_data) - job_instance.cpu_cores_required = job_data.get('cpu_cores_required', 0) - job_instance.gpu_units_required = job_data.get('gpu_units_required', 0) + if self.config['multitenant']: + job_instance.cpu_cores_required = job_data.get('cpu_cores_required', 0) + job_instance.gpu_units_required = job_data.get('gpu_units_required', 0) eligible_jobs_list.append(job_instance) self.queue += eligible_jobs_list if self.debug: @@ -206,16 +211,23 @@ class Engine: newly_downed_nodes = [] # Update active/free nodes based on core/GPU utilization - total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes) - total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes) - available_cpu_cores = sum(node['available_cpu_cores'] for node in self.resource_manager.nodes) - available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes) + if self.config['multitenant']: + total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes) + total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes) + available_cpu_cores = sum(node['available_cpu_cores'] for node in self.resource_manager.nodes) + available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes) - self.num_free_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and node['available_cpu_cores'] == node['total_cpu_cores'] and node['available_gpu_units'] == node['total_gpu_units']]) - self.num_active_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and (node['available_cpu_cores'] < node['total_cpu_cores'] or node['available_gpu_units'] < node['total_gpu_units'])]) + self.num_free_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and node['available_cpu_cores'] == node['total_cpu_cores'] and node['available_gpu_units'] == node['total_gpu_units']]) + self.num_active_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and (node['available_cpu_cores'] < node['total_cpu_cores'] or node['available_gpu_units'] < node['total_gpu_units'])]) - # Update system utilization history - self.resource_manager.update_system_utilization(self.current_time, self.running) + # Update system utilization history + self.resource_manager.update_system_utilization(self.current_time, self.running) + else: + # Whole-node allocator + self.num_free_nodes = len(self.resource_manager.available_nodes) + self.num_active_nodes = self.config['TOTAL_NODES'] \ + - len(self.resource_manager.available_nodes) \ + - len(self.resource_manager.down_nodes) return completed_jobs, newly_downed_nodes diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index 539605e..9953087 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -39,8 +39,6 @@ class Scheduler: # Iterate over a copy of the queue since we might remove items for job in queue[:]: - if self.debug: - print(f"[DEBUG] Scheduler: Considering job {job.id} (CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required})") if self.policy == PolicyType.REPLAY: if job.start_time > current_time: continue # Replay: Job didn't start yet. Next! @@ -51,13 +49,11 @@ class Scheduler: nodes_available = self.check_available_nodes(job) - if nodes_available is not None: - self.place_job_and_manage_queues(job, queue, running, current_time, nodes_available) + if nodes_available: + self.place_job_and_manage_queues(job, queue, running, current_time) else: # In case the job was not placed, see how we should continue: if self.bfpolicy is not None: - backfill_job, node_id = self.backfill(queue, running, current_time) - if backfill_job and node_id is not None: - self.place_job_and_manage_queues(backfill_job, queue, running, current_time, node_id) + self.backfill(queue, running, current_time) # After backfill decide whether to continue processing the queue or wait; continuing may result in fairness issues.
if self.policy in [PolicyType.REPLAY]: @@ -99,42 +95,42 @@ class Scheduler: else: return jobs_to_submit - def place_job_and_manage_queues(self, job, queue,running, current_time, node_id): - self.resource_manager.assign_nodes_to_job(job, current_time, node_id) + def place_job_and_manage_queues(self, job, queue,running, current_time): + self.resource_manager.assign_nodes_to_job(job, current_time) running.append(job) queue.remove(job) if self.debug: scheduled_nodes = summarize_ranges(job.scheduled_nodes) print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}") - def check_available_nodes(self, job): - """Checks if there are available resources (CPU cores, GPU units) for the job on any node.""" - # Iterate through all nodes managed by the ResourceManager - for node in self.resource_manager.nodes: - if self.debug: - print(f"[DEBUG] Checking node {node['id']}: Available CPU: {node['available_cpu_cores']}, Available GPU: {node['available_gpu_units']}. Job needs CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required}") - # Skip if the node is down - if node['is_down']: - continue + def check_available_nodes(self,job): + nodes_available = False + if job.requested_nodes: # nodes specified, i.e., telemetry replay + if len(job.requested_nodes) <= len(self.resource_manager.available_nodes): + if self.policy == PolicyType.REPLAY: # Check if exact set is available: + nodes_available = set(job.requested_nodes).issubset(set(self.resource_manager.available_nodes)) + else: + # Sufficiently large number of nodes available + # but no exact set is required! + nodes_available = True + # remove the request for specific nodes and ask for n nodes + job.nodes_required = len(job.requested_nodes) + job.requested_nodes = [] + else: + pass + else: # Exact nodes not specified (e.g. synthetic jobs don't have nodes assigned) + nodes_available = len(self.resource_manager.available_nodes) >= job.nodes_required - # Check if the node has enough available CPU cores and GPU units - if (node['available_cpu_cores'] >= job.cpu_cores_required and - node['available_gpu_units'] >= job.gpu_units_required): - # If a suitable node is found, return its ID - return node['id'] - # If no suitable node is found, return None - return None + return nodes_available def backfill(self,queue:List, running:List, current_time): # Try to find a backfill candidate from the entire queue. while queue: - backfill_job, node_id = self.find_backfill_job(queue, running, current_time) - if backfill_job is not None and node_id is not None: - # Instead of placing here, return the job and node_id to the caller - return backfill_job, node_id + backfill_job = self.find_backfill_job(queue, running, current_time) + if backfill_job: + self.place_job_and_manage_queues(backfill_job, queue, running, current_time) else: break - return None, None def find_backfill_job(self, queue, running, current_time): """Finds a backfill job based on available nodes and estimated completion times. Loosely based on pseudocode from Leonenkov and Zhumatiy, 'Introducing new backfill-based scheduler for slurm resource manager.' Procedia computer science 66 (2015): 661-669. """ if not queue: - return None, None + return None # Identify when the next job in the queue could run as a time limit: first_job = queue[0] - # For multitenancy, we need to check if the first job can fit on any node - # based on its core/GPU requirements, not just nodes_required. - # This is a simplification; a more complex backfill might consider - # if the job can fit by combining resources from multiple nodes.
- # For now, we assume it needs to fit on a single node. - - # We need to know the total available resources if all running jobs finish by shadow_time_end - # This is complex with multitenancy, so for now, we'll simplify the backfill logic - # to just check if a job can fit on *any* node, not necessarily the one - # that will be freed up by the first job in line. - - # The original logic for shadow_time_end and shadow_nodes_avail is based on whole nodes. - # With multitenancy, this needs a more sophisticated resource projection. - # For now, we will make `time_limit` effectively infinite for backfill candidates - # if the job can fit on *any* node, and rely on `check_available_nodes`. - - # Revert to a simpler time_limit for now, or remove it if not applicable - # For now, let's assume time_limit is not strictly tied to node availability - # in the same way as before, and focus on resource availability. - time_limit = float('inf') # Effectively no time limit for backfill candidates + nodes_required = 0 + if first_job.requested_nodes: + nodes_required = len(first_job.requested_nodes) + else: + nodes_required = first_job.nodes_required + + sorted_running = sorted(running, key=lambda job: job.end_time) + # Identify when we have enough nodes, and therefore the start time of the first_job in line + shadow_time_end = 0 + shadow_nodes_avail = len(self.resource_manager.available_nodes) + for job in sorted_running: + if shadow_nodes_avail >= nodes_required: + break + else: + shadow_nodes_avail += job.nodes_required + shadow_time_end = job.end_time + + time_limit = shadow_time_end - current_time # We now have the time_limit after which no backfilled job should end # as the next job in line has the necessary resources after this time limit. @@ -187,13 +181,15 @@ class Scheduler: raise NotImplementedError(f"{self.bfpolicy} not implemented! Please implement!") else: raise NotImplementedError(f"{self.bfpolicy} not implemented.") - return None, None def return_first_fit(self, queue, time_limit): for job in queue: - # Check if the job can fit on any node based on its resource requirements - node_id = self.check_available_nodes(job) - if node_id is not None: - # If a suitable node is found, return the job and the node_id - return job, node_id - return None, None + if job.time_limit <= time_limit: + nodes_available = self.check_available_nodes(job) + if nodes_available: + return job + else: + continue + else: + continue + return None diff --git a/raps/schedulers/multitenant.py b/raps/schedulers/multitenant.py new file mode 100644 index 0000000..539605e --- /dev/null +++ b/raps/schedulers/multitenant.py @@ -0,0 +1,199 @@ +from typing import List +from ..utils import summarize_ranges +from ..policy import PolicyType, BackfillType + + +class Scheduler: + """ Multitenant job scheduler with various scheduling policies.
""" + + def __init__(self, config, policy, bfpolicy=None, jobs=None, resource_manager=None): + self.config = config + if policy is None: # policy is passed as policy=None, therefore default is not choosen + policy = "replay" + self.policy = PolicyType(policy) + self.bfpolicy = BackfillType(bfpolicy) + if resource_manager is None: + raise ValueError("Scheduler requires a ResourceManager instance") + self.resource_manager = resource_manager + self.debug = False + + def sort_jobs(self, queue, accounts=None): + """Sort jobs based on the selected scheduling policy.""" + if self.policy == PolicyType.FCFS: + return sorted(queue, key=lambda job: job.submit_time) + elif self.policy == PolicyType.PRIORITY: + return sorted(queue, key=lambda job: job.priority, reverse=True) + elif self.policy == PolicyType.SJF: + return sorted(queue, key=lambda job: job.time_limit) + elif self.policy == PolicyType.LJF: + return sorted(queue, key=lambda job: job.nodes_required, reverse=True) + elif self.policy == PolicyType.REPLAY: + return sorted(queue, key=lambda job: job.start_time) + else: + raise ValueError(f"Policy not implemented: {self.policy}") + + def schedule(self, queue, running, current_time, accounts=None, sorted=False): + # Sort the queue in place. + if not sorted: + queue[:] = self.sort_jobs(queue, accounts) + + # Iterate over a copy of the queue since we might remove items + for job in queue[:]: + if self.debug: + print(f"[DEBUG] Scheduler: Considering job {job.id} (CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required})") + if self.policy == PolicyType.REPLAY: + if job.start_time > current_time: + continue # Replay: Job didn't start yet. Next! + else: + pass + else: + pass + + nodes_available = self.check_available_nodes(job) + + if nodes_available is not None: + self.place_job_and_manage_queues(job, queue, running, current_time, nodes_available) + else: # In case the job was not placed, see how we should continue: + if self.bfpolicy is not None: + backfill_job, node_id = self.backfill(queue, running, current_time) + if backfill_job and node_id is not None: + self.place_job_and_manage_queues(backfill_job, queue, running, current_time, node_id) + + # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. + if self.policy in [PolicyType.REPLAY]: + # print(f"Nodes available {nodes_available} - Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}") + continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. + elif self.policy in [PolicyType.FCFS, PolicyType.PRIORITY, + PolicyType.LJF, PolicyType.SJF]: + break # The job at the front of the queue doesnt fit stop processing the queue. + else: + raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!") + + def prepare_system_state(self,jobs_to_submit:List, running, timestep_start): + # def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): + """ + In the case of replay and fast forward, previously placed jobs should be present. + + """ + if self.policy == PolicyType.REPLAY: + total_jobs = len(jobs_to_submit) + print(f"All jobs: {total_jobs}") + + # Keep only jobs have an end time in the future future. 
+ jobs_to_submit[:] = [job for job in jobs_to_submit if job['end_time'] >= timestep_start] + print(f"Num jobs in the past: {total_jobs - len(jobs_to_submit)}") + + # Identify jobs that started in the past and split them from the jobs that will start in the future: + jobs_to_start_now = [job for job in jobs_to_submit if job['start_time'] < timestep_start] + print(f"Num jobs that started in the past: {len(jobs_to_start_now)}") + + jobs_to_submit[:] = [job for job in jobs_to_submit if job['start_time'] >= timestep_start] + print(f"Num jobs to be scheduled in the simulation: {len(jobs_to_submit)}") + + # Now schedule them with their original start time. + # This has to be done one by one! + for job in jobs_to_start_now: + self.schedule([job], running, job['start_time'], sorted=True) + # self.schedule(jobs_to_start_now, running, 0, False) + return jobs_to_submit + else: + return jobs_to_submit + + def place_job_and_manage_queues(self, job, queue,running, current_time, node_id): + self.resource_manager.assign_nodes_to_job(job, current_time, node_id) + running.append(job) + queue.remove(job) + if self.debug: + scheduled_nodes = summarize_ranges(job.scheduled_nodes) + print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}") + + def check_available_nodes(self, job): + """Checks if there are available resources (CPU cores, GPU units) for the job on any node.""" + # Iterate through all nodes managed by the ResourceManager + for node in self.resource_manager.nodes: + if self.debug: + print(f"[DEBUG] Checking node {node['id']}: Available CPU: {node['available_cpu_cores']}, Available GPU: {node['available_gpu_units']}. Job needs CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required}") + # Skip if the node is down + if node['is_down']: + continue + + # Check if the node has enough available CPU cores and GPU units + if (node['available_cpu_cores'] >= job.cpu_cores_required and + node['available_gpu_units'] >= job.gpu_units_required): + # If a suitable node is found, return its ID + return node['id'] + # If no suitable node is found, return None + return None + + def backfill(self,queue:List, running:List, current_time): + # Try to find a backfill candidate from the entire queue. + while queue: + backfill_job, node_id = self.find_backfill_job(queue, running, current_time) + if backfill_job is not None and node_id is not None: + # Instead of placing here, return the job and node_id to the caller + return backfill_job, node_id + else: + break + return None, None + + def find_backfill_job(self, queue, running, current_time): + """Finds a backfill job based on available nodes and estimated completion times. + + Loosely based on pseudocode from Leonenkov and Zhumatiy, 'Introducing new backfill-based + scheduler for slurm resource manager.' Procedia computer science 66 (2015): 661-669. + """ + if not queue: + return None, None + + # Identify when the next job in the queue could run as a time limit: + first_job = queue[0] + # For multitenancy, we need to check if the first job can fit on any node + # based on its core/GPU requirements, not just nodes_required. + # This is a simplification; a more complex backfill might consider + # if the job can fit by combining resources from multiple nodes. + # For now, we assume it needs to fit on a single node.
+ + # We need to know the total available resources if all running jobs finish by shadow_time_end + # This is complex with multitenancy, so for now, we'll simplify the backfill logic + # to just check if a job can fit on *any* node, not necessarily the one + # that will be freed up by the first job in line. + + # The original logic for shadow_time_end and shadow_nodes_avail is based on whole nodes. + # With multitenancy, this needs a more sophisticated resource projection. + # For now, we will make `time_limit` effectively infinite for backfill candidates + # if the job can fit on *any* node, and rely on `check_available_nodes`. + + # Revert to a simpler time_limit for now, or remove it if not applicable + # For now, let's assume time_limit is not strictly tied to node availability + # in the same way as before, and focus on resource availability. + time_limit = float('inf') # Effectively no time limit for backfill candidates + + # We now have the time_limit after which no backfilled job should end + # as the next job in line has the necessary resources after this time limit. + + # Find and return the first job that fits + if self.bfpolicy == BackfillType.NONE: + pass + elif self.bfpolicy == BackfillType.EASY: + queue[:] = sorted(queue, key=lambda job: job.submit_time) + return self.return_first_fit(queue,time_limit) + elif self.bfpolicy == BackfillType.FIRSTFIT: + pass # Stay with the prioritization! + return self.return_first_fit(queue,time_limit) + elif self.bfpolicy in [BackfillType.BESTFIT, + BackfillType.GREEDY, + BackfillType.CONSERVATIVE, + ]: + raise NotImplementedError(f"{self.bfpolicy} not implemented! Please implement!") + else: + raise NotImplementedError(f"{self.bfpolicy} not implemented.") + return None, None + + def return_first_fit(self, queue, time_limit): + for job in queue: + # Check if the job can fit on any node based on its resource requirements + node_id = self.check_available_nodes(job) + if node_id is not None: + # If a suitable node is found, return the job and the node_id + return job, node_id + return None, None -- GitLab From c985743d4553baebd1c895c72148c02d13f8f4fa Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 20 Jul 2025 17:02:23 -0400 Subject: [PATCH 173/388] Remove CORES_PER_CPU requirement - only required for multitenancy --- config/40frontiers/system.json | 1 - config/adastraMI250/system.json | 1 - config/frontier/system.json | 1 - config/fugaku/system.json | 1 - config/gcloudv2/system.json | 1 - config/lassen/system.json | 1 - config/marconi100/system.json | 1 - config/summit/system.json | 1 - 8 files changed, 8 deletions(-) diff --git a/config/40frontiers/system.json b/config/40frontiers/system.json index dc19251..51add94 100644 --- a/config/40frontiers/system.json +++ b/config/40frontiers/system.json @@ -11,7 +11,6 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [41], "DOWN_NODES": [], - "CORES_PER_CPU": 64, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/adastraMI250/system.json b/config/adastraMI250/system.json index c5268f8..36a689e 100644 --- a/config/adastraMI250/system.json +++ b/config/adastraMI250/system.json @@ -11,7 +11,6 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383], - "CORES_PER_CPU": 64, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 8, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/frontier/system.json b/config/frontier/system.json index ced62d9..b1b9d76 100644 ---
a/config/frontier/system.json +++ b/config/frontier/system.json @@ -11,7 +11,6 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [41], "DOWN_NODES": [], - "CORES_PER_CPU": 64, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/fugaku/system.json b/config/fugaku/system.json index 5310f2b..6a0e63a 100644 --- a/config/fugaku/system.json +++ b/config/fugaku/system.json @@ -11,7 +11,6 @@ "NODES_PER_RECTIFIER": 48, "MISSING_RACKS": [], "DOWN_NODES": [], - "CORES_PER_CPU": 48, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 0, "CPU_PEAK_FLOPS": 3.379E12, diff --git a/config/gcloudv2/system.json b/config/gcloudv2/system.json index 229617e..4b6fc7b 100644 --- a/config/gcloudv2/system.json +++ b/config/gcloudv2/system.json @@ -11,7 +11,6 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [], "DOWN_NODES": [], - "CORES_PER_CPU": 20, "CPUS_PER_NODE": 1, "GPUS_PER_NODE": 0, "CPU_PEAK_FLOPS": 2048E9, diff --git a/config/lassen/system.json b/config/lassen/system.json index 44da66e..a383353 100644 --- a/config/lassen/system.json +++ b/config/lassen/system.json @@ -12,7 +12,6 @@ "MISSING_RACKS": [44], "DOWN_NODES": [], "CPUS_PER_NODE": 2, - "CORES_PER_CPU": 22, "THREADS_PER_CORE": 4, "CPU_FREQUENCY": 2400000000, "GPUS_PER_NODE": 4, diff --git a/config/marconi100/system.json b/config/marconi100/system.json index 38a2057..435c87c 100644 --- a/config/marconi100/system.json +++ b/config/marconi100/system.json @@ -11,7 +11,6 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [49, 50], "DOWN_NODES": [], - "CORES_PER_CPU": 24, "CPUS_PER_NODE": 2, "GPUS_PER_NODE": 4, "CPU_PEAK_FLOPS": 396.8E9, diff --git a/config/summit/system.json b/config/summit/system.json index c3b6102..74ba3b7 100644 --- a/config/summit/system.json +++ b/config/summit/system.json @@ -10,7 +10,6 @@ "NODES_PER_RECTIFIER": 4, "MISSING_RACKS": [257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271], "DOWN_NODES": [], - "CORES_PER_CPU": 22, "CPUS_PER_NODE": 2, "GPUS_PER_NODE": 6, "CPU_PEAK_FLOPS": 436.2E9, -- GitLab From f9dd58fb7477630e0e3ae0fa26feab0736ca4cbc Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 20 Jul 2025 17:07:34 -0400 Subject: [PATCH 174/388] One more fix caused by multitenancy mods --- raps/engine.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index b3a1db6..d2deee6 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -619,10 +619,11 @@ class Engine: 'total cost': f'${total_cost:.2f}' } - # Multitenancy Stats - total_jobs_loaded = self.total_initial_jobs # Assuming this is passed to __init__ - stats['total jobs loaded'] = total_jobs_loaded - stats['jobs completed percentage'] = f"{(self.jobs_completed / total_jobs_loaded * 100):.2f}%" + if self.config['multitenant']: + # Multitenancy Stats + total_jobs_loaded = self.total_initial_jobs # Assuming this is passed to __init__ + stats['total jobs loaded'] = total_jobs_loaded + stats['jobs completed percentage'] = f"{(self.jobs_completed / total_jobs_loaded * 100):.2f}%" if self.node_occupancy_history: # Calculate average concurrent jobs per node (average density across all nodes and timesteps) -- GitLab From 3ccdb40e997af9c6af70c2b0e53d7e2f9819575f Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 20 Jul 2025 17:15:12 -0400 Subject: [PATCH 175/388] Rename whole-node resource manager to ExclusiveNodes... 
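After the rename, existing callers keep working through the factory alias exported by raps/resmgr/__init__.py. A short usage sketch; the config dict below is a placeholder, not a real system.json:

    # Sketch, assuming the raps package from this series is importable.
    from raps.resmgr import ResourceManager  # alias for make_resource_manager

    cfg = {'multitenant': False, 'CPUS_PER_NODE': 1, 'CORES_PER_CPU': 64, 'GPUS_PER_NODE': 4}
    rm = ResourceManager(total_nodes=8, down_nodes=[3], config=cfg)
    print(type(rm).__name__)  # ExclusiveNodeResourceManager

    cfg['multitenant'] = True
    rm = ResourceManager(total_nodes=8, down_nodes=[], config=cfg)
    print(type(rm).__name__)  # MultiTenantResourceManager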
and file to default.py --- raps/resmgr/__init__.py | 8 +++---- raps/resmgr/{whole_node.py => default.py} | 29 +++++++++++++++++++---- 2 files changed, 28 insertions(+), 9 deletions(-) rename raps/resmgr/{whole_node.py => default.py} (77%) diff --git a/raps/resmgr/__init__.py b/raps/resmgr/__init__.py index 840a6dc..609f104 100644 --- a/raps/resmgr/__init__.py +++ b/raps/resmgr/__init__.py @@ -2,17 +2,17 @@ ResourceManager package initializer. Exports a factory that returns the appropriate manager based on config. """ -from .whole_node import WholeNodeResourceManager +from .default import ExclusiveNodeResourceManager from .multitenant import MultiTenantResourceManager def make_resource_manager(total_nodes, down_nodes, config): """ - Factory to choose between whole-node and multitenant managers. + Factory to choose between exclusive-node and multitenant managers. """ if config.get("multitenant", False): return MultiTenantResourceManager(total_nodes, down_nodes, config) - return WholeNodeResourceManager(total_nodes, down_nodes, config) + return ExclusiveNodeResourceManager(total_nodes, down_nodes, config) # Alias for backward compatibility ResourceManager = make_resource_manager @@ -20,6 +20,6 @@ ResourceManager = make_resource_manager __all__ = [ "make_resource_manager", "ResourceManager", - "WholeNodeResourceManager", + "ExclusiveNodeResourceManager", "MultiTenantResourceManager" ] diff --git a/raps/resmgr/whole_node.py b/raps/resmgr/default.py similarity index 77% rename from raps/resmgr/whole_node.py rename to raps/resmgr/default.py index a2a574a..af7b8fa 100644 --- a/raps/resmgr/whole_node.py +++ b/raps/resmgr/default.py @@ -1,8 +1,8 @@ from ..job import JobState -class WholeNodeResourceManager: +class ExclusiveNodeResourceManager: """ - Legacy whole-node resource manager: allocates and frees full nodes. + Legacy exclusive-node resource manager: allocates and frees full nodes. """ def __init__(self, total_nodes, down_nodes, config=None): self.total_nodes = total_nodes @@ -86,7 +86,26 @@ class WholeNodeResourceManager: return util def node_failure(self, mtbf): - """ - Legacy whole-node mode does not simulate failures; always return empty list. 
-        """
-        return []
+        """Simulate node failure using Weibull distribution."""
+        import numpy as np
+        from scipy.stats import weibull_min
+        shape_parameter = 1.5
+        scale_parameter = mtbf * 3600  # Convert to seconds
+
+        # Create a NumPy array of node indices, excluding down nodes
+        all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes)))
+
+        # Sample the Weibull distribution for all nodes at once
+        random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size)
+
+        # Identify nodes that have failed
+        failure_threshold = 0.1
+        failed_nodes_mask = random_values < failure_threshold
+        newly_downed_nodes = all_nodes[failed_nodes_mask]
+
+        # Update available and down nodes
+        for node_index in newly_downed_nodes:
+            if node_index in self.available_nodes:
+                self.available_nodes.remove(node_index)
+            self.down_nodes.add(int(node_index))
+        return newly_downed_nodes.tolist()
-- 
GitLab


From d16e34d08d5ed493d8605e1f25c84b1b028df782 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 23 Jul 2025 16:39:41 -0400
Subject: [PATCH 176/388] Add synthetic workload test for multitenancy `-w
 multitenant` - see README.md

---
 README.md         |   3 +
 args.py           |   4 +-
 multi-part-sim.py |   2 +
 raps/workload.py  | 143 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fe815bd..ad94453 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,9 @@ For MIT Supercloud
     # Re-run simulation using npz files (much faster load)
     python multi-part-sim.py -x mit_supercloud/* -f part-*.npz --system mit_supercloud
 
+    # Synthetic tests for verification studies:
+    python multi-part-sim.py -x 'mit_supercloud/*' -w multitenant
+
 ## Perform Network Simulation
 
 Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to
diff --git a/args.py b/args.py
index f077b85..adf8c49 100644
--- a/args.py
+++ b/args.py
@@ -12,7 +12,7 @@ parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU co
 parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
 parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
 parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout')
-parser.add_argument('-n', '--numjobs', type=int, default=1000, help='Number of jobs to schedule')
+parser.add_argument('-n', '--numjobs', type=int, default=100, help='Number of jobs to schedule')
 parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
 parser.add_argument('--start', type=str, help='ISO8061 string for start of simulation')
 parser.add_argument('--end', type=str, help='ISO8061 string for end of simulation')
@@ -42,7 +42,7 @@ parser.add_argument('--jid', type=str, default='*', help='Replay job id')
 parser.add_argument('--scale', type=int, default=0, help='Scale telemetry to max nodes specified in order to run telemetry on a smaller smaller target system/partition, e.g., --scale 192')
 
 # Synthetic workloads
-choices = ['random', 'benchmark', 'peak', 'idle']
+choices = ['random', 'benchmark', 'peak', 'idle', 'multitenant']
 parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload')
 
 # Scheduling options
diff --git a/multi-part-sim.py b/multi-part-sim.py
index 182638d..4b2a51d 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -92,6 +92,8 @@ if args.replay:
 else: #
Synthetic workload wl = Workload(*configs) + total_initial_jobs = args.numjobs + # Generate jobs based on workload type jobs = getattr(wl, args.workload)(num_jobs=args.numjobs) diff --git a/raps/workload.py b/raps/workload.py index 56d7cb3..440f34b 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -310,3 +310,146 @@ class Workload: jobs.append(job_info) return jobs + + + def multitenant(self, **kwargs): + """ + Generate deterministic jobs to validate multitenant scheduling & power. + + Parameters + ---------- + mode : str + One of: + - 'ONE_JOB_PER_NODE_ALL_CORES' + - 'TWO_JOBS_PER_NODE_SPLIT' + - 'STAGGERED_JOBS_PER_NODE' + wall_time : int + Duration (seconds) of each job (default: 3600) + trace_quanta : int + Sampling interval for traces; defaults to config['TRACE_QUANTA'] + + Returns + ------- + list[dict] + List of job_dict entries. + """ + mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') + wall_time = kwargs.get('wall_time', 3600) + + jobs = [] + + for partition in self.partitions: + cfg = self.config_map[partition] + trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) + + cores_per_cpu = cfg.get('CORES_PER_CPU', 1) + cpus_per_node = cfg.get('CPUS_PER_NODE', 1) + cores_per_node = cores_per_cpu * cpus_per_node + gpus_per_node = cfg.get('GPUS_PER_NODE', 0) + + n_nodes = cfg['AVAILABLE_NODES'] + + def make_trace(cpu_util, gpu_util): + return self.compute_traces(cpu_util, gpu_util, wall_time, trace_quanta) + + job_id_ctr = 0 + + if mode == 'ONE_JOB_PER_NODE_ALL_CORES': + # Each node runs one job that consumes all cores/GPUs + for nid in range(n_nodes): + cpu_trace, gpu_trace = make_trace(cores_per_node, gpus_per_node) + jobs.append(job_dict( + nodes_required=1, + cpu_cores_required=cores_per_node, + gpu_units_required=gpus_per_node, + name=f"MT_full_node_{partition}_{nid}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + wall_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time + )) + job_id_ctr += 1 + + elif mode == 'TWO_JOBS_PER_NODE_SPLIT': + # Two jobs per node: split CPU/GPU roughly in half + for nid in range(n_nodes): + cpu_a = cores_per_node // 2 + cpu_b = cores_per_node - cpu_a + gpu_a = gpus_per_node // 2 + gpu_b = gpus_per_node - gpu_a + + for idx, (c_req, g_req, tag) in enumerate([(cpu_a, gpu_a, 'A'), + (cpu_b, gpu_b, 'B')]): + cpu_trace, gpu_trace = make_trace(c_req, g_req) + jobs.append(job_dict( + nodes_required=1, # still one node; multitenant RM packs cores + cpu_cores_required=c_req, + gpu_units_required=g_req, + name=f"MT_split_node_{partition}_{nid}_{tag}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + wall_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time + )) + job_id_ctr += 1 + + elif mode == 'STAGGERED_JOBS_PER_NODE': + # Three jobs per node, staggered starts: 0, wall_time/3, 2*wall_time/3 + offsets = [0, wall_time // 3, 2 * wall_time // 3] + cpu_each = cores_per_node // 3 or 1 + gpu_each = max(1, gpus_per_node // 3) if gpus_per_node else 0 + + for nid in 
range(n_nodes):
+                for k, offset in enumerate(offsets):
+                    cpu_trace, gpu_trace = make_trace(cpu_each, gpu_each)
+                    jobs.append(job_dict(
+                        nodes_required=1,
+                        cpu_cores_required=cpu_each,
+                        gpu_units_required=gpu_each,
+                        name=f"MT_stagger_node_{partition}_{nid}_{k}",
+                        account=random.choice(ACCT_NAMES),
+                        cpu_trace=cpu_trace,
+                        gpu_trace=gpu_trace,
+                        ntx_trace=[], nrx_trace=[],
+                        end_state='COMPLETED',
+                        id=job_id_ctr,
+                        priority=random.randint(0, MAX_PRIORITY),
+                        partition=partition,
+                        submit_time=offset,
+                        time_limit=wall_time,
+                        start_time=offset,
+                        end_time=offset + wall_time,
+                        wall_time=wall_time,
+                        trace_time=wall_time,
+                        trace_start_time=0,
+                        trace_end_time=wall_time
+                    ))
+                    job_id_ctr += 1
+            else:
+                raise ValueError(f"Unknown multitenant mode: {mode}")
+
+        return jobs
-- 
GitLab

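Note: the TWO_JOBS_PER_NODE_SPLIT mode above depends on the floor/remainder split
covering every core and GPU even for odd counts, and the staggered mode applies the
same idea three ways. A quick check of that arithmetic, using made-up node shapes
rather than any real system config:

    def split(n):
        a = n // 2
        return a, n - a

    for cores, gpus in [(64, 4), (63, 3), (1, 0)]:
        (ca, cb), (ga, gb) = split(cores), split(gpus)
        assert ca + cb == cores and ga + gb == gpus
        print(f"cores {cores} -> {ca}+{cb}, gpus {gpus} -> {ga}+{gb}")

This is also why STAGGERED_JOBS_PER_NODE falls back to one core per job when
cores_per_node // 3 rounds down to zero.
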
From 10b9ba7101932299696a2d9c8d2cfc9d14fe809f Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 23 Jul 2025 22:59:55 -0400
Subject: [PATCH 177/388] Work on some verifications: esp. cpu traces - get
 nodes_alloc properly implemented

---
 args.py                                   |   4 +-
 raps/dataloaders/mit_supercloud/loader.py | 138 ++++++++++++++++++----
 raps/dataloaders/mit_supercloud/utils.py  |  12 ++
 raps/power.py                             |   1 +
 raps/resmgr/multitenant.py                |  12 ++
 5 files changed, 143 insertions(+), 24 deletions(-)

diff --git a/args.py b/args.py
index adf8c49..cab6ffe 100644
--- a/args.py
+++ b/args.py
@@ -14,8 +14,8 @@ parser.add_argument('-t', '--time', type=str, default=None, help='Length of time
 parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout')
 parser.add_argument('-n', '--numjobs', type=int, default=100, help='Number of jobs to schedule')
 parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
-parser.add_argument('--start', type=str, help='ISO8061 string for start of simulation')
-parser.add_argument('--end', type=str, help='ISO8061 string for end of simulation')
+parser.add_argument('--start', type=str, default='2021-05-21T13:00', help='ISO8601 string for start of simulation')
+parser.add_argument('--end', type=str, default='2021-05-21T14:00', help='ISO8601 string for end of simulation')
 parser.add_argument('--seed', action='store_true', help='Set random number seed for deterministic simulation')
 parser.add_argument('-u', '--uncertainties', action='store_true',
                     help='Change from floating point units to floating point units with uncertainties.' + \
diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py
index 4ebef2f..9680613 100644
--- a/raps/dataloaders/mit_supercloud/loader.py
+++ b/raps/dataloaders/mit_supercloud/loader.py
@@ -11,9 +11,76 @@ import pandas as pd
 from datetime import datetime
 from types import SimpleNamespace
 from tqdm import tqdm
-from raps.job import job_dict
+from raps.job import job_dict, Job
 from .utils import proc_cpu_series, proc_gpu_series, to_epoch
 from .utils import DEFAULT_START, DEFAULT_END
+from .utils import validate_job_traces
+
+
+import re
+from typing import Dict, Union, Optional
+
+
+# Default SLURM TRES id→name map (extend as needed)
+DEFAULT_TRES_ID_MAP = {
+    1: "cpu",
+    2: "mem",  # in MB
+    3: "energy",
+    4: "gres/gpu",
+    5: "billing",
+}
+
+
+def parse_tres_alloc(tres_str: Union[str, None],
+                     id_map: Optional[Dict[int, str]] = None,
+                     return_ids: bool = False) -> Dict[Union[int, str], int]:
+    """
+    Parse a Slurm tres_alloc/tres_req field like: '1=20,2=170000,4=1,5=20'
+
+    Parameters
+    ----------
+    tres_str : str | None
+        The raw TRES string from Slurm (quotes OK). If None/empty returns {}.
+    id_map : dict[int,str] | None
+        Optional mapping from TRES numeric IDs to friendly names.
+        Falls back to DEFAULT_TRES_ID_MAP if not provided.
+    return_ids : bool
+        If True, keys are the numeric IDs. If False, keys use id_map names
+        (falls back to the numeric ID as a string if unknown).
+
+    Returns
+    -------
+    dict
+        Parsed key/value pairs. Example:
+        {'cpu': 20, 'mem': 170000, 'gres/gpu': 1, 'billing': 20}
+    """
+    if not tres_str:
+        return {}
+
+    id_map = id_map or DEFAULT_TRES_ID_MAP
+
+    # strip quotes or whitespace
+    tres_str = tres_str.strip().strip('"').strip("'")
+
+    # Split on commas, but be tolerant of spaces
+    parts = [p for p in tres_str.split(",") if p]
+
+    out: Dict[Union[int, str], int] = {}
+
+    for p in parts:
+        m = re.match(r"\s*(\d+)\s*=\s*([0-9]+)\s*$", p)
+        if not m:
+            # skip or raise; here we skip silently
+            continue
+        tid = int(m.group(1))
+        val = int(m.group(2))
+        if return_ids:
+            out[tid] = val
+        else:
+            key = id_map.get(tid, str(tid))
+            out[key] = val
+
+    return out
 
 
 def load_data(local_dataset_path, **kwargs):
@@ -47,13 +114,18 @@ def load_data(local_dataset_path, **kwargs):
     data_root = os.path.dirname(slurm_path)
 
     sl = pd.read_csv(slurm_path)
+    sl["__line__"] = sl.index + 2
 
     # 2) date window
     start_ts = to_epoch(kwargs.get("start", DEFAULT_START))
     end_ts = to_epoch(kwargs.get("end", DEFAULT_END))
     #duration = end_ts - start_ts
 
-    sl = sl[(sl.time_submit >= start_ts) & (sl.time_submit < end_ts)]
+    mask = (sl.time_submit >= start_ts) & (sl.time_submit < end_ts)
+    hits = sl.loc[mask]
+    print("line numbers in slurm-log.csv", hits["__line__"].tolist())
+    sl = sl[mask]
+
     # —— ERROR CATCH: no jobs in this window? ——
     if sl.empty:
         raise ValueError(
@@ -151,15 +223,18 @@
             start_time = job_row.get('time_start', 'N/A')
             wall_time = job_row.get('time_limit', 'N/A')
             tres_alloc = job_row.get('tres_alloc', 'N/A')
+            tres_alloc_dict = parse_tres_alloc(tres_alloc)
+            rec["tres_alloc_dict"] = tres_alloc_dict
             gres_used = job_row.get('gres_used', 'N/A')
 
             tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid}")
             tqdm.write(f"  Start Time: {start_time}, Wall Time: {wall_time}s")
-            tqdm.write(f"  TRES Alloc: {tres_alloc}")
-            tqdm.write(f"  GRES Used: {gres_used}")
+            tqdm.write(f"  TRES Alloc: {tres_alloc_dict}")
+            #tqdm.write(f"  GRES Used: {gres_used}")
         else:
             tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)")
 
+        rec["nodes_alloc"] = int(job_row["nodes_alloc"])
         rec["cpu"] = proc_cpu_series(df)
 
     print(f"GPU candidate files ({len(gpu_files)}):")
@@ -171,6 +246,7 @@
         print(f"\n[DEBUG] attempting {fp!r}")
         print("   full path exists:", os.path.exists(fp), fp)
         if not os.path.exists(fp):
+            print("gpu path doesn't exist, skipping")
             continue
 
         tqdm.write(f"Reading GPU {os.path.basename(fp)}")
@@ -226,7 +302,8 @@
             avg_util = raw.mean(axis=1)
 
             # 4) scale by number of nodes requested
-            nodes = rec.get("nodes_alloc", 1)
+            #nodes = rec.get("nodes_alloc", 1)
+            nodes = rec.get("nodes_alloc")
             rec["gpu_trace"] = (avg_util * nodes).tolist()
 
             if debug:
@@ -255,10 +332,16 @@
 
     # Get CPUS_PER_NODE and GPUS_PER_NODE from config
     config = kwargs.get('config', {})
-    cpus_per_node = config.get('CPUS_PER_NODE', 2)  # Default to 2 if not found
-    gpus_per_node = config.get('GPUS_PER_NODE', 0)  # Default to 0 if not found
+    cpus_per_node = config.get('CPUS_PER_NODE')
+    cores_per_cpu = config.get('CORES_PER_CPU')
+    gpus_per_node = config.get('GPUS_PER_NODE')
+    print(f"*** cpus_per_node: {cpus_per_node}, cores_per_cpu: {cores_per_cpu}, gpus_per_node: {gpus_per_node}")
+
+    quanta = config.get('TRACE_QUANTA')
 
     for jid, rec in data.items():
+        nr = rec.get("nodes_alloc")
+
         cpu = rec.get("cpu")
         gpu = rec.get("gpu_trace")
 
@@ -287,37 +370,48 @@
             print("skipping")
             continue
 
-        st = rec.get("time_submit",t0) - start_ts
-        nr = rec.get("nodes_alloc",1)
-        if nr>1:
-            cpu_tr = [x/nr for x in cpu_tr]
+        # Calculate cpu_cores_required and gpu_units_required from tres_alloc
+        total_cpu = rec["tres_alloc_dict"].get('cpu', 0)
+        # Can either allocate gpu:volta (1002) or gpu:tesla (1001) but not both
+        total_gpu = rec["tres_alloc_dict"].get('1002') or rec["tres_alloc_dict"].get('1001', 0)
+
+        cpu_cores_req = math.ceil(total_cpu / nr)
+        gpu_units_req = math.ceil(total_gpu / nr)
 
-        # Calculate cpu_cores_required and gpu_units_required
-        cpu_cores_req = math.ceil(max(cpu_tr) * cpus_per_node) if cpu_tr else 0
-        gpu_units_req = math.ceil(max(gpu_tr) * gpus_per_node) if gpu_tr else 0
+        print(f"*** nr: {nr}, cpu_cores_req: {cpu_cores_req}, gpu_units_req: {gpu_units_req}", flush=True)
+        print(jid, cpu_tr[:5], flush=True)
+        # Unclear whether utilization should also be divided by nodes (first form); the per-node form below seems more likely
+        #cpu_tr = [float(f"{x/nr/cores_per_cpu:.4g}") for x in cpu_tr]
+        cpu_tr = [float(f"{x/cores_per_cpu:.4g}") for x in cpu_tr]
+        print(jid, cpu_tr[:5])
 
-        jobs_list.append(job_dict(
+        submit_time = rec.get("time_submit", t0) - start_ts
+
+        job = job_dict(
             nodes_required     = nr,
             cpu_cores_required = cpu_cores_req,
             gpu_units_required = gpu_units_req,
             name               = rec.get("name_job", "unknown"),
             account            = rec.get("id_user", "unknown"),
             cpu_trace          = cpu_tr,
             gpu_trace          = gpu_tr,
             ntx_trace          = [],
             nrx_trace          = [],
             end_state          = rec.get("state_end", "unknown"),
             id                 = jid,
             priority           = rec.get("priority",0),
             submit_time        = submit_time,
             time_limit         = rec.get("time_limit",0),
             start_time         = t0 - start_ts,
             end_time           = t1 - start_ts,
             wall_time          = max(0, t1-t0),
             trace_time         = len(cpu_tr)*quanta,
             trace_start_time   = 0,
             trace_end_time     = len(cpu_tr)*quanta
-        ))
+        )
+        #validate_job_traces(Job(job), granularity=quanta)
+        # if nr > 1: # uncomment to test multinode jobs - need to run for 24 hours to get enough jobs to populate
+        jobs_list.append(job)
 
     # Calculate min_overall_utime and max_overall_utime
     min_overall_utime = int(sl.time_submit.min())
diff --git a/raps/dataloaders/mit_supercloud/utils.py b/raps/dataloaders/mit_supercloud/utils.py
index 49f455a..eec1de0 100644
--- a/raps/dataloaders/mit_supercloud/utils.py
+++ b/raps/dataloaders/mit_supercloud/utils.py
@@ -238,3 +238,15 @@ def proc_gpu_series(cpu_df, dfi, gpu_cnt):
     gpu_df.rename(columns=ren, inplace=True)
 
     return gpu_df, gpu_cnt + 1
+
+
+def validate_job_traces(job, granularity=1):
+    print(job)
+    assert job.cpu_trace is not None, f"job {job.id} missing cpu_trace"
+    assert job.gpu_trace is not None, f"job {job.id} missing gpu_trace"
+    assert all(p >= 0 for p in job.cpu_trace), f"neg cpu power in job {job.id}"
+    assert all(p >= 0 for p in job.gpu_trace), f"neg gpu power in job {job.id}"
+    # Length sanity: at least wall_time/granularity samples
+    needed = max(1, int(job.wall_time / granularity))
+    assert len(job.cpu_trace) >= needed,
f"cpu_trace too short for job {job.id}" + assert len(job.gpu_trace) >= needed, f"gpu_trace too short for job {job.id}" diff --git a/raps/power.py b/raps/power.py index e61010f..0f2c312 100644 --- a/raps/power.py +++ b/raps/power.py @@ -15,6 +15,7 @@ import numpy as np import pandas as pd import uncertainties as uf from .utils import linear_to_3d_index +from .validators import recompute_power def custom_str_uncertainties(self): diff --git a/raps/resmgr/multitenant.py b/raps/resmgr/multitenant.py index 94c1fef..e7121be 100644 --- a/raps/resmgr/multitenant.py +++ b/raps/resmgr/multitenant.py @@ -3,6 +3,11 @@ from ..job import JobState from scipy.stats import weibull_min +def assert_node_accounting_ok(node): + assert node['available_cpu_cores'] >= 0, "available_cpu_cores went negative" + assert node['available_gpu_units'] >= 0, "available_gpu_units went negative" + + class MultiTenantResourceManager: """ Resource manager for per-node CPU/GPU multitenancy. @@ -65,6 +70,13 @@ class MultiTenantResourceManager: self.allocated_cpu_cores += job.cpu_cores_required self.allocated_gpu_units += job.gpu_units_required + # ---- Invariant checks (after mutating node/RM state) ---- + assert_node_accounting_ok(found) # no negatives left + assert self.allocated_cpu_cores >= 0 and self.allocated_gpu_units >= 0 + # Optional: global sanity vs. totals + assert self.allocated_cpu_cores <= sum(n['total_cpu_cores'] for n in self.nodes) + assert self.allocated_gpu_units <= sum(n['total_gpu_units'] for n in self.nodes) + # Record on job job.scheduled_nodes = [found['id']] job.allocated_cpu_cores = job.cpu_cores_required -- GitLab From 3300327956094072a0d9cc763da2b4736d205e59 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 24 Jul 2025 00:15:05 -0400 Subject: [PATCH 178/388] Add mapping of jobs to as-scheduled nodes - add nodelist.txt --- raps/dataloaders/mit_supercloud/loader.py | 28 +- raps/dataloaders/mit_supercloud/nodelist.txt | 1135 ++++++++++++++++++ 2 files changed, 1156 insertions(+), 7 deletions(-) create mode 100644 raps/dataloaders/mit_supercloud/nodelist.txt diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 9680613..0225ea1 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -4,9 +4,12 @@ MIT Supercloud job trace processing module with load_data function. 
""" +import ast import os import math import pandas as pd +import re +from typing import Dict, Union, Optional from datetime import datetime from types import SimpleNamespace @@ -16,11 +19,6 @@ from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END from .utils import validate_job_traces - -import re -from typing import Dict, Union, Optional - - # Default SLURM TRES id→name map (extend as needed) DEFAULT_TRES_ID_MAP = { 1: "cpu", @@ -116,6 +114,12 @@ def load_data(local_dataset_path, **kwargs): sl = pd.read_csv(slurm_path) sl["__line__"] = sl.index + 2 + # Read the full node list into a Python list and build lookup from hostname → index + NL_PATH = os.path.join(os.path.dirname(__file__), "nodelist.txt") + with open(NL_PATH) as f: + all_nodes = [line.strip() for line in f if line.strip()] + node_to_idx = {host: idx for idx, host in enumerate(all_nodes)} + # 2) date window start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) @@ -234,6 +238,15 @@ def load_data(local_dataset_path, **kwargs): else: tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") + # Get allocated nodes "['r9189566-n911952','r9189567-n...']" + raw = job_row.get("nodelist", "") + if raw: + hosts = ast.literal_eval(raw) + rec["scheduled_nodes"] = [ node_to_idx[h] for h in hosts ] + else: + rec["scheduled_nodes"] = [] + #print("**", hosts, rec["scheduled_nodes"]) + rec["nodes_alloc"] = int(job_row["nodes_alloc"]) rec["cpu"] = proc_cpu_series(df) @@ -399,9 +412,10 @@ def load_data(local_dataset_path, **kwargs): nrx_trace = [], end_state = rec.get("state_end", "unknown"), id = jid, - priority = rec.get("priority",0), + scheduled_nodes = rec.get("scheduled_nodes"), + priority = rec.get("priority", 0), submit_time = submit_time, - time_limit = rec.get("time_limit",0), + time_limit = rec.get("time_limit", 0), start_time = t0 - start_ts, end_time = t1 - start_ts, wall_time = max(0, t1-t0), diff --git a/raps/dataloaders/mit_supercloud/nodelist.txt b/raps/dataloaders/mit_supercloud/nodelist.txt new file mode 100644 index 0000000..3056876 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/nodelist.txt @@ -0,0 +1,1135 @@ +r1018283-n146651 +r1018283-n181711 +r1018283-n244243 +r1018283-n310809 +r1018283-n325382 +r1018283-n341098 +r1018283-n392209 +r1018283-n468303 +r1018283-n598065 +r1018283-n642321 +r1018283-n642649 +r1018283-n678155 +r1018283-n680758 +r1018283-n90554 +r1018283-n920226 +r1081020-n146651 +r1081020-n181711 +r1081020-n244243 +r1081020-n310809 +r1081020-n325382 +r1081020-n341098 +r1081020-n392209 +r1081020-n468303 +r1081020-n598065 +r1081020-n642321 +r1081020-n642649 +r1081020-n678155 +r1081020-n680758 +r1081020-n90554 +r1081020-n920226 +r1356503-n172998 +r1416152-n134943 +r1416152-n440434 +r1416152-n442913 +r1416152-n572686 +r1416152-n588870 +r1416152-n691735 +r1416152-n818086 +r1416152-n935848 +r1457839-n181711 +r1457839-n325382 +r1457839-n386398 +r1457839-n642321 +r1457839-n678155 +r1457839-n680758 +r1457839-n851693 +r1457839-n90554 +r1457839-n911952 +r1457839-n920226 +r1457839-n976057 +r1485405-n146651 +r1485405-n181711 +r1485405-n244243 +r1485405-n310809 +r1485405-n325382 +r1485405-n341098 +r1485405-n386398 +r1485405-n392209 +r1485405-n43543 +r1485405-n468303 +r1485405-n598065 +r1485405-n642321 +r1485405-n642649 +r1485405-n678155 +r1485405-n680758 +r1485405-n685852 +r1485405-n830961 +r1485405-n851693 +r1485405-n90554 +r1485405-n911952 +r1485405-n920226 +r1485405-n976057 
+r1642813-n134943 +r1642813-n440434 +r1642813-n442913 +r1642813-n572686 +r1642813-n588870 +r1642813-n691735 +r1642813-n818086 +r1642813-n935848 +r1682297-n146651 +r1682297-n181711 +r1682297-n244243 +r1682297-n310809 +r1682297-n325382 +r1682297-n341098 +r1682297-n386398 +r1682297-n392209 +r1682297-n43543 +r1682297-n468303 +r1682297-n598065 +r1682297-n642321 +r1682297-n642649 +r1682297-n678155 +r1682297-n680758 +r1682297-n685852 +r1682297-n830961 +r1682297-n851693 +r1682297-n90554 +r1682297-n911952 +r1682297-n920226 +r1682297-n976057 +r189256-n146651 +r189256-n181711 +r189256-n244243 +r189256-n310809 +r189256-n325382 +r189256-n341098 +r189256-n392209 +r189256-n468303 +r189256-n598065 +r189256-n642321 +r189256-n642649 +r189256-n678155 +r189256-n680758 +r189256-n90554 +r189256-n920226 +r2008197-n181711 +r2008197-n325382 +r2008197-n642321 +r2008197-n678155 +r2008197-n680758 +r2008197-n90554 +r2008197-n920226 +r2086368-n146651 +r2086368-n181711 +r2086368-n244243 +r2086368-n310809 +r2086368-n325382 +r2086368-n341098 +r2086368-n392209 +r2086368-n468303 +r2086368-n598065 +r2086368-n642321 +r2086368-n642649 +r2086368-n678155 +r2086368-n680758 +r2086368-n90554 +r2086368-n920226 +r2100214-n181711 +r2100214-n325382 +r2100214-n386398 +r2100214-n642321 +r2100214-n678155 +r2100214-n680758 +r2100214-n851693 +r2100214-n90554 +r2100214-n911952 +r2100214-n920226 +r2100214-n976057 +r2159346-n134943 +r2159346-n440434 +r2159346-n442913 +r2159346-n572686 +r2159346-n588870 +r2159346-n691735 +r2159346-n818086 +r2159346-n935848 +r2501111-n134943 +r2501111-n440434 +r2501111-n442913 +r2501111-n572686 +r2501111-n588870 +r2501111-n691735 +r2501111-n818086 +r2501111-n935848 +r2582019-n181711 +r2582019-n325382 +r2582019-n386398 +r2582019-n642321 +r2582019-n678155 +r2582019-n680758 +r2582019-n851693 +r2582019-n90554 +r2582019-n911952 +r2582019-n920226 +r2582019-n976057 +r2627558-n172998 +r2652301-n146651 +r2652301-n181711 +r2652301-n244243 +r2652301-n310809 +r2652301-n325382 +r2652301-n341098 +r2652301-n386398 +r2652301-n392209 +r2652301-n43543 +r2652301-n468303 +r2652301-n598065 +r2652301-n642321 +r2652301-n642649 +r2652301-n678155 +r2652301-n680758 +r2652301-n685852 +r2652301-n830961 +r2652301-n851693 +r2652301-n90554 +r2652301-n911952 +r2652301-n920226 +r2652301-n976057 +r2825489-n134943 +r2825489-n136082 +r2825489-n139058 +r2825489-n208530 +r2825489-n440434 +r2825489-n442913 +r2825489-n572686 +r2825489-n588870 +r2825489-n691735 +r2825489-n818086 +r2825489-n935848 +r29114-n146651 +r29114-n181711 +r29114-n244243 +r29114-n310809 +r29114-n325382 +r29114-n341098 +r29114-n392209 +r29114-n468303 +r29114-n598065 +r29114-n642321 +r29114-n642649 +r29114-n678155 +r29114-n680758 +r29114-n90554 +r29114-n920226 +r2998125-n134943 +r2998125-n136082 +r2998125-n139058 +r2998125-n208530 +r2998125-n440434 +r2998125-n442913 +r2998125-n572686 +r2998125-n588870 +r2998125-n691735 +r2998125-n818086 +r2998125-n935848 +r3039576-n181711 +r3039576-n325382 +r3039576-n642321 +r3039576-n678155 +r3039576-n680758 +r3039576-n90554 +r3039576-n920226 +r3041626-n146651 +r3041626-n181711 +r3041626-n244243 +r3041626-n310809 +r3041626-n325382 +r3041626-n341098 +r3041626-n386398 +r3041626-n392209 +r3041626-n43543 +r3041626-n468303 +r3041626-n598065 +r3041626-n642321 +r3041626-n642649 +r3041626-n678155 +r3041626-n680758 +r3041626-n685852 +r3041626-n830961 +r3041626-n851693 +r3041626-n90554 +r3041626-n911952 +r3041626-n920226 +r3041626-n976057 +r3045754-n48252 +r3117156-n134943 +r3117156-n136082 +r3117156-n139058 +r3117156-n208530 +r3117156-n440434 
+r3117156-n442913 +r3117156-n572686 +r3117156-n588870 +r3117156-n691735 +r3117156-n818086 +r3117156-n935848 +r3210026-n172998 +r322031-n134943 +r322031-n440434 +r322031-n442913 +r322031-n572686 +r322031-n588870 +r322031-n691735 +r322031-n818086 +r322031-n935848 +r3226521-n146651 +r3226521-n181711 +r3226521-n244243 +r3226521-n310809 +r3226521-n325382 +r3226521-n341098 +r3226521-n386398 +r3226521-n392209 +r3226521-n43543 +r3226521-n468303 +r3226521-n598065 +r3226521-n642321 +r3226521-n642649 +r3226521-n678155 +r3226521-n680758 +r3226521-n685852 +r3226521-n830961 +r3226521-n851693 +r3226521-n90554 +r3226521-n911952 +r3226521-n920226 +r3226521-n976057 +r3236768-n172998 +r3254677-n181711 +r3254677-n325382 +r3254677-n642321 +r3254677-n678155 +r3254677-n680758 +r3254677-n90554 +r3254677-n920226 +r3386633-n172998 +r3405251-n136082 +r3405251-n139058 +r3405251-n208530 +r3475376-n134943 +r3475376-n136082 +r3475376-n139058 +r3475376-n208530 +r3475376-n440434 +r3475376-n442913 +r3475376-n572686 +r3475376-n588870 +r3475376-n691735 +r3475376-n818086 +r3475376-n935848 +r3581284-n146651 +r3581284-n181711 +r3581284-n244243 +r3581284-n310809 +r3581284-n325382 +r3581284-n341098 +r3581284-n392209 +r3581284-n468303 +r3581284-n598065 +r3581284-n642321 +r3581284-n642649 +r3581284-n678155 +r3581284-n680758 +r3581284-n90554 +r3581284-n920226 +r3685766-n134943 +r3685766-n440434 +r3685766-n442913 +r3685766-n572686 +r3685766-n588870 +r3685766-n691735 +r3685766-n818086 +r3685766-n935848 +r3741709-n146651 +r3741709-n181711 +r3741709-n244243 +r3741709-n310809 +r3741709-n325382 +r3741709-n341098 +r3741709-n386398 +r3741709-n392209 +r3741709-n43543 +r3741709-n468303 +r3741709-n598065 +r3741709-n642321 +r3741709-n642649 +r3741709-n678155 +r3741709-n680758 +r3741709-n685852 +r3741709-n830961 +r3741709-n851693 +r3741709-n90554 +r3741709-n911952 +r3741709-n920226 +r3741709-n976057 +r3824475-n146651 +r3824475-n181711 +r3824475-n244243 +r3824475-n310809 +r3824475-n325382 +r3824475-n341098 +r3824475-n392209 +r3824475-n468303 +r3824475-n598065 +r3824475-n642321 +r3824475-n642649 +r3824475-n678155 +r3824475-n680758 +r3824475-n90554 +r3824475-n920226 +r3879907-n134943 +r3879907-n136082 +r3879907-n139058 +r3879907-n208530 +r3879907-n440434 +r3879907-n442913 +r3879907-n572686 +r3879907-n588870 +r3879907-n691735 +r3879907-n818086 +r3879907-n935848 +r406820-n181711 +r406820-n325382 +r406820-n642321 +r406820-n678155 +r406820-n680758 +r406820-n90554 +r406820-n920226 +r4153679-n134943 +r4153679-n440434 +r4153679-n442913 +r4153679-n572686 +r4153679-n588870 +r4153679-n691735 +r4153679-n818086 +r4153679-n935848 +r4179716-n181711 +r4179716-n325382 +r4179716-n386398 +r4179716-n642321 +r4179716-n678155 +r4179716-n680758 +r4179716-n851693 +r4179716-n90554 +r4179716-n911952 +r4179716-n920226 +r4179716-n976057 +r4229531-n181711 +r4229531-n325382 +r4229531-n386398 +r4229531-n642321 +r4229531-n678155 +r4229531-n680758 +r4229531-n851693 +r4229531-n90554 +r4229531-n911952 +r4229531-n920226 +r4229531-n976057 +r4247208-n146651 +r4247208-n181711 +r4247208-n244243 +r4247208-n310809 +r4247208-n325382 +r4247208-n341098 +r4247208-n392209 +r4247208-n468303 +r4247208-n598065 +r4247208-n642321 +r4247208-n642649 +r4247208-n678155 +r4247208-n680758 +r4247208-n90554 +r4247208-n920226 +r4327055-n134943 +r4327055-n136082 +r4327055-n139058 +r4327055-n208530 +r4327055-n440434 +r4327055-n442913 +r4327055-n572686 +r4327055-n588870 +r4327055-n691735 +r4327055-n818086 +r4327055-n935848 +r4357125-n134943 +r4357125-n440434 +r4357125-n442913 +r4357125-n572686 
+r4357125-n588870 +r4357125-n691735 +r4357125-n818086 +r4357125-n935848 +r4774426-n172998 +r4822976-n134943 +r4822976-n136082 +r4822976-n139058 +r4822976-n208530 +r4822976-n440434 +r4822976-n442913 +r4822976-n572686 +r4822976-n588870 +r4822976-n691735 +r4822976-n818086 +r4822976-n935848 +r4858666-n146651 +r4858666-n181711 +r4858666-n244243 +r4858666-n310809 +r4858666-n325382 +r4858666-n341098 +r4858666-n386398 +r4858666-n392209 +r4858666-n43543 +r4858666-n468303 +r4858666-n598065 +r4858666-n642321 +r4858666-n642649 +r4858666-n678155 +r4858666-n680758 +r4858666-n685852 +r4858666-n830961 +r4858666-n851693 +r4858666-n90554 +r4858666-n911952 +r4858666-n920226 +r4858666-n976057 +r4874959-n181711 +r4874959-n325382 +r4874959-n642321 +r4874959-n678155 +r4874959-n680758 +r4874959-n90554 +r4874959-n920226 +r4990664-n134943 +r4990664-n440434 +r4990664-n442913 +r4990664-n572686 +r4990664-n588870 +r4990664-n691735 +r4990664-n818086 +r4990664-n935848 +r5130449-n134943 +r5130449-n136082 +r5130449-n139058 +r5130449-n208530 +r5130449-n440434 +r5130449-n442913 +r5130449-n572686 +r5130449-n588870 +r5130449-n691735 +r5130449-n818086 +r5130449-n935848 +r5189505-n146651 +r5189505-n181711 +r5189505-n244243 +r5189505-n310809 +r5189505-n325382 +r5189505-n341098 +r5189505-n386398 +r5189505-n392209 +r5189505-n43543 +r5189505-n468303 +r5189505-n598065 +r5189505-n642321 +r5189505-n642649 +r5189505-n678155 +r5189505-n680758 +r5189505-n685852 +r5189505-n830961 +r5189505-n851693 +r5189505-n90554 +r5189505-n911952 +r5189505-n920226 +r5189505-n976057 +r5261712-n134943 +r5261712-n440434 +r5261712-n442913 +r5261712-n572686 +r5261712-n588870 +r5261712-n691735 +r5261712-n818086 +r5261712-n935848 +r5573787-n181711 +r5573787-n325382 +r5573787-n386398 +r5573787-n642321 +r5573787-n678155 +r5573787-n680758 +r5573787-n851693 +r5573787-n90554 +r5573787-n911952 +r5573787-n920226 +r5573787-n976057 +r5715171-n134943 +r5715171-n136082 +r5715171-n139058 +r5715171-n208530 +r5715171-n440434 +r5715171-n442913 +r5715171-n572686 +r5715171-n588870 +r5715171-n691735 +r5715171-n818086 +r5715171-n935848 +r6102167-n181711 +r6102167-n325382 +r6102167-n642321 +r6102167-n678155 +r6102167-n680758 +r6102167-n90554 +r6102167-n920226 +r6272977-n181711 +r6272977-n325382 +r6272977-n386398 +r6272977-n642321 +r6272977-n678155 +r6272977-n680758 +r6272977-n851693 +r6272977-n90554 +r6272977-n911952 +r6272977-n920226 +r6272977-n976057 +r629115-n146651 +r629115-n181711 +r629115-n244243 +r629115-n310809 +r629115-n325382 +r629115-n341098 +r629115-n386398 +r629115-n392209 +r629115-n43543 +r629115-n468303 +r629115-n598065 +r629115-n642321 +r629115-n642649 +r629115-n678155 +r629115-n680758 +r629115-n685852 +r629115-n830961 +r629115-n851693 +r629115-n90554 +r629115-n911952 +r629115-n920226 +r629115-n976057 +r6341586-n146651 +r6341586-n181711 +r6341586-n244243 +r6341586-n310809 +r6341586-n325382 +r6341586-n341098 +r6341586-n392209 +r6341586-n468303 +r6341586-n598065 +r6341586-n642321 +r6341586-n642649 +r6341586-n678155 +r6341586-n680758 +r6341586-n90554 +r6341586-n920226 +r6491112-n172998 +r6531478-n181711 +r6531478-n325382 +r6531478-n642321 +r6531478-n678155 +r6531478-n680758 +r6531478-n90554 +r6531478-n920226 +r6631426-n181711 +r6631426-n325382 +r6631426-n642321 +r6631426-n678155 +r6631426-n680758 +r6631426-n90554 +r6631426-n920226 +r6682735-n146651 +r6682735-n181711 +r6682735-n244243 +r6682735-n310809 +r6682735-n325382 +r6682735-n341098 +r6682735-n392209 +r6682735-n468303 +r6682735-n598065 +r6682735-n642321 +r6682735-n642649 +r6682735-n678155 +r6682735-n680758 
+r6682735-n90554 +r6682735-n920226 +r6760045-n146651 +r6760045-n181711 +r6760045-n244243 +r6760045-n310809 +r6760045-n325382 +r6760045-n341098 +r6760045-n386398 +r6760045-n392209 +r6760045-n43543 +r6760045-n468303 +r6760045-n598065 +r6760045-n642321 +r6760045-n642649 +r6760045-n678155 +r6760045-n680758 +r6760045-n685852 +r6760045-n830961 +r6760045-n851693 +r6760045-n90554 +r6760045-n911952 +r6760045-n920226 +r6760045-n976057 +r697496-n146651 +r697496-n181711 +r697496-n244243 +r697496-n310809 +r697496-n325382 +r697496-n341098 +r697496-n392209 +r697496-n468303 +r697496-n598065 +r697496-n642321 +r697496-n642649 +r697496-n678155 +r697496-n680758 +r697496-n90554 +r697496-n920226 +r7217787-n146651 +r7217787-n181711 +r7217787-n244243 +r7217787-n310809 +r7217787-n325382 +r7217787-n341098 +r7217787-n386398 +r7217787-n392209 +r7217787-n43543 +r7217787-n468303 +r7217787-n598065 +r7217787-n642321 +r7217787-n642649 +r7217787-n678155 +r7217787-n680758 +r7217787-n685852 +r7217787-n830961 +r7217787-n851693 +r7217787-n90554 +r7217787-n911952 +r7217787-n920226 +r7217787-n976057 +r7343737-n146651 +r7343737-n181711 +r7343737-n244243 +r7343737-n310809 +r7343737-n325382 +r7343737-n341098 +r7343737-n386398 +r7343737-n392209 +r7343737-n43543 +r7343737-n468303 +r7343737-n598065 +r7343737-n642321 +r7343737-n642649 +r7343737-n678155 +r7343737-n680758 +r7343737-n685852 +r7343737-n830961 +r7343737-n851693 +r7343737-n90554 +r7343737-n911952 +r7343737-n920226 +r7343737-n976057 +r7831860-n181711 +r7831860-n325382 +r7831860-n642321 +r7831860-n678155 +r7831860-n680758 +r7831860-n90554 +r7831860-n920226 +r7839831-n146651 +r7839831-n181711 +r7839831-n244243 +r7839831-n310809 +r7839831-n325382 +r7839831-n341098 +r7839831-n392209 +r7839831-n468303 +r7839831-n598065 +r7839831-n642321 +r7839831-n642649 +r7839831-n678155 +r7839831-n680758 +r7839831-n90554 +r7839831-n920226 +r7952476-n146651 +r7952476-n181711 +r7952476-n244243 +r7952476-n310809 +r7952476-n325382 +r7952476-n341098 +r7952476-n392209 +r7952476-n468303 +r7952476-n598065 +r7952476-n642321 +r7952476-n642649 +r7952476-n678155 +r7952476-n680758 +r7952476-n90554 +r7952476-n920226 +r8015356-n181711 +r8015356-n325382 +r8015356-n386398 +r8015356-n642321 +r8015356-n678155 +r8015356-n680758 +r8015356-n851693 +r8015356-n90554 +r8015356-n911952 +r8015356-n920226 +r8015356-n976057 +r8062914-n134943 +r8062914-n136082 +r8062914-n139058 +r8062914-n208530 +r8062914-n440434 +r8062914-n442913 +r8062914-n572686 +r8062914-n588870 +r8062914-n691735 +r8062914-n818086 +r8062914-n935848 +r8212643-n146651 +r8212643-n181711 +r8212643-n244243 +r8212643-n310809 +r8212643-n325382 +r8212643-n341098 +r8212643-n392209 +r8212643-n468303 +r8212643-n598065 +r8212643-n642321 +r8212643-n642649 +r8212643-n678155 +r8212643-n680758 +r8212643-n90554 +r8212643-n920226 +r8333645-n146651 +r8333645-n181711 +r8333645-n244243 +r8333645-n310809 +r8333645-n325382 +r8333645-n341098 +r8333645-n386398 +r8333645-n392209 +r8333645-n43543 +r8333645-n468303 +r8333645-n598065 +r8333645-n642321 +r8333645-n642649 +r8333645-n678155 +r8333645-n680758 +r8333645-n685852 +r8333645-n830961 +r8333645-n851693 +r8333645-n90554 +r8333645-n911952 +r8333645-n920226 +r8333645-n976057 +r8579942-n136082 +r8579942-n139058 +r8579942-n208530 +r8586363-n172998 +r8595196-n134943 +r8595196-n440434 +r8595196-n442913 +r8595196-n572686 +r8595196-n588870 +r8595196-n691735 +r8595196-n818086 +r8595196-n935848 +r8607415-n181711 +r8607415-n325382 +r8607415-n386398 +r8607415-n642321 +r8607415-n678155 +r8607415-n680758 +r8607415-n851693 +r8607415-n90554 
+r8607415-n911952 +r8607415-n920226 +r8607415-n976057 +r8642123-n181711 +r8642123-n325382 +r8642123-n386398 +r8642123-n642321 +r8642123-n678155 +r8642123-n680758 +r8642123-n851693 +r8642123-n90554 +r8642123-n911952 +r8642123-n920226 +r8642123-n976057 +r8792496-n181711 +r8792496-n325382 +r8792496-n642321 +r8792496-n678155 +r8792496-n680758 +r8792496-n90554 +r8792496-n920226 +r8918301-n181711 +r8918301-n325382 +r8918301-n642321 +r8918301-n678155 +r8918301-n680758 +r8918301-n90554 +r8918301-n920226 +r8937440-n146651 +r8937440-n181711 +r8937440-n244243 +r8937440-n310809 +r8937440-n325382 +r8937440-n341098 +r8937440-n386398 +r8937440-n392209 +r8937440-n43543 +r8937440-n468303 +r8937440-n598065 +r8937440-n642321 +r8937440-n678155 +r8937440-n680758 +r8937440-n685852 +r8937440-n830961 +r8937440-n851693 +r8937440-n90554 +r8937440-n911952 +r8937440-n920226 +r8937440-n976057 +r8939293-n134943 +r8939293-n136082 +r8939293-n139058 +r8939293-n208530 +r8939293-n440434 +r8939293-n442913 +r8939293-n572686 +r8939293-n588870 +r8939293-n691735 +r8939293-n818086 +r8939293-n935848 +r9021574-n146651 +r9021574-n181711 +r9021574-n244243 +r9021574-n310809 +r9021574-n325382 +r9021574-n341098 +r9021574-n392209 +r9021574-n468303 +r9021574-n598065 +r9021574-n642321 +r9021574-n642649 +r9021574-n678155 +r9021574-n680758 +r9021574-n90554 +r9021574-n920226 +r9026042-n146651 +r9026042-n181711 +r9026042-n244243 +r9026042-n310809 +r9026042-n325382 +r9026042-n341098 +r9026042-n392209 +r9026042-n468303 +r9026042-n598065 +r9026042-n642321 +r9026042-n642649 +r9026042-n678155 +r9026042-n680758 +r9026042-n90554 +r9026042-n920226 +r9040233-n181711 +r9040233-n325382 +r9040233-n386398 +r9040233-n642321 +r9040233-n678155 +r9040233-n680758 +r9040233-n851693 +r9040233-n90554 +r9040233-n911952 +r9040233-n920226 +r9040233-n976057 +r9102715-n146651 +r9102715-n181711 +r9102715-n244243 +r9102715-n310809 +r9102715-n325382 +r9102715-n341098 +r9102715-n386398 +r9102715-n392209 +r9102715-n43543 +r9102715-n468303 +r9102715-n598065 +r9102715-n642321 +r9102715-n642649 +r9102715-n678155 +r9102715-n680758 +r9102715-n685852 +r9102715-n830961 +r9102715-n851693 +r9102715-n90554 +r9102715-n911952 +r9102715-n920226 +r9102715-n976057 +r9113711-n181711 +r9113711-n325382 +r9113711-n642321 +r9113711-n678155 +r9113711-n680758 +r9113711-n90554 +r9113711-n920226 +r9115114-n172998 +r9175025-n386398 +r9175025-n851693 +r9175025-n911952 +r9175025-n976057 +r9189566-n146651 +r9189566-n181711 +r9189566-n244243 +r9189566-n310809 +r9189566-n325382 +r9189566-n341098 +r9189566-n386398 +r9189566-n392209 +r9189566-n43543 +r9189566-n468303 +r9189566-n598065 +r9189566-n642321 +r9189566-n642649 +r9189566-n678155 +r9189566-n680758 +r9189566-n685852 +r9189566-n830961 +r9189566-n851693 +r9189566-n90554 +r9189566-n911952 +r9189566-n920226 +r9189566-n976057 +r9192091-n146651 +r9192091-n181711 +r9192091-n244243 +r9192091-n310809 +r9192091-n325382 +r9192091-n341098 +r9192091-n386398 +r9192091-n392209 +r9192091-n43543 +r9192091-n468303 +r9192091-n598065 +r9192091-n642321 +r9192091-n642649 +r9192091-n678155 +r9192091-n680758 +r9192091-n685852 +r9192091-n830961 +r9192091-n851693 +r9192091-n90554 +r9192091-n911952 +r9192091-n920226 +r9192091-n976057 +r9273661-n146651 +r9273661-n181711 +r9273661-n244243 +r9273661-n310809 +r9273661-n325382 +r9273661-n341098 +r9273661-n392209 +r9273661-n468303 +r9273661-n598065 +r9273661-n642321 +r9273661-n642649 +r9273661-n678155 +r9273661-n680758 +r9273661-n90554 +r9273661-n920226 +r9352821-n146651 +r9352821-n181711 +r9352821-n244243 +r9352821-n310809 
+r9352821-n325382 +r9352821-n341098 +r9352821-n386398 +r9352821-n392209 +r9352821-n43543 +r9352821-n468303 +r9352821-n598065 +r9352821-n642321 +r9352821-n642649 +r9352821-n678155 +r9352821-n680758 +r9352821-n685852 +r9352821-n830961 +r9352821-n851693 +r9352821-n90554 +r9352821-n911952 +r9352821-n920226 +r9352821-n976057 +r9366523-n134943 +r9366523-n440434 +r9366523-n442913 +r9366523-n572686 +r9366523-n588870 +r9366523-n691735 +r9366523-n818086 +r9366523-n935848 +r9535192-n386398 +r9535192-n851693 +r9535192-n911952 +r9535192-n976057 +r9541411-n172998 +r9555635-n134943 +r9555635-n136082 +r9555635-n139058 +r9555635-n208530 +r9555635-n440434 +r9555635-n442913 +r9555635-n572686 +r9555635-n588870 +r9555635-n691735 +r9555635-n818086 +r9555635-n935848 +r9720335-n181711 +r9720335-n325382 +r9720335-n386398 +r9720335-n642321 +r9720335-n678155 +r9720335-n680758 +r9720335-n851693 +r9720335-n90554 +r9720335-n911952 +r9720335-n920226 +r9720335-n976057 +r9757054-n146651 +r9757054-n181711 +r9757054-n244243 +r9757054-n310809 +r9757054-n325382 +r9757054-n341098 +r9757054-n392209 +r9757054-n468303 +r9757054-n598065 +r9757054-n642321 +r9757054-n642649 +r9757054-n678155 +r9757054-n680758 +r9757054-n90554 +r9757054-n920226 +r9836048-n172998 -- GitLab From 8578523da7e4fa062bcde5e5aa960a1e9e014fcc Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 24 Jul 2025 09:42:42 -0400 Subject: [PATCH 179/388] Split nodelist.txt into cpu_nodes.txt and gpu_nodes.txt --- .../{nodelist.txt => cpu_nodes.txt} | 228 ------------------ raps/dataloaders/mit_supercloud/gpu_nodes.txt | 228 ++++++++++++++++++ raps/dataloaders/mit_supercloud/loader.py | 80 +++--- 3 files changed, 272 insertions(+), 264 deletions(-) rename raps/dataloaders/mit_supercloud/{nodelist.txt => cpu_nodes.txt} (79%) create mode 100644 raps/dataloaders/mit_supercloud/gpu_nodes.txt diff --git a/raps/dataloaders/mit_supercloud/nodelist.txt b/raps/dataloaders/mit_supercloud/cpu_nodes.txt similarity index 79% rename from raps/dataloaders/mit_supercloud/nodelist.txt rename to raps/dataloaders/mit_supercloud/cpu_nodes.txt index 3056876..2ce4a6e 100644 --- a/raps/dataloaders/mit_supercloud/nodelist.txt +++ b/raps/dataloaders/mit_supercloud/cpu_nodes.txt @@ -39,37 +39,26 @@ r1416152-n818086 r1416152-n935848 r1457839-n181711 r1457839-n325382 -r1457839-n386398 r1457839-n642321 r1457839-n678155 r1457839-n680758 -r1457839-n851693 r1457839-n90554 -r1457839-n911952 r1457839-n920226 -r1457839-n976057 r1485405-n146651 r1485405-n181711 r1485405-n244243 r1485405-n310809 r1485405-n325382 r1485405-n341098 -r1485405-n386398 r1485405-n392209 -r1485405-n43543 r1485405-n468303 r1485405-n598065 r1485405-n642321 r1485405-n642649 r1485405-n678155 r1485405-n680758 -r1485405-n685852 -r1485405-n830961 -r1485405-n851693 r1485405-n90554 -r1485405-n911952 r1485405-n920226 -r1485405-n976057 r1642813-n134943 r1642813-n440434 r1642813-n442913 @@ -84,22 +73,15 @@ r1682297-n244243 r1682297-n310809 r1682297-n325382 r1682297-n341098 -r1682297-n386398 r1682297-n392209 -r1682297-n43543 r1682297-n468303 r1682297-n598065 r1682297-n642321 r1682297-n642649 r1682297-n678155 r1682297-n680758 -r1682297-n685852 -r1682297-n830961 -r1682297-n851693 r1682297-n90554 -r1682297-n911952 r1682297-n920226 -r1682297-n976057 r189256-n146651 r189256-n181711 r189256-n244243 @@ -139,15 +121,11 @@ r2086368-n90554 r2086368-n920226 r2100214-n181711 r2100214-n325382 -r2100214-n386398 r2100214-n642321 r2100214-n678155 r2100214-n680758 -r2100214-n851693 r2100214-n90554 -r2100214-n911952 r2100214-n920226 -r2100214-n976057 
r2159346-n134943 r2159346-n440434 r2159346-n442913 @@ -166,15 +144,11 @@ r2501111-n818086 r2501111-n935848 r2582019-n181711 r2582019-n325382 -r2582019-n386398 r2582019-n642321 r2582019-n678155 r2582019-n680758 -r2582019-n851693 r2582019-n90554 -r2582019-n911952 r2582019-n920226 -r2582019-n976057 r2627558-n172998 r2652301-n146651 r2652301-n181711 @@ -182,26 +156,16 @@ r2652301-n244243 r2652301-n310809 r2652301-n325382 r2652301-n341098 -r2652301-n386398 r2652301-n392209 -r2652301-n43543 r2652301-n468303 r2652301-n598065 r2652301-n642321 r2652301-n642649 r2652301-n678155 r2652301-n680758 -r2652301-n685852 -r2652301-n830961 -r2652301-n851693 r2652301-n90554 -r2652301-n911952 r2652301-n920226 -r2652301-n976057 r2825489-n134943 -r2825489-n136082 -r2825489-n139058 -r2825489-n208530 r2825489-n440434 r2825489-n442913 r2825489-n572686 @@ -225,9 +189,6 @@ r29114-n680758 r29114-n90554 r29114-n920226 r2998125-n134943 -r2998125-n136082 -r2998125-n139058 -r2998125-n208530 r2998125-n440434 r2998125-n442913 r2998125-n572686 @@ -248,27 +209,17 @@ r3041626-n244243 r3041626-n310809 r3041626-n325382 r3041626-n341098 -r3041626-n386398 r3041626-n392209 -r3041626-n43543 r3041626-n468303 r3041626-n598065 r3041626-n642321 r3041626-n642649 r3041626-n678155 r3041626-n680758 -r3041626-n685852 -r3041626-n830961 -r3041626-n851693 r3041626-n90554 -r3041626-n911952 r3041626-n920226 -r3041626-n976057 r3045754-n48252 r3117156-n134943 -r3117156-n136082 -r3117156-n139058 -r3117156-n208530 r3117156-n440434 r3117156-n442913 r3117156-n572686 @@ -276,7 +227,6 @@ r3117156-n588870 r3117156-n691735 r3117156-n818086 r3117156-n935848 -r3210026-n172998 r322031-n134943 r322031-n440434 r322031-n442913 @@ -291,22 +241,15 @@ r3226521-n244243 r3226521-n310809 r3226521-n325382 r3226521-n341098 -r3226521-n386398 r3226521-n392209 -r3226521-n43543 r3226521-n468303 r3226521-n598065 r3226521-n642321 r3226521-n642649 r3226521-n678155 r3226521-n680758 -r3226521-n685852 -r3226521-n830961 -r3226521-n851693 r3226521-n90554 -r3226521-n911952 r3226521-n920226 -r3226521-n976057 r3236768-n172998 r3254677-n181711 r3254677-n325382 @@ -315,14 +258,7 @@ r3254677-n678155 r3254677-n680758 r3254677-n90554 r3254677-n920226 -r3386633-n172998 -r3405251-n136082 -r3405251-n139058 -r3405251-n208530 r3475376-n134943 -r3475376-n136082 -r3475376-n139058 -r3475376-n208530 r3475376-n440434 r3475376-n442913 r3475376-n572686 @@ -359,22 +295,15 @@ r3741709-n244243 r3741709-n310809 r3741709-n325382 r3741709-n341098 -r3741709-n386398 r3741709-n392209 -r3741709-n43543 r3741709-n468303 r3741709-n598065 r3741709-n642321 r3741709-n642649 r3741709-n678155 r3741709-n680758 -r3741709-n685852 -r3741709-n830961 -r3741709-n851693 r3741709-n90554 -r3741709-n911952 r3741709-n920226 -r3741709-n976057 r3824475-n146651 r3824475-n181711 r3824475-n244243 @@ -391,9 +320,6 @@ r3824475-n680758 r3824475-n90554 r3824475-n920226 r3879907-n134943 -r3879907-n136082 -r3879907-n139058 -r3879907-n208530 r3879907-n440434 r3879907-n442913 r3879907-n572686 @@ -418,26 +344,18 @@ r4153679-n818086 r4153679-n935848 r4179716-n181711 r4179716-n325382 -r4179716-n386398 r4179716-n642321 r4179716-n678155 r4179716-n680758 -r4179716-n851693 r4179716-n90554 -r4179716-n911952 r4179716-n920226 -r4179716-n976057 r4229531-n181711 r4229531-n325382 -r4229531-n386398 r4229531-n642321 r4229531-n678155 r4229531-n680758 -r4229531-n851693 r4229531-n90554 -r4229531-n911952 r4229531-n920226 -r4229531-n976057 r4247208-n146651 r4247208-n181711 r4247208-n244243 @@ -454,9 +372,6 @@ r4247208-n680758 r4247208-n90554 r4247208-n920226 
r4327055-n134943 -r4327055-n136082 -r4327055-n139058 -r4327055-n208530 r4327055-n440434 r4327055-n442913 r4327055-n572686 @@ -472,11 +387,7 @@ r4357125-n588870 r4357125-n691735 r4357125-n818086 r4357125-n935848 -r4774426-n172998 r4822976-n134943 -r4822976-n136082 -r4822976-n139058 -r4822976-n208530 r4822976-n440434 r4822976-n442913 r4822976-n572686 @@ -490,22 +401,15 @@ r4858666-n244243 r4858666-n310809 r4858666-n325382 r4858666-n341098 -r4858666-n386398 r4858666-n392209 -r4858666-n43543 r4858666-n468303 r4858666-n598065 r4858666-n642321 r4858666-n642649 r4858666-n678155 r4858666-n680758 -r4858666-n685852 -r4858666-n830961 -r4858666-n851693 r4858666-n90554 -r4858666-n911952 r4858666-n920226 -r4858666-n976057 r4874959-n181711 r4874959-n325382 r4874959-n642321 @@ -522,9 +426,6 @@ r4990664-n691735 r4990664-n818086 r4990664-n935848 r5130449-n134943 -r5130449-n136082 -r5130449-n139058 -r5130449-n208530 r5130449-n440434 r5130449-n442913 r5130449-n572686 @@ -538,22 +439,15 @@ r5189505-n244243 r5189505-n310809 r5189505-n325382 r5189505-n341098 -r5189505-n386398 r5189505-n392209 -r5189505-n43543 r5189505-n468303 r5189505-n598065 r5189505-n642321 r5189505-n642649 r5189505-n678155 r5189505-n680758 -r5189505-n685852 -r5189505-n830961 -r5189505-n851693 r5189505-n90554 -r5189505-n911952 r5189505-n920226 -r5189505-n976057 r5261712-n134943 r5261712-n440434 r5261712-n442913 @@ -564,19 +458,12 @@ r5261712-n818086 r5261712-n935848 r5573787-n181711 r5573787-n325382 -r5573787-n386398 r5573787-n642321 r5573787-n678155 r5573787-n680758 -r5573787-n851693 r5573787-n90554 -r5573787-n911952 r5573787-n920226 -r5573787-n976057 r5715171-n134943 -r5715171-n136082 -r5715171-n139058 -r5715171-n208530 r5715171-n440434 r5715171-n442913 r5715171-n572686 @@ -593,37 +480,26 @@ r6102167-n90554 r6102167-n920226 r6272977-n181711 r6272977-n325382 -r6272977-n386398 r6272977-n642321 r6272977-n678155 r6272977-n680758 -r6272977-n851693 r6272977-n90554 -r6272977-n911952 r6272977-n920226 -r6272977-n976057 r629115-n146651 r629115-n181711 r629115-n244243 r629115-n310809 r629115-n325382 r629115-n341098 -r629115-n386398 r629115-n392209 -r629115-n43543 r629115-n468303 r629115-n598065 r629115-n642321 r629115-n642649 r629115-n678155 r629115-n680758 -r629115-n685852 -r629115-n830961 -r629115-n851693 r629115-n90554 -r629115-n911952 r629115-n920226 -r629115-n976057 r6341586-n146651 r6341586-n181711 r6341586-n244243 @@ -675,22 +551,15 @@ r6760045-n244243 r6760045-n310809 r6760045-n325382 r6760045-n341098 -r6760045-n386398 r6760045-n392209 -r6760045-n43543 r6760045-n468303 r6760045-n598065 r6760045-n642321 r6760045-n642649 r6760045-n678155 r6760045-n680758 -r6760045-n685852 -r6760045-n830961 -r6760045-n851693 r6760045-n90554 -r6760045-n911952 r6760045-n920226 -r6760045-n976057 r697496-n146651 r697496-n181711 r697496-n244243 @@ -712,44 +581,30 @@ r7217787-n244243 r7217787-n310809 r7217787-n325382 r7217787-n341098 -r7217787-n386398 r7217787-n392209 -r7217787-n43543 r7217787-n468303 r7217787-n598065 r7217787-n642321 r7217787-n642649 r7217787-n678155 r7217787-n680758 -r7217787-n685852 -r7217787-n830961 -r7217787-n851693 r7217787-n90554 -r7217787-n911952 r7217787-n920226 -r7217787-n976057 r7343737-n146651 r7343737-n181711 r7343737-n244243 r7343737-n310809 r7343737-n325382 r7343737-n341098 -r7343737-n386398 r7343737-n392209 -r7343737-n43543 r7343737-n468303 r7343737-n598065 r7343737-n642321 r7343737-n642649 r7343737-n678155 r7343737-n680758 -r7343737-n685852 -r7343737-n830961 -r7343737-n851693 r7343737-n90554 -r7343737-n911952 r7343737-n920226 
-r7343737-n976057 r7831860-n181711 r7831860-n325382 r7831860-n642321 @@ -789,19 +644,12 @@ r7952476-n90554 r7952476-n920226 r8015356-n181711 r8015356-n325382 -r8015356-n386398 r8015356-n642321 r8015356-n678155 r8015356-n680758 -r8015356-n851693 r8015356-n90554 -r8015356-n911952 r8015356-n920226 -r8015356-n976057 r8062914-n134943 -r8062914-n136082 -r8062914-n139058 -r8062914-n208530 r8062914-n440434 r8062914-n442913 r8062914-n572686 @@ -830,26 +678,15 @@ r8333645-n244243 r8333645-n310809 r8333645-n325382 r8333645-n341098 -r8333645-n386398 r8333645-n392209 -r8333645-n43543 r8333645-n468303 r8333645-n598065 r8333645-n642321 r8333645-n642649 r8333645-n678155 r8333645-n680758 -r8333645-n685852 -r8333645-n830961 -r8333645-n851693 r8333645-n90554 -r8333645-n911952 r8333645-n920226 -r8333645-n976057 -r8579942-n136082 -r8579942-n139058 -r8579942-n208530 -r8586363-n172998 r8595196-n134943 r8595196-n440434 r8595196-n442913 @@ -860,26 +697,18 @@ r8595196-n818086 r8595196-n935848 r8607415-n181711 r8607415-n325382 -r8607415-n386398 r8607415-n642321 r8607415-n678155 r8607415-n680758 -r8607415-n851693 r8607415-n90554 -r8607415-n911952 r8607415-n920226 -r8607415-n976057 r8642123-n181711 r8642123-n325382 -r8642123-n386398 r8642123-n642321 r8642123-n678155 r8642123-n680758 -r8642123-n851693 r8642123-n90554 -r8642123-n911952 r8642123-n920226 -r8642123-n976057 r8792496-n181711 r8792496-n325382 r8792496-n642321 @@ -900,25 +729,15 @@ r8937440-n244243 r8937440-n310809 r8937440-n325382 r8937440-n341098 -r8937440-n386398 r8937440-n392209 -r8937440-n43543 r8937440-n468303 r8937440-n598065 r8937440-n642321 r8937440-n678155 r8937440-n680758 -r8937440-n685852 -r8937440-n830961 -r8937440-n851693 r8937440-n90554 -r8937440-n911952 r8937440-n920226 -r8937440-n976057 r8939293-n134943 -r8939293-n136082 -r8939293-n139058 -r8939293-n208530 r8939293-n440434 r8939293-n442913 r8939293-n572686 @@ -958,37 +777,26 @@ r9026042-n90554 r9026042-n920226 r9040233-n181711 r9040233-n325382 -r9040233-n386398 r9040233-n642321 r9040233-n678155 r9040233-n680758 -r9040233-n851693 r9040233-n90554 -r9040233-n911952 r9040233-n920226 -r9040233-n976057 r9102715-n146651 r9102715-n181711 r9102715-n244243 r9102715-n310809 r9102715-n325382 r9102715-n341098 -r9102715-n386398 r9102715-n392209 -r9102715-n43543 r9102715-n468303 r9102715-n598065 r9102715-n642321 r9102715-n642649 r9102715-n678155 r9102715-n680758 -r9102715-n685852 -r9102715-n830961 -r9102715-n851693 r9102715-n90554 -r9102715-n911952 r9102715-n920226 -r9102715-n976057 r9113711-n181711 r9113711-n325382 r9113711-n642321 @@ -997,54 +805,36 @@ r9113711-n680758 r9113711-n90554 r9113711-n920226 r9115114-n172998 -r9175025-n386398 -r9175025-n851693 -r9175025-n911952 -r9175025-n976057 r9189566-n146651 r9189566-n181711 r9189566-n244243 r9189566-n310809 r9189566-n325382 r9189566-n341098 -r9189566-n386398 r9189566-n392209 -r9189566-n43543 r9189566-n468303 r9189566-n598065 r9189566-n642321 r9189566-n642649 r9189566-n678155 r9189566-n680758 -r9189566-n685852 -r9189566-n830961 -r9189566-n851693 r9189566-n90554 -r9189566-n911952 r9189566-n920226 -r9189566-n976057 r9192091-n146651 r9192091-n181711 r9192091-n244243 r9192091-n310809 r9192091-n325382 r9192091-n341098 -r9192091-n386398 r9192091-n392209 -r9192091-n43543 r9192091-n468303 r9192091-n598065 r9192091-n642321 r9192091-n642649 r9192091-n678155 r9192091-n680758 -r9192091-n685852 -r9192091-n830961 -r9192091-n851693 r9192091-n90554 -r9192091-n911952 r9192091-n920226 -r9192091-n976057 r9273661-n146651 r9273661-n181711 r9273661-n244243 @@ -1066,22 +856,15 @@ 
r9352821-n244243 r9352821-n310809 r9352821-n325382 r9352821-n341098 -r9352821-n386398 r9352821-n392209 -r9352821-n43543 r9352821-n468303 r9352821-n598065 r9352821-n642321 r9352821-n642649 r9352821-n678155 r9352821-n680758 -r9352821-n685852 -r9352821-n830961 -r9352821-n851693 r9352821-n90554 -r9352821-n911952 r9352821-n920226 -r9352821-n976057 r9366523-n134943 r9366523-n440434 r9366523-n442913 @@ -1090,15 +873,8 @@ r9366523-n588870 r9366523-n691735 r9366523-n818086 r9366523-n935848 -r9535192-n386398 -r9535192-n851693 -r9535192-n911952 -r9535192-n976057 r9541411-n172998 r9555635-n134943 -r9555635-n136082 -r9555635-n139058 -r9555635-n208530 r9555635-n440434 r9555635-n442913 r9555635-n572686 @@ -1108,15 +884,11 @@ r9555635-n818086 r9555635-n935848 r9720335-n181711 r9720335-n325382 -r9720335-n386398 r9720335-n642321 r9720335-n678155 r9720335-n680758 -r9720335-n851693 r9720335-n90554 -r9720335-n911952 r9720335-n920226 -r9720335-n976057 r9757054-n146651 r9757054-n181711 r9757054-n244243 diff --git a/raps/dataloaders/mit_supercloud/gpu_nodes.txt b/raps/dataloaders/mit_supercloud/gpu_nodes.txt new file mode 100644 index 0000000..d4c14a4 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/gpu_nodes.txt @@ -0,0 +1,228 @@ +r1457839-n386398 +r1457839-n851693 +r1457839-n911952 +r1457839-n976057 +r1485405-n386398 +r1485405-n43543 +r1485405-n685852 +r1485405-n830961 +r1485405-n851693 +r1485405-n911952 +r1485405-n976057 +r1682297-n386398 +r1682297-n43543 +r1682297-n685852 +r1682297-n830961 +r1682297-n851693 +r1682297-n911952 +r1682297-n976057 +r2100214-n386398 +r2100214-n851693 +r2100214-n911952 +r2100214-n976057 +r2582019-n386398 +r2582019-n851693 +r2582019-n911952 +r2582019-n976057 +r2652301-n386398 +r2652301-n43543 +r2652301-n685852 +r2652301-n830961 +r2652301-n851693 +r2652301-n911952 +r2652301-n976057 +r2825489-n136082 +r2825489-n139058 +r2825489-n208530 +r2998125-n136082 +r2998125-n139058 +r2998125-n208530 +r3041626-n386398 +r3041626-n43543 +r3041626-n685852 +r3041626-n830961 +r3041626-n851693 +r3041626-n911952 +r3041626-n976057 +r3117156-n136082 +r3117156-n139058 +r3117156-n208530 +r3210026-n172998 +r3226521-n386398 +r3226521-n43543 +r3226521-n685852 +r3226521-n830961 +r3226521-n851693 +r3226521-n911952 +r3226521-n976057 +r3386633-n172998 +r3405251-n136082 +r3405251-n139058 +r3405251-n208530 +r3475376-n136082 +r3475376-n139058 +r3475376-n208530 +r3741709-n386398 +r3741709-n43543 +r3741709-n685852 +r3741709-n830961 +r3741709-n851693 +r3741709-n911952 +r3741709-n976057 +r3879907-n136082 +r3879907-n139058 +r3879907-n208530 +r4179716-n386398 +r4179716-n851693 +r4179716-n911952 +r4179716-n976057 +r4229531-n386398 +r4229531-n851693 +r4229531-n911952 +r4229531-n976057 +r4327055-n136082 +r4327055-n139058 +r4327055-n208530 +r4774426-n172998 +r4822976-n136082 +r4822976-n139058 +r4822976-n208530 +r4858666-n386398 +r4858666-n43543 +r4858666-n685852 +r4858666-n830961 +r4858666-n851693 +r4858666-n911952 +r4858666-n976057 +r5130449-n136082 +r5130449-n139058 +r5130449-n208530 +r5189505-n386398 +r5189505-n43543 +r5189505-n685852 +r5189505-n830961 +r5189505-n851693 +r5189505-n911952 +r5189505-n976057 +r5573787-n386398 +r5573787-n851693 +r5573787-n911952 +r5573787-n976057 +r5715171-n136082 +r5715171-n139058 +r5715171-n208530 +r6272977-n386398 +r6272977-n851693 +r6272977-n911952 +r6272977-n976057 +r629115-n386398 +r629115-n43543 +r629115-n685852 +r629115-n830961 +r629115-n851693 +r629115-n911952 +r629115-n976057 +r6760045-n386398 +r6760045-n43543 +r6760045-n685852 +r6760045-n830961 +r6760045-n851693 +r6760045-n911952 
+r6760045-n976057 +r7217787-n386398 +r7217787-n43543 +r7217787-n685852 +r7217787-n830961 +r7217787-n851693 +r7217787-n911952 +r7217787-n976057 +r7343737-n386398 +r7343737-n43543 +r7343737-n685852 +r7343737-n830961 +r7343737-n851693 +r7343737-n911952 +r7343737-n976057 +r8015356-n386398 +r8015356-n851693 +r8015356-n911952 +r8015356-n976057 +r8062914-n136082 +r8062914-n139058 +r8062914-n208530 +r8333645-n386398 +r8333645-n43543 +r8333645-n685852 +r8333645-n830961 +r8333645-n851693 +r8333645-n911952 +r8333645-n976057 +r8579942-n136082 +r8579942-n139058 +r8579942-n208530 +r8586363-n172998 +r8607415-n386398 +r8607415-n851693 +r8607415-n911952 +r8607415-n976057 +r8642123-n386398 +r8642123-n851693 +r8642123-n911952 +r8642123-n976057 +r8937440-n386398 +r8937440-n43543 +r8937440-n685852 +r8937440-n830961 +r8937440-n851693 +r8937440-n911952 +r8937440-n976057 +r8939293-n136082 +r8939293-n139058 +r8939293-n208530 +r9040233-n386398 +r9040233-n851693 +r9040233-n911952 +r9040233-n976057 +r9102715-n386398 +r9102715-n43543 +r9102715-n685852 +r9102715-n830961 +r9102715-n851693 +r9102715-n911952 +r9102715-n976057 +r9175025-n386398 +r9175025-n851693 +r9175025-n911952 +r9175025-n976057 +r9189566-n386398 +r9189566-n43543 +r9189566-n685852 +r9189566-n830961 +r9189566-n851693 +r9189566-n911952 +r9189566-n976057 +r9192091-n386398 +r9192091-n43543 +r9192091-n685852 +r9192091-n830961 +r9192091-n851693 +r9192091-n911952 +r9192091-n976057 +r9352821-n386398 +r9352821-n43543 +r9352821-n685852 +r9352821-n830961 +r9352821-n851693 +r9352821-n911952 +r9352821-n976057 +r9535192-n386398 +r9535192-n851693 +r9535192-n911952 +r9535192-n976057 +r9555635-n136082 +r9555635-n139058 +r9555635-n208530 +r9720335-n386398 +r9720335-n851693 +r9720335-n911952 +r9720335-n976057 diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 0225ea1..736a6ce 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -MIT Supercloud job trace processing module with load_data function. +MIT Supercloud data loader """ import ast @@ -9,18 +9,17 @@ import os import math import pandas as pd import re -from typing import Dict, Union, Optional -from datetime import datetime -from types import SimpleNamespace from tqdm import tqdm +from types import SimpleNamespace +from typing import Dict, Union, Optional + from raps.job import job_dict, Job from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END from .utils import validate_job_traces -# Default SLURM TRES id→name map (extend as needed) -DEFAULT_TRES_ID_MAP = { +TRES_ID_MAP = { 1: "cpu", 2: "mem", # in MB 3: "energy", @@ -41,7 +40,7 @@ def parse_tres_alloc(tres_str: Union[str, None], The raw TRES string from Slurm (quotes OK). If None/empty returns {}. id_map : dict[int,str] | None Optional mapping from TRES numeric IDs to friendly names. - Falls back to DEFAULT_TRES_ID_MAP if not provided. + Falls back to TRES_ID_MAP if not provided. return_ids : bool If True, keys are the numeric IDs. If False, keys use id_map names (falls back to the numeric ID as a string if unknown). 
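A minimal usage sketch of this helper, using the hypothetical TRES string from its own docstring and assuming the default TRES_ID_MAP shown above:

    parse_tres_alloc('1=20,2=170000,4=1,5=20')
    # -> {'cpu': 20, 'mem': 170000, 'gres/gpu': 1, 'billing': 20}
    parse_tres_alloc('1=20,2=170000,4=1,5=20', return_ids=True)
    # -> {1: 20, 2: 170000, 4: 1, 5: 20}  (numeric IDs kept, per the docstring)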
@@ -55,7 +54,7 @@ def parse_tres_alloc(tres_str: Union[str, None], if not tres_str: return {} - id_map = id_map or DEFAULT_TRES_ID_MAP + id_map = id_map or TRES_ID_MAP # strip quotes or whitespace tres_str = tres_str.strip().strip('"').strip("'") @@ -100,7 +99,7 @@ def load_data(local_dataset_path, **kwargs): raise ValueError("Expect exactly one path") local_dataset_path = local_dataset_path[0] - # 1) slurm log -> DataFrame + # slurm log -> DataFrame slurm_path = None for root, _, files in os.walk(local_dataset_path): if "slurm-log.csv" in files: @@ -114,13 +113,7 @@ def load_data(local_dataset_path, **kwargs): sl = pd.read_csv(slurm_path) sl["__line__"] = sl.index + 2 - # Read the full node list into a Python list and build lookup from hostname → index - NL_PATH = os.path.join(os.path.dirname(__file__), "nodelist.txt") - with open(NL_PATH) as f: - all_nodes = [line.strip() for line in f if line.strip()] - node_to_idx = {host: idx for idx, host in enumerate(all_nodes)} - - # 2) date window + # date window start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) #duration = end_ts - start_ts @@ -137,7 +130,7 @@ def load_data(local_dataset_path, **kwargs): f"{kwargs.get('end_date')}. Please pick a range covered by the dataset." ) - # 3) detect GPU‐using jobs + # detect GPU‐using jobs gres = sl.gres_used.fillna("").astype(str) tres = sl.tres_alloc.fillna("").astype(str) @@ -147,11 +140,22 @@ def load_data(local_dataset_path, **kwargs): "id_job" ]) - # 4) partition mode + # partition mode part = kwargs.get("partition","").split("/")[-1].lower() cpu_only = (part=="part-cpu") mixed = (part=="part-gpu") + # create nodelist mapping + NL_PATH = os.path.dirname(__file__) + if cpu_only: + with open(os.path.join(NL_PATH, "cpu_nodes.txt")) as f: + cpu_nodes = [l.strip() for l in f if l.strip()] + cpu_node_to_idx = {h: i for i, h in enumerate(cpu_nodes)} + else: # cpu + gpu + with open(os.path.join(NL_PATH, "gpu_nodes.txt")) as f: + gpu_nodes = [l.strip() for l in f if l.strip()] + gpu_node_to_idx = {h: i for i, h in enumerate(gpu_nodes)} + if cpu_only: job_ids = set(sl.id_job) - gpu_jobs elif mixed: @@ -161,7 +165,7 @@ def load_data(local_dataset_path, **kwargs): print(f"→ mode={part}, jobs: {len(job_ids)}") - # 5) find trace files by walking directories + # find trace files by walking directories cpu_files = [] cpu_root = os.path.join(data_root, "cpu") if os.path.exists(cpu_root): @@ -190,7 +194,7 @@ def load_data(local_dataset_path, **kwargs): except (ValueError, IndexError): continue - # 6) select final trace list + # select final trace list if cpu_only: traces = cpu_files elif mixed: @@ -214,7 +218,7 @@ def load_data(local_dataset_path, **kwargs): data = {} - # 8a) CPU first + # CPU first for fp in tqdm(cpu_files, desc="Loading CPU traces"): df = pd.read_csv(fp, dtype={0: str}) jid = int(os.path.basename(fp).split("-", 1)[0]) @@ -238,14 +242,14 @@ def load_data(local_dataset_path, **kwargs): else: tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") - # Get allocated nodes "['r9189566-n911952','r9189567-n...']" raw = job_row.get("nodelist", "") - if raw: - hosts = ast.literal_eval(raw) - rec["scheduled_nodes"] = [ node_to_idx[h] for h in hosts ] + hosts = ast.literal_eval(raw) + + # Get allocated nodes "['r9189566-n911952','r9189567-n...']" + if cpu_only: + rec["scheduled_nodes"] = [cpu_node_to_idx[h] for h in hosts] else: - rec["scheduled_nodes"] = [] - #print("**", hosts, rec["scheduled_nodes"]) + 
rec["scheduled_nodes"] = [gpu_node_to_idx[h] for h in hosts] rec["nodes_alloc"] = int(job_row["nodes_alloc"]) rec["cpu"] = proc_cpu_series(df) @@ -281,7 +285,7 @@ def load_data(local_dataset_path, **kwargs): gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) gpu_cnt = data[jid].get("gpu_cnt", 0) - prev_gpu = data[jid].get("gpu") # ← define prev_gpu here + prev_gpu = data[jid].get("gpu") gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) if prev_gpu is None: data[jid]["gpu"] = gpu_ser @@ -300,22 +304,20 @@ def load_data(local_dataset_path, **kwargs): gpu_df = rec["gpu"] - # 1) grab all the gpu‐util columns + # grab all the gpu‐util columns util_cols = [c for c in gpu_df.columns if c.startswith("gpu_util_")] if not util_cols: # no gpu utilization columns? zero out rec["gpu_trace"] = [] else: - # 2) as floats in [0,1] + # as floats in [0,1] raw = gpu_df[util_cols].astype(float).div(100) - # 3) average (or sum) across devices - # if you want to SUM instead, use .sum(axis=1) + # average across devices avg_util = raw.mean(axis=1) - # 4) scale by number of nodes requested - #nodes = rec.get("nodes_alloc", 1) + # scale by number of nodes requested nodes = rec.get("nodes_alloc") rec["gpu_trace"] = (avg_util * nodes).tolist() @@ -334,13 +336,13 @@ def load_data(local_dataset_path, **kwargs): if miss: print(" jobs missing GPU despite being in gpu_files:", miss[:10]) - # 8) merge slurm metadata + # merge slurm metadata for _, row in sl.iterrows(): jid = row.id_job if jid in data and jid not in data[jid]: data[jid].update(row.to_dict()) - # 9) build final job_dicts + # build final job_dicts jobs_list = [] # Get CPUS_PER_NODE and GPUS_PER_NODE from config @@ -423,6 +425,12 @@ def load_data(local_dataset_path, **kwargs): trace_start_time = 0, trace_end_time = len(cpu_tr)*quanta ) + + view = job.copy() + view['cpu_trace'] = view['cpu_trace'][:5] + ['…'] + view['gpu_trace'] = view['gpu_trace'][:5] + ['…'] + print(view) + #validate_job_traces(Job(job), granularity=quanta) # if nr > 1: # uncomment to test multinode jobs - need to run for 24 hours to get enough jobs to populate jobs_list.append(job) -- GitLab From a7a5e053631e026448d53708649d4ae4769365dc Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 24 Jul 2025 11:40:54 -0400 Subject: [PATCH 180/388] Prune cpu_nodes.txt list to exactly 480 nodes (see docstring in loader.py) --- raps/dataloaders/mit_supercloud/cpu_nodes.txt | 427 ------------------ raps/dataloaders/mit_supercloud/loader.py | 36 ++ .../dataloaders/mit_supercloud/prune_list.txt | 14 + 3 files changed, 50 insertions(+), 427 deletions(-) create mode 100644 raps/dataloaders/mit_supercloud/prune_list.txt diff --git a/raps/dataloaders/mit_supercloud/cpu_nodes.txt b/raps/dataloaders/mit_supercloud/cpu_nodes.txt index 2ce4a6e..ff1c4b7 100644 --- a/raps/dataloaders/mit_supercloud/cpu_nodes.txt +++ b/raps/dataloaders/mit_supercloud/cpu_nodes.txt @@ -1,907 +1,480 @@ r1018283-n146651 r1018283-n181711 -r1018283-n244243 -r1018283-n310809 r1018283-n325382 -r1018283-n341098 r1018283-n392209 r1018283-n468303 r1018283-n598065 -r1018283-n642321 -r1018283-n642649 -r1018283-n678155 r1018283-n680758 -r1018283-n90554 r1018283-n920226 r1081020-n146651 r1081020-n181711 -r1081020-n244243 -r1081020-n310809 r1081020-n325382 -r1081020-n341098 r1081020-n392209 r1081020-n468303 r1081020-n598065 -r1081020-n642321 -r1081020-n642649 -r1081020-n678155 r1081020-n680758 -r1081020-n90554 r1081020-n920226 -r1356503-n172998 r1416152-n134943 r1416152-n440434 r1416152-n442913 -r1416152-n572686 -r1416152-n588870 
r1416152-n691735 -r1416152-n818086 -r1416152-n935848 r1457839-n181711 r1457839-n325382 -r1457839-n642321 -r1457839-n678155 r1457839-n680758 -r1457839-n90554 r1457839-n920226 r1485405-n146651 r1485405-n181711 -r1485405-n244243 -r1485405-n310809 r1485405-n325382 -r1485405-n341098 r1485405-n392209 r1485405-n468303 r1485405-n598065 -r1485405-n642321 -r1485405-n642649 -r1485405-n678155 r1485405-n680758 -r1485405-n90554 r1485405-n920226 r1642813-n134943 r1642813-n440434 r1642813-n442913 -r1642813-n572686 -r1642813-n588870 r1642813-n691735 -r1642813-n818086 -r1642813-n935848 r1682297-n146651 r1682297-n181711 -r1682297-n244243 -r1682297-n310809 r1682297-n325382 -r1682297-n341098 r1682297-n392209 r1682297-n468303 r1682297-n598065 -r1682297-n642321 -r1682297-n642649 -r1682297-n678155 r1682297-n680758 -r1682297-n90554 r1682297-n920226 r189256-n146651 r189256-n181711 -r189256-n244243 -r189256-n310809 r189256-n325382 -r189256-n341098 r189256-n392209 r189256-n468303 r189256-n598065 -r189256-n642321 -r189256-n642649 -r189256-n678155 r189256-n680758 -r189256-n90554 r189256-n920226 r2008197-n181711 r2008197-n325382 -r2008197-n642321 -r2008197-n678155 r2008197-n680758 -r2008197-n90554 r2008197-n920226 r2086368-n146651 r2086368-n181711 -r2086368-n244243 -r2086368-n310809 r2086368-n325382 -r2086368-n341098 r2086368-n392209 r2086368-n468303 r2086368-n598065 -r2086368-n642321 -r2086368-n642649 -r2086368-n678155 r2086368-n680758 -r2086368-n90554 r2086368-n920226 r2100214-n181711 r2100214-n325382 -r2100214-n642321 -r2100214-n678155 r2100214-n680758 -r2100214-n90554 r2100214-n920226 r2159346-n134943 r2159346-n440434 r2159346-n442913 -r2159346-n572686 -r2159346-n588870 r2159346-n691735 -r2159346-n818086 -r2159346-n935848 r2501111-n134943 r2501111-n440434 r2501111-n442913 -r2501111-n572686 -r2501111-n588870 r2501111-n691735 -r2501111-n818086 -r2501111-n935848 r2582019-n181711 r2582019-n325382 -r2582019-n642321 -r2582019-n678155 r2582019-n680758 -r2582019-n90554 r2582019-n920226 -r2627558-n172998 r2652301-n146651 r2652301-n181711 -r2652301-n244243 -r2652301-n310809 r2652301-n325382 -r2652301-n341098 r2652301-n392209 r2652301-n468303 r2652301-n598065 -r2652301-n642321 -r2652301-n642649 -r2652301-n678155 r2652301-n680758 -r2652301-n90554 r2652301-n920226 r2825489-n134943 r2825489-n440434 r2825489-n442913 -r2825489-n572686 -r2825489-n588870 r2825489-n691735 -r2825489-n818086 -r2825489-n935848 r29114-n146651 r29114-n181711 -r29114-n244243 -r29114-n310809 r29114-n325382 -r29114-n341098 r29114-n392209 r29114-n468303 r29114-n598065 -r29114-n642321 -r29114-n642649 -r29114-n678155 r29114-n680758 -r29114-n90554 r29114-n920226 r2998125-n134943 r2998125-n440434 r2998125-n442913 -r2998125-n572686 -r2998125-n588870 r2998125-n691735 -r2998125-n818086 -r2998125-n935848 r3039576-n181711 r3039576-n325382 -r3039576-n642321 -r3039576-n678155 r3039576-n680758 -r3039576-n90554 r3039576-n920226 r3041626-n146651 r3041626-n181711 -r3041626-n244243 -r3041626-n310809 r3041626-n325382 -r3041626-n341098 r3041626-n392209 r3041626-n468303 r3041626-n598065 -r3041626-n642321 -r3041626-n642649 -r3041626-n678155 r3041626-n680758 -r3041626-n90554 r3041626-n920226 -r3045754-n48252 r3117156-n134943 r3117156-n440434 r3117156-n442913 -r3117156-n572686 -r3117156-n588870 r3117156-n691735 -r3117156-n818086 -r3117156-n935848 r322031-n134943 r322031-n440434 r322031-n442913 -r322031-n572686 -r322031-n588870 r322031-n691735 -r322031-n818086 -r322031-n935848 r3226521-n146651 r3226521-n181711 -r3226521-n244243 -r3226521-n310809 r3226521-n325382 -r3226521-n341098 
r3226521-n392209 r3226521-n468303 r3226521-n598065 -r3226521-n642321 -r3226521-n642649 -r3226521-n678155 r3226521-n680758 -r3226521-n90554 r3226521-n920226 -r3236768-n172998 r3254677-n181711 r3254677-n325382 -r3254677-n642321 -r3254677-n678155 r3254677-n680758 -r3254677-n90554 r3254677-n920226 r3475376-n134943 r3475376-n440434 r3475376-n442913 -r3475376-n572686 -r3475376-n588870 r3475376-n691735 -r3475376-n818086 -r3475376-n935848 r3581284-n146651 r3581284-n181711 -r3581284-n244243 -r3581284-n310809 r3581284-n325382 -r3581284-n341098 r3581284-n392209 r3581284-n468303 r3581284-n598065 -r3581284-n642321 -r3581284-n642649 -r3581284-n678155 r3581284-n680758 -r3581284-n90554 r3581284-n920226 r3685766-n134943 r3685766-n440434 r3685766-n442913 -r3685766-n572686 -r3685766-n588870 r3685766-n691735 -r3685766-n818086 -r3685766-n935848 r3741709-n146651 r3741709-n181711 -r3741709-n244243 -r3741709-n310809 r3741709-n325382 -r3741709-n341098 r3741709-n392209 r3741709-n468303 r3741709-n598065 -r3741709-n642321 -r3741709-n642649 -r3741709-n678155 r3741709-n680758 -r3741709-n90554 r3741709-n920226 r3824475-n146651 r3824475-n181711 -r3824475-n244243 -r3824475-n310809 r3824475-n325382 -r3824475-n341098 r3824475-n392209 r3824475-n468303 r3824475-n598065 -r3824475-n642321 -r3824475-n642649 -r3824475-n678155 r3824475-n680758 -r3824475-n90554 r3824475-n920226 r3879907-n134943 r3879907-n440434 r3879907-n442913 -r3879907-n572686 -r3879907-n588870 r3879907-n691735 -r3879907-n818086 -r3879907-n935848 r406820-n181711 r406820-n325382 -r406820-n642321 -r406820-n678155 r406820-n680758 -r406820-n90554 r406820-n920226 r4153679-n134943 r4153679-n440434 r4153679-n442913 -r4153679-n572686 -r4153679-n588870 r4153679-n691735 -r4153679-n818086 -r4153679-n935848 r4179716-n181711 r4179716-n325382 -r4179716-n642321 -r4179716-n678155 r4179716-n680758 -r4179716-n90554 r4179716-n920226 r4229531-n181711 r4229531-n325382 -r4229531-n642321 -r4229531-n678155 r4229531-n680758 -r4229531-n90554 r4229531-n920226 r4247208-n146651 r4247208-n181711 -r4247208-n244243 -r4247208-n310809 r4247208-n325382 -r4247208-n341098 r4247208-n392209 r4247208-n468303 r4247208-n598065 -r4247208-n642321 -r4247208-n642649 -r4247208-n678155 r4247208-n680758 -r4247208-n90554 r4247208-n920226 r4327055-n134943 r4327055-n440434 r4327055-n442913 -r4327055-n572686 -r4327055-n588870 r4327055-n691735 -r4327055-n818086 -r4327055-n935848 r4357125-n134943 r4357125-n440434 r4357125-n442913 -r4357125-n572686 -r4357125-n588870 r4357125-n691735 -r4357125-n818086 -r4357125-n935848 r4822976-n134943 r4822976-n440434 r4822976-n442913 -r4822976-n572686 -r4822976-n588870 r4822976-n691735 -r4822976-n818086 -r4822976-n935848 r4858666-n146651 r4858666-n181711 -r4858666-n244243 -r4858666-n310809 r4858666-n325382 -r4858666-n341098 r4858666-n392209 r4858666-n468303 r4858666-n598065 -r4858666-n642321 -r4858666-n642649 -r4858666-n678155 r4858666-n680758 -r4858666-n90554 r4858666-n920226 r4874959-n181711 r4874959-n325382 -r4874959-n642321 -r4874959-n678155 r4874959-n680758 -r4874959-n90554 r4874959-n920226 r4990664-n134943 r4990664-n440434 r4990664-n442913 -r4990664-n572686 -r4990664-n588870 r4990664-n691735 -r4990664-n818086 -r4990664-n935848 r5130449-n134943 r5130449-n440434 r5130449-n442913 -r5130449-n572686 -r5130449-n588870 -r5130449-n691735 -r5130449-n818086 -r5130449-n935848 r5189505-n146651 r5189505-n181711 -r5189505-n244243 -r5189505-n310809 r5189505-n325382 -r5189505-n341098 r5189505-n392209 r5189505-n468303 r5189505-n598065 -r5189505-n642321 -r5189505-n642649 -r5189505-n678155 
r5189505-n680758 -r5189505-n90554 r5189505-n920226 r5261712-n134943 r5261712-n440434 r5261712-n442913 -r5261712-n572686 -r5261712-n588870 r5261712-n691735 -r5261712-n818086 -r5261712-n935848 r5573787-n181711 r5573787-n325382 -r5573787-n642321 -r5573787-n678155 r5573787-n680758 -r5573787-n90554 r5573787-n920226 r5715171-n134943 r5715171-n440434 r5715171-n442913 -r5715171-n572686 -r5715171-n588870 r5715171-n691735 -r5715171-n818086 -r5715171-n935848 r6102167-n181711 r6102167-n325382 -r6102167-n642321 -r6102167-n678155 r6102167-n680758 -r6102167-n90554 r6102167-n920226 r6272977-n181711 r6272977-n325382 -r6272977-n642321 -r6272977-n678155 r6272977-n680758 -r6272977-n90554 r6272977-n920226 r629115-n146651 r629115-n181711 -r629115-n244243 -r629115-n310809 r629115-n325382 -r629115-n341098 r629115-n392209 r629115-n468303 r629115-n598065 -r629115-n642321 -r629115-n642649 -r629115-n678155 r629115-n680758 -r629115-n90554 r629115-n920226 r6341586-n146651 r6341586-n181711 -r6341586-n244243 -r6341586-n310809 r6341586-n325382 -r6341586-n341098 r6341586-n392209 r6341586-n468303 r6341586-n598065 -r6341586-n642321 -r6341586-n642649 -r6341586-n678155 r6341586-n680758 -r6341586-n90554 r6341586-n920226 -r6491112-n172998 r6531478-n181711 r6531478-n325382 -r6531478-n642321 -r6531478-n678155 r6531478-n680758 -r6531478-n90554 r6531478-n920226 r6631426-n181711 r6631426-n325382 -r6631426-n642321 -r6631426-n678155 r6631426-n680758 -r6631426-n90554 r6631426-n920226 r6682735-n146651 r6682735-n181711 -r6682735-n244243 -r6682735-n310809 r6682735-n325382 -r6682735-n341098 r6682735-n392209 r6682735-n468303 r6682735-n598065 -r6682735-n642321 -r6682735-n642649 -r6682735-n678155 r6682735-n680758 -r6682735-n90554 r6682735-n920226 r6760045-n146651 r6760045-n181711 -r6760045-n244243 -r6760045-n310809 r6760045-n325382 -r6760045-n341098 r6760045-n392209 r6760045-n468303 r6760045-n598065 -r6760045-n642321 -r6760045-n642649 -r6760045-n678155 r6760045-n680758 -r6760045-n90554 r6760045-n920226 r697496-n146651 r697496-n181711 -r697496-n244243 -r697496-n310809 r697496-n325382 -r697496-n341098 r697496-n392209 r697496-n468303 r697496-n598065 -r697496-n642321 -r697496-n642649 -r697496-n678155 r697496-n680758 -r697496-n90554 r697496-n920226 r7217787-n146651 r7217787-n181711 r7217787-n244243 -r7217787-n310809 r7217787-n325382 -r7217787-n341098 r7217787-n392209 r7217787-n468303 r7217787-n598065 -r7217787-n642321 -r7217787-n642649 -r7217787-n678155 r7217787-n680758 -r7217787-n90554 r7217787-n920226 r7343737-n146651 r7343737-n181711 -r7343737-n244243 -r7343737-n310809 r7343737-n325382 -r7343737-n341098 r7343737-n392209 r7343737-n468303 r7343737-n598065 -r7343737-n642321 -r7343737-n642649 -r7343737-n678155 r7343737-n680758 -r7343737-n90554 r7343737-n920226 r7831860-n181711 r7831860-n325382 -r7831860-n642321 -r7831860-n678155 r7831860-n680758 -r7831860-n90554 r7831860-n920226 r7839831-n146651 r7839831-n181711 -r7839831-n244243 -r7839831-n310809 r7839831-n325382 -r7839831-n341098 r7839831-n392209 r7839831-n468303 r7839831-n598065 -r7839831-n642321 -r7839831-n642649 -r7839831-n678155 r7839831-n680758 -r7839831-n90554 r7839831-n920226 r7952476-n146651 r7952476-n181711 -r7952476-n244243 -r7952476-n310809 r7952476-n325382 -r7952476-n341098 r7952476-n392209 r7952476-n468303 r7952476-n598065 -r7952476-n642321 -r7952476-n642649 -r7952476-n678155 r7952476-n680758 -r7952476-n90554 r7952476-n920226 r8015356-n181711 r8015356-n325382 -r8015356-n642321 -r8015356-n678155 r8015356-n680758 -r8015356-n90554 r8015356-n920226 r8062914-n134943 -r8062914-n440434 
r8062914-n442913 -r8062914-n572686 -r8062914-n588870 r8062914-n691735 -r8062914-n818086 -r8062914-n935848 r8212643-n146651 r8212643-n181711 r8212643-n244243 -r8212643-n310809 r8212643-n325382 -r8212643-n341098 r8212643-n392209 r8212643-n468303 r8212643-n598065 -r8212643-n642321 -r8212643-n642649 -r8212643-n678155 r8212643-n680758 -r8212643-n90554 r8212643-n920226 r8333645-n146651 r8333645-n181711 -r8333645-n244243 -r8333645-n310809 r8333645-n325382 -r8333645-n341098 r8333645-n392209 r8333645-n468303 r8333645-n598065 -r8333645-n642321 -r8333645-n642649 -r8333645-n678155 r8333645-n680758 -r8333645-n90554 r8333645-n920226 r8595196-n134943 r8595196-n440434 r8595196-n442913 -r8595196-n572686 -r8595196-n588870 r8595196-n691735 -r8595196-n818086 -r8595196-n935848 r8607415-n181711 r8607415-n325382 -r8607415-n642321 -r8607415-n678155 r8607415-n680758 -r8607415-n90554 r8607415-n920226 r8642123-n181711 r8642123-n325382 -r8642123-n642321 -r8642123-n678155 r8642123-n680758 -r8642123-n90554 r8642123-n920226 r8792496-n181711 r8792496-n325382 -r8792496-n642321 -r8792496-n678155 r8792496-n680758 -r8792496-n90554 r8792496-n920226 r8918301-n181711 r8918301-n325382 -r8918301-n642321 -r8918301-n678155 r8918301-n680758 -r8918301-n90554 r8918301-n920226 r8937440-n146651 r8937440-n181711 -r8937440-n244243 -r8937440-n310809 r8937440-n325382 -r8937440-n341098 r8937440-n392209 r8937440-n468303 r8937440-n598065 -r8937440-n642321 -r8937440-n678155 r8937440-n680758 -r8937440-n90554 r8937440-n920226 r8939293-n134943 r8939293-n440434 r8939293-n442913 -r8939293-n572686 -r8939293-n588870 r8939293-n691735 -r8939293-n818086 -r8939293-n935848 r9021574-n146651 r9021574-n181711 -r9021574-n244243 -r9021574-n310809 r9021574-n325382 -r9021574-n341098 r9021574-n392209 r9021574-n468303 r9021574-n598065 -r9021574-n642321 -r9021574-n642649 -r9021574-n678155 r9021574-n680758 -r9021574-n90554 r9021574-n920226 r9026042-n146651 r9026042-n181711 -r9026042-n244243 -r9026042-n310809 r9026042-n325382 -r9026042-n341098 r9026042-n392209 r9026042-n468303 r9026042-n598065 -r9026042-n642321 -r9026042-n642649 -r9026042-n678155 r9026042-n680758 -r9026042-n90554 r9026042-n920226 r9040233-n181711 r9040233-n325382 -r9040233-n642321 -r9040233-n678155 r9040233-n680758 -r9040233-n90554 r9040233-n920226 r9102715-n146651 r9102715-n181711 -r9102715-n244243 -r9102715-n310809 r9102715-n325382 -r9102715-n341098 r9102715-n392209 r9102715-n468303 r9102715-n598065 -r9102715-n642321 -r9102715-n642649 -r9102715-n678155 r9102715-n680758 -r9102715-n90554 r9102715-n920226 r9113711-n181711 r9113711-n325382 -r9113711-n642321 -r9113711-n678155 r9113711-n680758 -r9113711-n90554 r9113711-n920226 -r9115114-n172998 r9189566-n146651 r9189566-n181711 -r9189566-n244243 -r9189566-n310809 r9189566-n325382 -r9189566-n341098 r9189566-n392209 r9189566-n468303 r9189566-n598065 -r9189566-n642321 -r9189566-n642649 -r9189566-n678155 r9189566-n680758 -r9189566-n90554 r9189566-n920226 r9192091-n146651 r9192091-n181711 -r9192091-n244243 -r9192091-n310809 r9192091-n325382 -r9192091-n341098 r9192091-n392209 r9192091-n468303 r9192091-n598065 -r9192091-n642321 -r9192091-n642649 -r9192091-n678155 r9192091-n680758 -r9192091-n90554 r9192091-n920226 r9273661-n146651 r9273661-n181711 -r9273661-n244243 -r9273661-n310809 r9273661-n325382 -r9273661-n341098 r9273661-n392209 r9273661-n468303 r9273661-n598065 -r9273661-n642321 -r9273661-n642649 -r9273661-n678155 r9273661-n680758 -r9273661-n90554 r9273661-n920226 r9352821-n146651 r9352821-n181711 r9352821-n244243 -r9352821-n310809 r9352821-n325382 
-r9352821-n341098 r9352821-n392209 -r9352821-n468303 r9352821-n598065 -r9352821-n642321 -r9352821-n642649 -r9352821-n678155 r9352821-n680758 -r9352821-n90554 r9352821-n920226 r9366523-n134943 r9366523-n440434 r9366523-n442913 -r9366523-n572686 -r9366523-n588870 r9366523-n691735 -r9366523-n818086 -r9366523-n935848 -r9541411-n172998 r9555635-n134943 r9555635-n440434 r9555635-n442913 -r9555635-n572686 -r9555635-n588870 r9555635-n691735 -r9555635-n818086 -r9555635-n935848 r9720335-n181711 r9720335-n325382 -r9720335-n642321 -r9720335-n678155 r9720335-n680758 -r9720335-n90554 r9720335-n920226 r9757054-n146651 r9757054-n181711 -r9757054-n244243 -r9757054-n310809 r9757054-n325382 -r9757054-n341098 r9757054-n392209 r9757054-n468303 r9757054-n598065 -r9757054-n642321 -r9757054-n642649 -r9757054-n678155 r9757054-n680758 -r9757054-n90554 r9757054-n920226 -r9836048-n172998 diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 736a6ce..6279b67 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -2,6 +2,30 @@ # -*- coding: utf-8 -*- """ MIT Supercloud data loader + +This module processes job traces from the MIT SuperCloud dataset with careful +node filtering based on observed resource allocation history. + +Summary of node filtering: + +- A total of 1135 unique node IDs were extracted from `slurm-log.csv`. +- Of these, 228 were identified as GPU-capable nodes (recorded in `gpu_nodes.txt`). +- The remaining 907 nodes were treated as CPU-only candidates. + +Filtering steps: + +1. Jobs with `nodes_alloc > 480` were excluded, based on the assumption that + such large allocations span across GPU nodes. This removed 413 nodes, + leaving 494 candidate CPU-only nodes. + +2. To reach the target of 480 CPU nodes, we analyzed job frequency per node + and pruned the 14 least-used nodes (those with only 1–26 jobs). + These pruned nodes are listed in `prune_list.txt`. + +The final list of CPU-only nodes is stored in `cpu_nodes.txt`, and the list +of GPU nodes are stored in `gpu_nodes.txt`. + +Note: To locate the pruning logic, search for the keyword "prune" in the code. """ import ast @@ -123,6 +147,18 @@ def load_data(local_dataset_path, **kwargs): hits = sl.loc[mask] print("line numbers in slurm-log.csv", hits["__line__"].tolist()) + # --- prune out oversized jobs and known under‑used hosts --- + # load list of underutilized nodes to ignore + pruned = set() + if os.path.exists("prune_list.txt"): + with open(prune_path) as pf: + pruned = {l.strip() for l in pf if l.strip()} + # only keep jobs requesting ≤480 nodes + sl = sl[ sl.nodes_alloc <= 480 ] + # drop any job whose nodelist includes a pruned node + sl["nodes_list"] = sl["nodelist"].apply(ast.literal_eval) + sl = sl[ ~sl["nodes_list"].apply(lambda lst: any(n in pruned for n in lst)) ] + # —— ERROR CATCH: no jobs in this window? 
—— if sl.empty: raise ValueError( diff --git a/raps/dataloaders/mit_supercloud/prune_list.txt b/raps/dataloaders/mit_supercloud/prune_list.txt new file mode 100644 index 0000000..8e45524 --- /dev/null +++ b/raps/dataloaders/mit_supercloud/prune_list.txt @@ -0,0 +1,14 @@ +r1356503-n172998 +r2627558-n172998 +r3045754-n48252 +r3236768-n172998 +r9836048-n172998 +r9115114-n172998 +r5130449-n691735 +r6491112-n172998 +r9541411-n172998 +r6682735-n244243 +r629115-n244243 +r7839831-n244243 +r189256-n244243 +r4858666-n244243 -- GitLab From 866ceb39519fa1a30bad5ee55caaac7af74425a2 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 24 Jul 2025 15:43:06 -0400 Subject: [PATCH 181/388] Fix and verify cpu traces --- raps/dataloaders/mit_supercloud/loader.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 6279b67..908a96f 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -31,6 +31,7 @@ Note: To locate the pruning logic, search for the keyword "prune" in the code. import ast import os import math +import numpy as np import pandas as pd import re @@ -431,9 +432,10 @@ def load_data(local_dataset_path, **kwargs): print(f"*** nr: {nr}, cpu_cores_req: {cpu_cores_req}, gpu_units_req: {gpu_units_req}", flush=True) print(jid, cpu_tr[:5], flush=True) - # we're not quite sure which is correct below - but the second one seems more likely - #cpu_tr = [float(f"{x/nr/cores_per_cpu:4g}") for x in cpu_tr] - cpu_tr = [float(f"{x/cores_per_cpu:4g}") for x in cpu_tr] + + # sometimes there are spurious large values for cpu util - set max limit based on peak + cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node + cpu_tr = [min(x/cores_per_cpu/cpus_per_node, cpu_peak) for x in cpu_tr] print(jid, cpu_tr[:5]) submit_time = rec.get("time_submit", t0) - start_ts @@ -463,8 +465,18 @@ def load_data(local_dataset_path, **kwargs): ) view = job.copy() - view['cpu_trace'] = view['cpu_trace'][:5] + ['…'] - view['gpu_trace'] = view['gpu_trace'][:5] + ['…'] + #view['cpu_trace'] = view['cpu_trace'][:5] + ['…'] + #view['gpu_trace'] = view['gpu_trace'][:5] + ['…'] + + summarize_trace = lambda x: { + 'min': float(np.min(x)), + 'max': float(np.max(x)), + 'avg': float(np.mean(x)), + 'len': len(x), + } + view['cpu_trace'] = summarize_trace(job['cpu_trace']) + view['gpu_trace'] = summarize_trace(job['gpu_trace']) + view['cpu_peak'] = job['cpu_cores_required'] / cores_per_cpu / cpus_per_node print(view) #validate_job_traces(Job(job), granularity=quanta) -- GitLab From bd4c29eca0090feb6560e5ae4115720a10e87877 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 24 Jul 2025 21:45:00 -0400 Subject: [PATCH 182/388] Fix bug with pruning on nodes in prune_list.txt - tested and working now --- raps/dataloaders/mit_supercloud/loader.py | 29 +++++++++++++++++------ 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 908a96f..b443ce5 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -40,6 +40,7 @@ from types import SimpleNamespace from typing import Dict, Union, Optional from raps.job import job_dict, Job +from raps.utils import summarize_ranges from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END from .utils import validate_job_traces @@ -118,6 +119,7 @@ def 
load_data(local_dataset_path, **kwargs):
     jobs_list, sim_start_time, sim_end_time
     """
     debug = kwargs.get("debug")
+    NL_PATH = os.path.dirname(__file__)
     # unpack
     if isinstance(local_dataset_path, list):
         if len(local_dataset_path) != 1:
@@ -145,20 +147,35 @@ def load_data(local_dataset_path, **kwargs):
 
     mask = (sl.time_submit >= start_ts) & (sl.time_submit < end_ts)
     sl = sl[mask]
+    print(f"[DEBUG] After time filtering: {len(sl)} jobs")
     hits = sl.loc[mask]
-    print("line numbers in slurm-log.csv", hits["__line__"].tolist())
+    print("line numbers in slurm-log.csv", summarize_ranges(hits["__line__"].tolist()))
 
     # --- prune out oversized jobs and known under‑used hosts ---
     # load list of underutilized nodes to ignore
     pruned = set()
-    if os.path.exists("prune_list.txt"):
-        with open(prune_path) as pf:
-            pruned = {l.strip() for l in pf if l.strip()}
+    with open(os.path.join(NL_PATH, "prune_list.txt")) as pf:
+        pruned = {l.strip() for l in pf if l.strip()}
+    print(pruned)
     # only keep jobs requesting ≤480 nodes
     sl = sl[ sl.nodes_alloc <= 480 ]
+    print(f"[DEBUG] After nodes_alloc ≤ 480 filter: {len(sl)} jobs")
     # drop any job whose nodelist includes a pruned node
     sl["nodes_list"] = sl["nodelist"].apply(ast.literal_eval)
-    sl = sl[ ~sl["nodes_list"].apply(lambda lst: any(n in pruned for n in lst)) ]
+
+    def is_pruned(lst):
+        matches = [n for n in lst if n in pruned]
+        if matches:
+            print(f"[DEBUG] Skipping job due to pruned nodes: {matches}")
+            return True
+        return False
+
+    before = len(sl)
+    sl = sl[~sl["nodes_list"].apply(is_pruned)]
+    after = len(sl)
+
+    print(f"[DEBUG] Jobs removed by pruning: {before - after}")
+    print(f"[DEBUG] After pruning: {len(sl)} jobs")
 
     # —— ERROR CATCH: no jobs in this window? ——
     if sl.empty:
         raise ValueError(
-- 
GitLab


From fd699e0587919148f72c5553b169aafa7ba9 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Thu, 24 Jul 2025 21:56:56 -0400
Subject: [PATCH 183/388] Add documentation to the docstring in
 mit_supercloud/loader.py

---
 raps/dataloaders/mit_supercloud/loader.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py
index b443ce5..c79138d 100644
--- a/raps/dataloaders/mit_supercloud/loader.py
+++ b/raps/dataloaders/mit_supercloud/loader.py
@@ -3,8 +3,22 @@
 """
 MIT Supercloud data loader
 
-This module processes job traces from the MIT SuperCloud dataset with careful
-node filtering based on observed resource allocation history.
+This module extracts and processes job traces from the MIT SuperCloud dataset,
+starting with the slurm-log.csv file, and then searching for the files in the cpu
+and gpu directories. The main paper associated with the MIT Supercloud Dataset
+is available here: https://arxiv.org/abs/2108.02037.
+There is more information available here: https://dcc.mit.edu/
+
+Note that quite a bit of filtering is done with sanity checks to make sure
+that the CPU traces match the GPU traces, etc.
At this point it's not uncommon
+that, of 1569 total jobs in the time range, only 834 cpu jobs and 128
+gpu jobs (962 total) are able to be replayed. This is an issue that will likely
+have to be addressed in the future.
+
+---------------------------------------------------------------------------
+How we curated and generated the node ids: cpu_nodes.txt and gpu_nodes.txt
+
+Node filtering based on observed resource allocation history.
 
 Summary of node filtering:
 
@@ -149,7 +163,9 @@ def load_data(local_dataset_path, **kwargs):
     sl = sl[mask]
     print(f"[DEBUG] After time filtering: {len(sl)} jobs")
     hits = sl.loc[mask]
-    print("line numbers in slurm-log.csv", summarize_ranges(hits["__line__"].tolist()))
+    lines = hits["__line__"].tolist()
+    print(f"data sourced from {len(lines)} records in slurm-log.csv. Line number ranges:",
+          summarize_ranges(lines))
 
     # --- prune out oversized jobs and known under‑used hosts ---
     # load list of underutilized nodes to ignore
-- 
GitLab


From 2f0381d45a0acb4b43165dc32399c75e6646844b Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Fri, 25 Jul 2025 15:55:09 -0400
Subject: [PATCH 184/388] Add in counters to provide stats on why certain jobs
 were not included

---
 raps/dataloaders/mit_supercloud/loader.py | 158 ++++++++++------------
 1 file changed, 71 insertions(+), 87 deletions(-)

diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py
index c79138d..a98bc08 100644
--- a/raps/dataloaders/mit_supercloud/loader.py
+++ b/raps/dataloaders/mit_supercloud/loader.py
@@ -52,6 +52,7 @@ import re
 from tqdm import tqdm
 from types import SimpleNamespace
 from typing import Dict, Union, Optional
+from collections import Counter
 
 from raps.job import job_dict, Job
 from raps.utils import summarize_ranges
@@ -70,7 +71,8 @@ TRES_ID_MAP = {
 
 def parse_tres_alloc(tres_str: Union[str, None],
                      id_map: Optional[Dict[int, str]] = None,
-                     return_ids: bool = False) -> Dict[Union[int, str], int]:
+                     return_ids: bool = False,
+                     stats: Counter = None) -> Dict[Union[int, str], int]:
     """
     Parse a Slurm tres_alloc/tres_req field like:
         '1=20,2=170000,4=1,5=20'
@@ -84,6 +86,8 @@ def parse_tres_alloc(tres_str: Union[str, None],
     return_ids : bool
        If True, keys are the numeric IDs. If False, keys use id_map names
        (falls back to the numeric ID as a string if unknown).
+    stats : Counter
+        Optional counter to track parsing errors.
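+        As an illustration (hypothetical input): a part such as '4='
+        fails the ID=value pattern, is skipped, and, when a Counter is
+        supplied, is tallied via stats["malformed_tres"] += 1.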
Returns ------- @@ -107,6 +111,8 @@ def parse_tres_alloc(tres_str: Union[str, None], for p in parts: m = re.match(r"\s*(\d+)\s*=\s*([0-9]+)\s*$", p) if not m: + if stats is not None: + stats["malformed_tres"] += 1 # skip or raise; here we skip silently continue tid = int(m.group(1)) @@ -134,6 +140,9 @@ def load_data(local_dataset_path, **kwargs): """ debug = kwargs.get("debug") NL_PATH = os.path.dirname(__file__) + + skip_counts = Counter() + # unpack if isinstance(local_dataset_path, list): if len(local_dataset_path) != 1: @@ -172,25 +181,29 @@ def load_data(local_dataset_path, **kwargs): pruned = set() with open(os.path.join(NL_PATH, "prune_list.txt")) as pf: pruned = {l.strip() for l in pf if l.strip()} - print(pruned) + + before_prune = len(sl) # only keep jobs requesting ≤480 nodes sl = sl[ sl.nodes_alloc <= 480 ] - print(f"[DEBUG] After nodes_alloc ≤ 480 filter: {len(sl)} jobs") + after_alloc_filter = len(sl) + skip_counts['nodes_alloc > 480'] += (before_prune - after_alloc_filter) + # drop any job whose nodelist includes a pruned node sl["nodes_list"] = sl["nodelist"].apply(ast.literal_eval) def is_pruned(lst): matches = [n for n in lst if n in pruned] if matches: - print(f"[DEBUG] Skipping job due to pruned nodes: {matches}") + #print(f"[DEBUG] Skipping job due to pruned nodes: {matches}") return True return False - before = len(sl) + before_prune_filter = len(sl) sl = sl[~sl["nodes_list"].apply(is_pruned)] - after = len(sl) + after_prune_filter = len(sl) + skip_counts['pruned_nodes'] += (before_prune_filter - after_prune_filter) + - print(f"[DEBUG] Jobs removed by pruning: {before - after}") print(f"[DEBUG] After pruning: {len(sl)} jobs") # —— ERROR CATCH: no jobs in this window? —— @@ -227,8 +240,10 @@ def load_data(local_dataset_path, **kwargs): if cpu_only: job_ids = set(sl.id_job) - gpu_jobs + skip_counts['gpu_job_in_cpu_mode'] += len(set(sl.id_job) & gpu_jobs) elif mixed: job_ids = gpu_jobs & set(sl.id_job) + skip_counts['cpu_job_in_gpu_mode'] += len(set(sl.id_job) - gpu_jobs) else: job_ids = set(sl.id_job) @@ -286,6 +301,11 @@ def load_data(local_dataset_path, **kwargs): print(f"→ {len(cpu_files)} CPU files, {len(gpu_files)} GPU files → total {len(traces)}") data = {} + + traced_jobs = {int(os.path.basename(p).split('-',1)[0]) for p in traces} + untraced_jobs = job_ids - traced_jobs + skip_counts['no_trace_file'] += len(untraced_jobs) + # CPU first for fp in tqdm(cpu_files, desc="Loading CPU traces"): @@ -295,58 +315,51 @@ def load_data(local_dataset_path, **kwargs): # Find job info in slurm log and print details job_info = sl[sl.id_job == jid] - if not job_info.empty: - job_row = job_info.iloc[0] - start_time = job_row.get('time_start', 'N/A') - wall_time = job_row.get('time_limit', 'N/A') - tres_alloc = job_row.get('tres_alloc', 'N/A') - tres_alloc_dict = parse_tres_alloc(tres_alloc) - rec["tres_alloc_dict"] = tres_alloc_dict - gres_used = job_row.get('gres_used', 'N/A') - - tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid}") - tqdm.write(f" Start Time: {start_time}, Wall Time: {wall_time}s") - tqdm.write(f" TRES Alloc: {tres_alloc_dict}") - #tqdm.write(f" GRES Used: {gres_used}") - else: - tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") + if job_info.empty: + skip_counts['job_not_in_slurm_log'] += 1 + continue + + job_row = job_info.iloc[0] + start_time = job_row.get('time_start', 'N/A') + wall_time = job_row.get('time_limit', 'N/A') + tres_alloc = job_row.get('tres_alloc', 'N/A') + tres_alloc_dict = 
parse_tres_alloc(tres_alloc, stats=skip_counts) + rec["tres_alloc_dict"] = tres_alloc_dict + gres_used = job_row.get('gres_used', 'N/A') raw = job_row.get("nodelist", "") hosts = ast.literal_eval(raw) # Get allocated nodes "['r9189566-n911952','r9189567-n...']" - if cpu_only: - rec["scheduled_nodes"] = [cpu_node_to_idx[h] for h in hosts] - else: - rec["scheduled_nodes"] = [gpu_node_to_idx[h] for h in hosts] + try: + if cpu_only: + rec["scheduled_nodes"] = [cpu_node_to_idx[h] for h in hosts] + else: + rec["scheduled_nodes"] = [gpu_node_to_idx[h] for h in hosts] + except KeyError as e: + skip_counts['unrecognized_node_name'] += 1 + if debug: + print(f"Skipping job {jid} due to unrecognized node name: {e}") + continue + rec["nodes_alloc"] = int(job_row["nodes_alloc"]) rec["cpu"] = proc_cpu_series(df) - print(f"GPU candidate files ({len(gpu_files)}):") - for p in gpu_files[:10]: - print(" ", p) - for fp in tqdm(gpu_files, desc="Loading GPU traces"): - if debug: - print(f"\n[DEBUG] attempting {fp!r}") - print(" full path exists:", os.path.exists(fp), fp) if not os.path.exists(fp): - print("gpu path doesn't exist skipping") + skip_counts['gpu_path_does_not_exist'] += 1 continue - tqdm.write(f"Reading GPU {os.path.basename(fp)}") dfi = pd.read_csv(fp, dtype={0: str}) - if debug: - print(" loaded dataframe, columns:", dfi.columns.tolist()) if "gpu_index" not in dfi.columns: - tqdm.write(" → no gpu_index column! SKIPPING") + skip_counts['no_gpu_index_column'] += 1 continue jid = int(os.path.basename(fp).split("-", 1)[0]) rec = data.setdefault(jid, {}) cpu_df = rec.get("cpu") if cpu_df is None: - tqdm.write(f"Warning: no CPU trace for job {jid}, skipping GPU") + skip_counts['no_cpu_trace_for_gpu_job'] += 1 continue gpu_cnt = rec.get("gpu_cnt", 0) @@ -361,9 +374,6 @@ def load_data(local_dataset_path, **kwargs): data[jid]["gpu"] = pd.merge(prev_gpu, gpu_ser, on="utime") data[jid]["gpu_cnt"] = gpu_cnt - if debug: - print(f"[DEBUG] proc_gpu_series returned {len(gpu_ser)} rows (gpu_cnt={gpu_cnt})") - if "gpu" in rec: rec["gpu"] = pd.merge(rec["gpu"], gpu_ser, on="utime", how="outer") else: @@ -389,25 +399,10 @@ def load_data(local_dataset_path, **kwargs): nodes = rec.get("nodes_alloc") rec["gpu_trace"] = (avg_util * nodes).tolist() - if debug: - print(f"[DEBUG] data[{jid}].keys() now:", list(rec.keys())) - - # quick check: did any jobs pick up a GPU trace? 
- print("→ data_dict contents sample:") - for jid, rec in list(data.items())[:5]: - print(f" job {jid}: cpu={'yes' if 'cpu' in rec else 'no'} gpu={'yes' if 'gpu' in rec else 'no'}") - print(f"→ total jobs seen = {len(data)}") - - got = [jid for jid, rec in data.items() if "gpu" in rec] - miss = [jid for jid, rec in data.items() if "cpu" in rec and "gpu" not in rec] - print(f"→ of {len(data)} total jobs seen, {len(got)} got GPU data, {len(miss)} have only CPU") - if miss: - print(" jobs missing GPU despite being in gpu_files:", miss[:10]) - # merge slurm metadata for _, row in sl.iterrows(): jid = row.id_job - if jid in data and jid not in data[jid]: + if jid in data and 'id_job' not in data[jid]: data[jid].update(row.to_dict()) # build final job_dicts @@ -418,12 +413,14 @@ def load_data(local_dataset_path, **kwargs): cpus_per_node = config.get('CPUS_PER_NODE') cores_per_cpu = config.get('CORES_PER_CPU') gpus_per_node = config.get('GPUS_PER_NODE') - print(f"*** cpus_per_node: {cpus_per_node}, cores_per_cpu: {cores_per_cpu}, gpus_per_node: {gpus_per_node}") quanta = config.get('TRACE_QUANTA') for jid, rec in data.items(): nr = rec.get("nodes_alloc") + if nr is None: + skip_counts['final_missing_nodes_alloc'] += 1 + continue cpu = rec.get("cpu") gpu = rec.get("gpu_trace") @@ -434,40 +431,40 @@ def load_data(local_dataset_path, **kwargs): if cpu_only: if cpu is None: - print("cpu None: skipping this one (a)") + skip_counts['final_cpu_none_cpu_only'] += 1 continue cpu_tr = cpu.cpu_utilisation.tolist() gpu_tr = [0] # Ensure gpu_tr is a list for max() operation t0, t1 = cpu.utime.min(), cpu.utime.max() elif mixed: if cpu is None: - print("cpu None: skipping this one (b)") + skip_counts['final_cpu_none_mixed'] += 1 continue if gpu is None: - print("gpu None: skipping this one") + skip_counts['final_gpu_none_mixed'] += 1 continue cpu_tr = cpu.cpu_utilisation.tolist() gpu_tr = gpu t0, t1 = cpu.utime.min(), cpu.utime.max() - else: - print("skipping") + else: # not cpu_only or mixed + skip_counts['final_unhandled_partition'] += 1 continue # Calculate cpu_cores_required and gpu_units_required from tres_alloc + if "tres_alloc_dict" not in rec: + skip_counts['final_missing_tres_alloc'] += 1 + continue + total_cpu = rec["tres_alloc_dict"].get('cpu', 0) # Can either allocate gpu:volta (1002) or gpu:tesla (1001) but not both - total_gpu = rec["tres_alloc_dict"].get('1002') or tres_alloc_dict.get(1001, 0) + total_gpu = rec["tres_alloc_dict"].get('1002') or rec["tres_alloc_dict"].get(1001, 0) cpu_cores_req = math.ceil(total_cpu / nr) gpu_units_req = math.ceil(total_gpu / nr) - print(f"*** nr: {nr}, cpu_cores_req: {cpu_cores_req}, gpu_units_req: {gpu_units_req}", flush=True) - print(jid, cpu_tr[:5], flush=True) - # sometimes there are spurious large values for cpu util - set max limit based on peak cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node cpu_tr = [min(x/cores_per_cpu/cpus_per_node, cpu_peak) for x in cpu_tr] - print(jid, cpu_tr[:5]) submit_time = rec.get("time_submit", t0) - start_ts @@ -494,24 +491,6 @@ def load_data(local_dataset_path, **kwargs): trace_start_time = 0, trace_end_time = len(cpu_tr)*quanta ) - - view = job.copy() - #view['cpu_trace'] = view['cpu_trace'][:5] + ['…'] - #view['gpu_trace'] = view['gpu_trace'][:5] + ['…'] - - summarize_trace = lambda x: { - 'min': float(np.min(x)), - 'max': float(np.max(x)), - 'avg': float(np.mean(x)), - 'len': len(x), - } - view['cpu_trace'] = summarize_trace(job['cpu_trace']) - view['gpu_trace'] = summarize_trace(job['gpu_trace']) - 
view['cpu_peak'] = job['cpu_cores_required'] / cores_per_cpu / cpus_per_node - print(view) - - #validate_job_traces(Job(job), granularity=quanta) - # if nr > 1: # uncomment to test multinode jobs - need to run for 24 hours to get enough jobs to populate jobs_list.append(job) # Calculate min_overall_utime and max_overall_utime @@ -523,5 +502,10 @@ def load_data(local_dataset_path, **kwargs): system='mit_supercloud', time=max_overall_utime ) + + print("\nSkipped jobs summary:") + for reason, count in skip_counts.items(): + print(f"- {reason}: {count}") + return jobs_list, min_overall_utime, max_overall_utime, args_namespace -- GitLab From e377aa203742a0fe4c0b0cd943bb58bc3416d373 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 25 Jul 2025 17:23:09 -0400 Subject: [PATCH 185/388] Add some debug statements back in and more progress bars --- raps/dataloaders/mit_supercloud/loader.py | 57 ++++++++++++++++++----- raps/engine.py | 2 - 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index a98bc08..8bf1814 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -67,7 +67,7 @@ TRES_ID_MAP = { 4: "gres/gpu", 5: "billing", } - +GREEN, RESET = "\033[32m", "\033[0m" def parse_tres_alloc(tres_str: Union[str, None], id_map: Optional[Dict[int, str]] = None, @@ -170,11 +170,13 @@ def load_data(local_dataset_path, **kwargs): mask = (sl.time_submit >= start_ts) & (sl.time_submit < end_ts) sl = sl[mask] - print(f"[DEBUG] After time filtering: {len(sl)} jobs") - hits = sl.loc[mask] - lines = hits["__line__"].tolist() - print(f"data sourced from {len(lines)} records in slurm-log.csv. Line number ranges:", - summarize_ranges(lines)) + + if debug: + print(f"[DEBUG] After time filtering: {len(sl)} jobs") + hits = sl.loc[mask] + lines = hits["__line__"].tolist() + print(f"data sourced from {len(lines)} records in slurm-log.csv. Line number ranges:", + summarize_ranges(lines)) # --- prune out oversized jobs and known under‑used hosts --- # load list of underutilized nodes to ignore @@ -194,7 +196,8 @@ def load_data(local_dataset_path, **kwargs): def is_pruned(lst): matches = [n for n in lst if n in pruned] if matches: - #print(f"[DEBUG] Skipping job due to pruned nodes: {matches}") + if debug: + print(f"[DEBUG] Skipping job due to pruned nodes: {matches}") return True return False @@ -203,8 +206,8 @@ def load_data(local_dataset_path, **kwargs): after_prune_filter = len(sl) skip_counts['pruned_nodes'] += (before_prune_filter - after_prune_filter) - - print(f"[DEBUG] After pruning: {len(sl)} jobs") + if debug: + print(f"[DEBUG] After pruning: {len(sl)} jobs") # —— ERROR CATCH: no jobs in this window? 
—— if sl.empty: @@ -247,7 +250,7 @@ def load_data(local_dataset_path, **kwargs): else: job_ids = set(sl.id_job) - print(f"→ mode={part}, jobs: {len(job_ids)}") + print(f"{GREEN}→ mode={part}, jobs: {len(job_ids)}{RESET}") # find trace files by walking directories cpu_files = [] @@ -317,7 +320,21 @@ def load_data(local_dataset_path, **kwargs): job_info = sl[sl.id_job == jid] if job_info.empty: skip_counts['job_not_in_slurm_log'] += 1 + tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") continue + else: + job_row = job_info.iloc[0] + start_time = job_row.get('time_start', 'N/A') + wall_time = job_row.get('time_limit', 'N/A') + tres_alloc = job_row.get('tres_alloc', 'N/A') + tres_alloc_dict = parse_tres_alloc(tres_alloc) + rec["tres_alloc_dict"] = tres_alloc_dict + gres_used = job_row.get('gres_used', 'N/A') + + if debug: + tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid}") + tqdm.write(f" Start Time: {start_time}, Wall Time: {wall_time}s") + tqdm.write(f" TRES Alloc: {tres_alloc_dict}") job_row = job_info.iloc[0] start_time = job_row.get('time_start', 'N/A') @@ -345,13 +362,22 @@ def load_data(local_dataset_path, **kwargs): rec["nodes_alloc"] = int(job_row["nodes_alloc"]) rec["cpu"] = proc_cpu_series(df) + if debug: + print(f"GPU candidate files ({len(gpu_files)}):") + for p in gpu_files[:10]: + print(" ", p) + for fp in tqdm(gpu_files, desc="Loading GPU traces"): + if not os.path.exists(fp): + if debug: print(f"[WARNING] gpu path {fp!r} doesn't exist skipping") skip_counts['gpu_path_does_not_exist'] += 1 continue + if debug: tqdm.write(f"Reading GPU {os.path.basename(fp)}") dfi = pd.read_csv(fp, dtype={0: str}) if "gpu_index" not in dfi.columns: + if debug: tqdm.write("[WARNING] → no gpu_index column! SKIPPING") skip_counts['no_gpu_index_column'] += 1 continue @@ -359,6 +385,7 @@ def load_data(local_dataset_path, **kwargs): rec = data.setdefault(jid, {}) cpu_df = rec.get("cpu") if cpu_df is None: + if debug: tqdm.write("[WARNING] → no cpu trace for gpu! 
SKIPPING") skip_counts['no_cpu_trace_for_gpu_job'] += 1 continue @@ -374,6 +401,9 @@ def load_data(local_dataset_path, **kwargs): data[jid]["gpu"] = pd.merge(prev_gpu, gpu_ser, on="utime") data[jid]["gpu_cnt"] = gpu_cnt + if debug: + print(f"[DEBUG] proc_gpu_series returned {len(gpu_ser)} rows (gpu_cnt={gpu_cnt})") + if "gpu" in rec: rec["gpu"] = pd.merge(rec["gpu"], gpu_ser, on="utime", how="outer") else: @@ -400,11 +430,14 @@ def load_data(local_dataset_path, **kwargs): rec["gpu_trace"] = (avg_util * nodes).tolist() # merge slurm metadata - for _, row in sl.iterrows(): + for _, row in tqdm(sl.iterrows(), + total=len(sl), + desc="Merging slurm metadata"): jid = row.id_job if jid in data and 'id_job' not in data[jid]: data[jid].update(row.to_dict()) + # build final job_dicts jobs_list = [] @@ -416,7 +449,7 @@ def load_data(local_dataset_path, **kwargs): quanta = config.get('TRACE_QUANTA') - for jid, rec in data.items(): + for jid, rec in tqdm(data.items(), total=len(data), desc="Building job objects", unit="job"): nr = rec.get("nodes_alloc") if nr is None: skip_counts['final_missing_nodes_alloc'] += 1 diff --git a/raps/engine.py b/raps/engine.py index d2deee6..cdf148c 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -254,8 +254,6 @@ class Engine: for job in self.running: - if self.debug: - print(f"JobID: {job.id}") if job.state == JobState.RUNNING: job.running_time = self.current_time - job.start_time -- GitLab From 344d4ea84a1aa7e27afbf6b96e8d0e941672ee1c Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 26 Jul 2025 14:23:08 -0400 Subject: [PATCH 186/388] A bit more cleanup of loader.py. Flush the outputs on multi-part-sim.py --- multi-part-sim.py | 10 ++++++---- raps/dataloaders/mit_supercloud/loader.py | 8 ++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/multi-part-sim.py b/multi-part-sim.py index 4b2a51d..ab23f8c 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -62,7 +62,7 @@ if args.replay: jobs_part, t0, t1, args_from_file = td.load_data(args.replay) jobs_by_partition[part] = jobs_part td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1]) - + # --- report how many jobs per partition --- for part, jl in jobs_by_partition.items(): print(f"[INFO] Partition '{part}': {len(jl)} jobs loaded") @@ -138,11 +138,13 @@ for timestep in range(timesteps): for name, lm in layout_managers.items(): sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else (0, 0.0) allocated_cores = lm.engine.resource_manager.allocated_cpu_cores - print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} - Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - Power: {lm.engine.sys_power:.1f}kW") + print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} -", + f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ", + f"Power: {lm.engine.sys_power:.1f}kW", flush=True) sys_power += lm.engine.sys_power - print(f"system power: {sys_power:.1f}kW") + print(f"system power: {sys_power:.1f}kW", flush=True) -print("Simulation complete.") +print("Simulation complete.", flush=True) # Print statistics for each partition for name, lm in layout_managers.items(): diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 8bf1814..df2e9c8 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -430,14 +430,11 @@ def load_data(local_dataset_path, **kwargs): 
rec["gpu_trace"] = (avg_util * nodes).tolist() # merge slurm metadata - for _, row in tqdm(sl.iterrows(), - total=len(sl), - desc="Merging slurm metadata"): + for _, row in sl.iterrows(): jid = row.id_job if jid in data and 'id_job' not in data[jid]: data[jid].update(row.to_dict()) - # build final job_dicts jobs_list = [] @@ -449,7 +446,7 @@ def load_data(local_dataset_path, **kwargs): quanta = config.get('TRACE_QUANTA') - for jid, rec in tqdm(data.items(), total=len(data), desc="Building job objects", unit="job"): + for jid, rec in data.items(): nr = rec.get("nodes_alloc") if nr is None: skip_counts['final_missing_nodes_alloc'] += 1 @@ -540,5 +537,4 @@ def load_data(local_dataset_path, **kwargs): for reason, count in skip_counts.items(): print(f"- {reason}: {count}") - return jobs_list, min_overall_utime, max_overall_utime, args_namespace -- GitLab From 354d9edf59d0562b2414a1f7984472f699be5970 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 26 Jul 2025 15:04:15 -0400 Subject: [PATCH 187/388] Make improvements on accounting for why certain GPU jobs are thrown out --- multi-part-sim.py | 2 +- raps/dataloaders/mit_supercloud/loader.py | 70 ++++++++++++----------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/multi-part-sim.py b/multi-part-sim.py index ab23f8c..b43c94a 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -58,7 +58,7 @@ if args.replay: for ad in args_dicts: part = ad['partition'] td = Telemetry(**ad) - print(f"[{part}] loading traces from {args.replay[0]} …") + print(f"\n[{part}] loading traces from {args.replay[0]} …") jobs_part, t0, t1, args_from_file = td.load_data(args.replay) jobs_by_partition[part] = jobs_part td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1]) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index df2e9c8..a680960 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -95,14 +95,18 @@ def parse_tres_alloc(tres_str: Union[str, None], Parsed key/value pairs. 
Example: {'cpu': 20, 'mem': 170000, 'gres/gpu': 1, 'billing': 20} """ - if not tres_str: + if pd.isna(tres_str): return {} + tres_str = str(tres_str) id_map = id_map or TRES_ID_MAP # strip quotes or whitespace tres_str = tres_str.strip().strip('"').strip("'") + if not tres_str: + return {} + # Split on commas, but be tolerant of spaces parts = [p for p in tres_str.split(",") if p] @@ -166,7 +170,6 @@ def load_data(local_dataset_path, **kwargs): # date window start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) - #duration = end_ts - start_ts mask = (sl.time_submit >= start_ts) & (sl.time_submit < end_ts) sl = sl[mask] @@ -185,7 +188,7 @@ def load_data(local_dataset_path, **kwargs): pruned = {l.strip() for l in pf if l.strip()} before_prune = len(sl) - # only keep jobs requesting ≤480 nodes + # only keep jobs requesting <= 480 nodes sl = sl[ sl.nodes_alloc <= 480 ] after_alloc_filter = len(sl) skip_counts['nodes_alloc > 480'] += (before_prune - after_alloc_filter) @@ -281,31 +284,35 @@ def load_data(local_dataset_path, **kwargs): except (ValueError, IndexError): continue - # select final trace list - if cpu_only: - traces = cpu_files - elif mixed: - traces = list(set(cpu_files + gpu_files)) + cpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in cpu_files} + gpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in gpu_files} + all_trace_ids = cpu_ids | gpu_ids - ### check overlap - cpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in cpu_files} - gpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in gpu_files} + print(f"→ {len(cpu_files)} CPU files, {len(gpu_files)} GPU files → {len(all_trace_ids)} jobs with traces") - if debug: - print(f"[DEBUG] CPU IDs: {len(cpu_ids)} GPU IDs: {len(gpu_ids)} OVERLAP: {len(cpu_ids & gpu_ids)}") - if cpu_ids & gpu_ids: - print(" example overlap:", list(cpu_ids & gpu_ids)[:5]) - else: - print(" → **No overlap**! That means none of your GPU job IDs ever had a CPU file in `cpu_files`.") + if mixed: + # Perform a full accounting of all jobs considered for the partition. 
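+        # A hypothetical worked example of the set algebra below (job IDs invented):
+        # job_ids={1,2,3,4}, cpu_ids={1,2}, gpu_ids={2,3} -> all_trace_ids={1,2,3},
+        # so job 4 has no trace file, job 1 is CPU-only, job 3 is GPU-only, and
+        # job 2 has both CPU and GPU traces.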
+ jobs_with_no_traces = len(job_ids - all_trace_ids) + jobs_with_traces = len(all_trace_ids) - else: - traces = list(set(cpu_files + gpu_files)) + print(f"\n--- Detailed Job Accounting for Partition '{part}' ---") + print(f"Initial jobs considered: {len(job_ids)}") + print(f" * Jobs with NO trace file found: {jobs_with_no_traces} ({len(job_ids)} - {jobs_with_traces})\n") + + if jobs_with_traces > 0: + overlap_count = len(cpu_ids & gpu_ids) + cpu_only_count = len(cpu_ids) - overlap_count + gpu_only_count = len(gpu_ids) - overlap_count + print(f"Of the {jobs_with_traces} jobs with traces:") + print(f" * {cpu_only_count} jobs have only CPU traces ({len(cpu_ids)} - {overlap_count})") + print(f" * {gpu_only_count} jobs have only GPU traces ({len(gpu_ids)} - {overlap_count})") + print(f" * {overlap_count} jobs have BOTH CPU and GPU traces.") + print("----------------------------------------------------\n") - print(f"→ {len(cpu_files)} CPU files, {len(gpu_files)} GPU files → total {len(traces)}") data = {} - traced_jobs = {int(os.path.basename(p).split('-',1)[0]) for p in traces} + traced_jobs = all_trace_ids untraced_jobs = job_ids - traced_jobs skip_counts['no_trace_file'] += len(untraced_jobs) @@ -320,10 +327,12 @@ def load_data(local_dataset_path, **kwargs): job_info = sl[sl.id_job == jid] if job_info.empty: skip_counts['job_not_in_slurm_log'] += 1 - tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") + if debug: + tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") continue - else: - job_row = job_info.iloc[0] + + job_row = job_info.iloc[0] + if debug: start_time = job_row.get('time_start', 'N/A') wall_time = job_row.get('time_limit', 'N/A') tres_alloc = job_row.get('tres_alloc', 'N/A') @@ -331,18 +340,13 @@ def load_data(local_dataset_path, **kwargs): rec["tres_alloc_dict"] = tres_alloc_dict gres_used = job_row.get('gres_used', 'N/A') - if debug: - tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid}") - tqdm.write(f" Start Time: {start_time}, Wall Time: {wall_time}s") - tqdm.write(f" TRES Alloc: {tres_alloc_dict}") + tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid}") + tqdm.write(f" Start Time: {start_time}, Wall Time: {wall_time}s") + tqdm.write(f" TRES Alloc: {tres_alloc_dict}") - job_row = job_info.iloc[0] - start_time = job_row.get('time_start', 'N/A') - wall_time = job_row.get('time_limit', 'N/A') tres_alloc = job_row.get('tres_alloc', 'N/A') tres_alloc_dict = parse_tres_alloc(tres_alloc, stats=skip_counts) rec["tres_alloc_dict"] = tres_alloc_dict - gres_used = job_row.get('gres_used', 'N/A') raw = job_row.get("nodelist", "") hosts = ast.literal_eval(raw) @@ -412,7 +416,7 @@ def load_data(local_dataset_path, **kwargs): gpu_df = rec["gpu"] - # grab all the gpu‐util columns + # grab all the gpu-util columns util_cols = [c for c in gpu_df.columns if c.startswith("gpu_util_")] if not util_cols: -- GitLab From b43063b12a41121d7315ff771c16ed5145d27709 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 26 Jul 2025 17:41:00 -0400 Subject: [PATCH 188/388] A bit better annotations --- raps/dataloaders/mit_supercloud/loader.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index a680960..59c6ccd 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -67,7 +67,11 @@ TRES_ID_MAP = { 4: "gres/gpu", 5: "billing", } -GREEN, RESET 
= "\033[32m", "\033[0m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +RED = "\033[31m" +RESET = "\033[0m" + def parse_tres_alloc(tres_str: Union[str, None], id_map: Optional[Dict[int, str]] = None, @@ -362,7 +366,6 @@ def load_data(local_dataset_path, **kwargs): print(f"Skipping job {jid} due to unrecognized node name: {e}") continue - rec["nodes_alloc"] = int(job_row["nodes_alloc"]) rec["cpu"] = proc_cpu_series(df) @@ -374,7 +377,7 @@ def load_data(local_dataset_path, **kwargs): for fp in tqdm(gpu_files, desc="Loading GPU traces"): if not os.path.exists(fp): - if debug: print(f"[WARNING] gpu path {fp!r} doesn't exist skipping") + if debug: print(f"{YELLOW}[WARNING] gpu path {fp!r} doesn't exist skipping{RESET}") skip_counts['gpu_path_does_not_exist'] += 1 continue @@ -389,7 +392,7 @@ def load_data(local_dataset_path, **kwargs): rec = data.setdefault(jid, {}) cpu_df = rec.get("cpu") if cpu_df is None: - if debug: tqdm.write("[WARNING] → no cpu trace for gpu! SKIPPING") + if debug: tqdm.write(f"{YELLOW}[WARNING] → no cpu trace for gpu! SKIPPING{RESET}") skip_counts['no_cpu_trace_for_gpu_job'] += 1 continue -- GitLab From e0b2b37e5fc86706da73b58634f17068a15345ed Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 26 Jul 2025 18:49:47 -0400 Subject: [PATCH 189/388] Add considerable more documentation annotations to understand the missing CPU/GPU traces problem --- raps/dataloaders/mit_supercloud/loader.py | 77 ++++++++++++++++++++++- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 59c6ccd..c42168d 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -15,6 +15,71 @@ if there may be 1569 total jobs in the time range, only 834 cpu jobs and 128 gpu jobs (962 total) are able to be replayed. This is an issue which will likely have to be improved in the future. +--------------------------------------------------------------------------- +Understanding some of the errors. We track the different reasons that +less than the total number of jobs in the slurm log actually run in the +simulator. This is not so much an issue for the CPU partition, but for +the GPU partition, where we have to combine traces extracted from both +CPU trace files and GPU trace files. + +At the beginning of the GPU partition analysis, we give an analysis such as: + + --- Detailed Job Accounting for Partition 'part-gpu' --- + Initial jobs considered: 519 + * Jobs with NO trace file found: 69 (519 - 450) + + Of the 450 jobs with traces: + * 289 jobs have only CPU traces (417 - 128) + * 33 jobs have only GPU traces (161 - 128) + * 128 jobs have BOTH CPU and GPU traces. + ---------------------------------------------------- + +We give a summary report at the end of the data loading process. An +example report is shown for the range `--start 2021-05-21T00:00 --end 2021-05-22T00:00` + + Skipped jobs summary: + - nodes_alloc > 480: 0 + - pruned_nodes: 1 + - no_trace_file: 69 + - no_cpu_trace_for_gpu_job: 41 + - final_gpu_none_mixed: 289 + - final_cpu_none_mixed: 33 + + [INFO] Partition 'mit_supercloud/part-cpu': 834 jobs loaded + [INFO] Partition 'mit_supercloud/part-gpu': 128 jobs loaded + +We explain each of these stats here. + + - `nodes_alloc > 480`: the number of jobs that are thrown out because + they request more than 480 nodes. + + - `pruned_nodes`: the number of jobs thrown out because the node was + listed in `prune_list.txt`. 
+ + - `no_trace_file`: the number of jobs that were found in the Slurm log + for the correct time window and partition, but for which not a single + corresponding trace file (neither CPU nor GPU) could be found on the filesystem. + + - `no_cpu_trace_for_gpu_job`: The number of jobs that had a GPU trace file + but were discarded because they were missing their required corresponding + CPU trace file. + + - `final_gpu_none_mixed`: The number of jobs in a GPU partition run that had + a CPU trace but were missing the final, processed GPU trace data. + + - `final_cpu_none_mixed`: The number of jobs in a GPU partition run that were + missing the essential CPU trace data during the final job construction phase. + +Now, we work on debugging some of these. For example, for `no_cpu_trace_for_gpu_job`, +we can take the jid from the warning message: + + [WARNING] → no cpu trace for gpu! (jid=4074251073298) SKIPPING + +And then check the data directory to see if it can find trace files for both the cpu +and gpu: + + > find ~/data/mit/202201 -name '4074251073298*' + --------------------------------------------------------------------------- How we curated and generated the node ids: cpu_nodes.txt and gpu_nodes.txt @@ -250,10 +315,10 @@ def load_data(local_dataset_path, **kwargs): if cpu_only: job_ids = set(sl.id_job) - gpu_jobs - skip_counts['gpu_job_in_cpu_mode'] += len(set(sl.id_job) & gpu_jobs) + #skip_counts['gpu_job_in_cpu_mode'] += len(set(sl.id_job) & gpu_jobs) elif mixed: job_ids = gpu_jobs & set(sl.id_job) - skip_counts['cpu_job_in_gpu_mode'] += len(set(sl.id_job) - gpu_jobs) + #skip_counts['cpu_job_in_gpu_mode'] += len(set(sl.id_job) - gpu_jobs) else: job_ids = set(sl.id_job) @@ -368,12 +433,17 @@ def load_data(local_dataset_path, **kwargs): rec["nodes_alloc"] = int(job_row["nodes_alloc"]) rec["cpu"] = proc_cpu_series(df) + #print(f'{RED}{rec["cpu"]}{RESET}') if debug: print(f"GPU candidate files ({len(gpu_files)}):") for p in gpu_files[:10]: print(" ", p) + # data from the cpu processes are all stored under the `data` dictionary + # according to their respective jid key + #print("******", data.keys()) + for fp in tqdm(gpu_files, desc="Loading GPU traces"): if not os.path.exists(fp): @@ -391,8 +461,9 @@ def load_data(local_dataset_path, **kwargs): jid = int(os.path.basename(fp).split("-", 1)[0]) rec = data.setdefault(jid, {}) cpu_df = rec.get("cpu") + #print(f"{YELLOW}jid={jid} {cpu_df}{RESET}") if cpu_df is None: - if debug: tqdm.write(f"{YELLOW}[WARNING] → no cpu trace for gpu! SKIPPING{RESET}") + if debug: tqdm.write(f"{YELLOW}[WARNING] → no cpu trace for gpu! 
(jid={jid}) SKIPPING{RESET}") skip_counts['no_cpu_trace_for_gpu_job'] += 1 continue -- GitLab From b267ef11ba04a66e0e62f0f5576f7843550e2c92 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 26 Jul 2025 23:21:25 -0400 Subject: [PATCH 190/388] Work on gcloudv2.py dataloader to compute nodes_required --- README.md | 2 +- raps/dataloaders/gcloudv2.py | 86 +++++++++++++++++++++++++++++++++--- raps/flops.py | 2 +- raps/power.py | 2 +- raps/ui.py | 6 ++- 5 files changed, 88 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ad94453..dbafb3f 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from For Google cluster trace v2 - python math.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -ff 600 + python main.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -ff 600 # analyze dataset python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 3ea647f..04947d8 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -12,6 +12,36 @@ from raps.job import job_dict # ensure RAPS is in PYTHONPATH Official instructions are here: https://drive.google.com/file/d/0B5g07T_gRDg9Z0lsSTEtTWtpOW8/view?resourcekey=0-cozD56gA4fUDdrkHnLJSrQ + + +--- +Following explanation from Gemini-CLI on how the job nodes required is being determined. Such method must be verified + + 1. Machine Capacity Determination: + * The machine_events data is loaded to get information about the cluster's machines. + * The CPU_capacity and memory_capacity of a typical machine are determined by taking the mode() (most frequent value) of these columns + from the machine_df. This gives us the standard CPU and memory capacity of a single node in the cluster. + + 2. Task Resource Request Aggregation: + * The task_events data is loaded, which contains CPU_request and memory_request for individual tasks. + * These task requests are then grouped by job_ID, and the CPU_request and memory_request are summed up for all tasks belonging to the + same job. This gives us the total CPU and memory requested by each job. + + 3. Nodes Required Calculation (CPU and Memory): + * For each job, the total CPU_request is divided by the cpu_capacity of a single machine. The np.ceil() function is used to round up to + the nearest whole number, ensuring that enough nodes are allocated to satisfy the CPU demand. This result is stored as + nodes_required_cpu. + * Similarly, the total memory_request is divided by the mem_capacity of a single machine, and np.ceil() is applied. This result is + stored as nodes_required_mem. + + 4. Final `nodes_required`: + * The final nodes_required for a job is determined by taking the np.maximum() of nodes_required_cpu and nodes_required_mem. This ensures + that the job is allocated enough nodes to satisfy both its CPU and memory requirements. The result is then cast to an integer + (.astype(int)). + + 5. Filtering: + * Finally, any jobs for which the calculated nodes_required is 0 (meaning they requested no CPU or memory) are filtered out, as these + jobs would not require any nodes in the simulation. 
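+
+A hypothetical worked example of steps 1-4 (numbers invented for illustration):
+if the modal machine has cpu_capacity=0.5 and memory_capacity=0.25 (normalized
+units), and a job's tasks sum to CPU_request=2.3 and memory_request=0.4, then
+nodes_required_cpu = ceil(2.3 / 0.5) = 5, nodes_required_mem = ceil(0.4 / 0.25) = 2,
+and nodes_required = max(5, 2) = 5.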
""" # Define expected column names for each supported event type @@ -131,6 +161,45 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any raise ValueError(f"Expected single path, got {data_path}") base_path = os.path.expanduser(data_path) + # Load machine events to determine typical machine capacities + machine_loader = GoogleClusterV2DataLoader(base_path, event_type="machine_events", concatenate=True) + machine_df = next(iter(machine_loader)) + # Get machine capacity (using the mode for robustness) + # This represents the normalized CPU and memory capacity of a single node. + cpu_capacity = machine_df['CPU_capacity'].mode()[0] + mem_capacity = machine_df['memory_capacity'].mode()[0] + + # Load task events to get individual task resource requests + task_loader = GoogleClusterV2DataLoader(base_path, event_type="task_events", concatenate=True) + task_df = next(iter(task_loader)) + # Filter to only submitted tasks (event_type=0) + task_df = task_df[task_df['event_type'] == 0] + + # Calculate total resource requests per job by summing up all task requests for each job + job_resources = task_df.groupby('job_ID').agg({ + 'CPU_request': 'sum', + 'memory_request': 'sum' + }).reset_index() + + # Calculate nodes required for each job based on CPU and memory requests + # Using ceiling division to ensure enough nodes are allocated to meet the demand + job_resources['nodes_required_cpu'] = np.ceil(job_resources['CPU_request'] / cpu_capacity) + job_resources['nodes_required_mem'] = np.ceil(job_resources['memory_request'] / mem_capacity) + # The final nodes_required is the maximum of CPU-driven and memory-driven node requirements + job_resources['nodes_required'] = np.maximum(job_resources['nodes_required_cpu'], job_resources['nodes_required_mem']).astype(int) + + # Create a dictionary for quick lookup of nodes_required by job_ID + nodes_required_map = job_resources.set_index('job_ID')['nodes_required'].to_dict() + + # Filter out jobs with 0 nodes required (i.e., no resource requests) + num_jobs_before_filter = len(job_resources) + job_resources = job_resources[job_resources['nodes_required'] > 0] + num_jobs_after_filter = len(job_resources) + print(f"Filtered out {num_jobs_before_filter - num_jobs_after_filter} jobs with 0 resource requests.") + + print("Job resource requirements (after filtering):") + print(job_resources.head()) + # Load submit events loader = GoogleClusterV2DataLoader(base_path, event_type="job_events", concatenate=True) df = next(iter(loader)) @@ -161,8 +230,10 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any usage_df["CPU_usage_avg"] = usage_df["CPU_usage_avg"].astype(float) usage_map = usage_df.groupby("job_ID")["CPU_usage_avg"].apply(lambda s: s.to_numpy()).to_dict() - # Filter to jobs with usage data - df = df[df["job_ID"].isin(usage_map)] + #print(usage_map) + + # Filter to jobs with usage data AND valid resource requests + df = df[df["job_ID"].isin(usage_map) & df["job_ID"].isin(job_resources['job_ID'])] jobs: List[Any] = [] jid_f = kwargs.get('jid','*') @@ -173,12 +244,15 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any end = usage_map_end [jid] - t0 wall = end - start + #nodes_required = int(nodes_required_map.get(jid, 1)) # Default to 1 if not found + nodes_required = int(nodes_required_map.get(jid)) + if jid_f!='*' and str(jid)!=str(jid_f): continue trace = usage_map[jid] # ensure gpu_trace is same length gpu_trace = np.zeros_like(trace) - jobs.append(job_dict( - 
nodes_required=1, + job = job_dict( + nodes_required=nodes_required, name=f"job_{jid}", account=f"user_{row.get('user_name','unknown')}", cpu_trace=trace, @@ -192,7 +266,9 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any start_time=start, end_time=end, wall_time=wall, trace_time=row["timestamp"], trace_start_time=start, trace_end_time=end - )) + ) + #if nodes_required > 0: + jobs.append(job) # Compute simulation span: start at t=0, end at the latest job finish simulation_start = 0 diff --git a/raps/flops.py b/raps/flops.py index eebd0fa..6978bdc 100644 --- a/raps/flops.py +++ b/raps/flops.py @@ -12,7 +12,7 @@ class FLOPSManager(): cpu_util = np.asarray(cpu_util) gpu_util = np.asarray(gpu_util) job_lengths = np.array([len(job) for job in scheduled_nodes]) - flattened_nodes = np.concatenate(scheduled_nodes, axis=0) + flattened_nodes = np.concatenate(scheduled_nodes, axis=0).astype(np.int64) cpu_util_flat = np.repeat(cpu_util, job_lengths) gpu_util_flat = np.repeat(gpu_util, job_lengths) diff --git a/raps/power.py b/raps/power.py index 0f2c312..b1f7735 100644 --- a/raps/power.py +++ b/raps/power.py @@ -268,7 +268,7 @@ class PowerManager: gpu_util = np.asarray(gpu_util) net_util = np.asarray(net_util) job_lengths = np.array([len(job) for job in scheduled_nodes]) - flattened_nodes = np.concatenate(scheduled_nodes, axis=0) + flattened_nodes = np.concatenate(scheduled_nodes, axis=0).astype(np.int64) cpu_util_flat = np.repeat(cpu_util, job_lengths) gpu_util_flat = np.repeat(gpu_util, job_lengths) diff --git a/raps/ui.py b/raps/ui.py index 173c2ea..d604d1b 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -113,7 +113,8 @@ class LayoutManager: if show_slowdown: columns.append("SLOW DOWN") else: - columns.append("NODE SEGMENTS") + #columns.append("NODE SEGMENTS") + columns.append("NODES") if show_nodes: columns.append("NODELIST") @@ -145,6 +146,7 @@ class LayoutManager: nodes_display = ", ".join(node_segments) col_slow = nodes_display # reused variable name for simplicity else: + #col_slow = str(len(node_segments)) col_slow = str(len(node_segments)) # If show_nodes is True, we need to append NODELIST as well @@ -163,7 +165,7 @@ class LayoutManager: str(job.name), str(job.account), job.state.value, - #n_nodes, + n_nodes, #col_slow, ] -- GitLab From 4bbdda2a82361af6cdaee338f0b7b3a743ad5359 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 26 Jul 2025 23:28:36 -0400 Subject: [PATCH 191/388] Add more documentation for the gcloudv2.py data loader --- raps/dataloaders/gcloudv2.py | 75 ++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 04947d8..f0900a3 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -15,33 +15,78 @@ https://drive.google.com/file/d/0B5g07T_gRDg9Z0lsSTEtTWtpOW8/view?resourcekey=0- --- -Following explanation from Gemini-CLI on how the job nodes required is being determined. 
Such method must be verified +Downloading Google Cluster Traces v2: + + curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-471.0.0-darwin-arm.tar.gz + tar -xf google-cloud-cli-471.0.0-darwin-arm.tar.gz + gcloud components update + + gcloud auth login + + gsutil ls gs://clusterdata_2019_a/ + + * collection_events + * instance_events + * instance_usage + * machine_attributes + * machine_events + + gsutil -m cp -r gs://clusterdata_2019_a/instance_usage-*.parquet.gz ./google_cluster_data/cell_a/instance_usage + + # Create a directory to store your sample data + mkdir -p ./google_cluster_data_sample + + # Download the first JSON and Parquet file for collection_events + gsutil cp gs://clusterdata_2019_a/collection_events-000000000000.json.gz ./google_cluster_data_sample/ + gsutil cp gs://clusterdata_2019_a/collection_events-000000000000.parquet.gz ./google_cluster_data_sample/ + + # Download the first JSON and Parquet file for instance_events + gsutil cp gs://clusterdata_2019_a/instance_events-000000000000.json.gz ./google_cluster_data_sample/ + gsutil cp gs://clusterdata_2019_a/instance_events-000000000000.parquet.gz ./google_cluster_data_sample/ + + # Download the first JSON and Parquet file for instance_usage + gsutil cp gs://clusterdata_2019_a/instance_usage-000000000000.json.gz ./google_cluster_data_sample/ + gsutil cp gs://clusterdata_2019_a/instance_usage-000000000000.parquet.gz ./google_cluster_data_sample/ + + # ... and so on for other event types (machine_attributes, machine_events) + gsutil cp gs://clusterdata_2019_a/machine_attributes-000000000000.json.gz ./google_cluster_data_sample/ + gsutil cp gs://clusterdata_2019_a/machine_attributes-000000000000.parquet.gz ./google_cluster_data_sample/ + + gsutil cp gs://clusterdata_2019_a/machine_events-000000000000.json.gz ./google_cluster_data_sample/ + gsutil cp gs://clusterdata_2019_a/machine_events-000000000000.parquet.gz ./google_cluster_data_sample/ + +--- +Following explanation from Gemini-CLI on how the job nodes required is being computed. Method must be verified 1. Machine Capacity Determination: * The machine_events data is loaded to get information about the cluster's machines. - * The CPU_capacity and memory_capacity of a typical machine are determined by taking the mode() (most frequent value) of these columns - from the machine_df. This gives us the standard CPU and memory capacity of a single node in the cluster. + * The CPU_capacity and memory_capacity of a typical machine are determined by taking + the mode() (most frequent value) of these columns from the machine_df. This gives + us the standard CPU and memory capacity of a single node in the cluster. 2. Task Resource Request Aggregation: - * The task_events data is loaded, which contains CPU_request and memory_request for individual tasks. - * These task requests are then grouped by job_ID, and the CPU_request and memory_request are summed up for all tasks belonging to the - same job. This gives us the total CPU and memory requested by each job. + * The task_events data is loaded, which contains CPU_request and memory_request for + individual tasks. + * These task requests are then grouped by job_ID, and the CPU_request and memory_request + are summed up for all tasks belonging to the same job. This gives us the total CPU and + memory requested by each job. 3. Nodes Required Calculation (CPU and Memory): - * For each job, the total CPU_request is divided by the cpu_capacity of a single machine. 
The np.ceil() function is used to round up to - the nearest whole number, ensuring that enough nodes are allocated to satisfy the CPU demand. This result is stored as + * For each job, the total CPU_request is divided by the cpu_capacity of a single machine. + The np.ceil() function is used to round up to the nearest whole number, ensuring that + enough nodes are allocated to satisfy the CPU demand. This result is stored as nodes_required_cpu. - * Similarly, the total memory_request is divided by the mem_capacity of a single machine, and np.ceil() is applied. This result is - stored as nodes_required_mem. + * Similarly, the total memory_request is divided by the mem_capacity of a single machine, + and np.ceil() is applied. This result is stored as nodes_required_mem. 4. Final `nodes_required`: - * The final nodes_required for a job is determined by taking the np.maximum() of nodes_required_cpu and nodes_required_mem. This ensures - that the job is allocated enough nodes to satisfy both its CPU and memory requirements. The result is then cast to an integer - (.astype(int)). + * The final nodes_required for a job is determined by taking the np.maximum() of nodes_required_cpu + and nodes_required_mem. This ensures that the job is allocated enough nodes to satisfy both its CPU + and memory requirements. The result is then cast to an integer (.astype(int)). 5. Filtering: - * Finally, any jobs for which the calculated nodes_required is 0 (meaning they requested no CPU or memory) are filtered out, as these - jobs would not require any nodes in the simulation. + * Finally, any jobs for which the calculated nodes_required is 0 (meaning they requested no CPU or memory) + are filtered out, as these jobs would not require any nodes in the simulation. """ # Define expected column names for each supported event type -- GitLab From d7eb7a2dad054463badb5531e02da444d17ab775 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 29 Jul 2025 16:00:16 -0400 Subject: [PATCH 192/388] Add validators.py and add networkx dependency to pyproject.toml --- pyproject.toml | 1 + raps/validators.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 raps/validators.py diff --git a/pyproject.toml b/pyproject.toml index fc510e3..dc2b7fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ readme = "README.md" dependencies = [ "matplotlib==3.7.2", + "networkx>=3.5", "numpy==1.23.5", "rich==13.6.0", "fmpy==0.3.19", diff --git a/raps/validators.py b/raps/validators.py new file mode 100644 index 0000000..14e6743 --- /dev/null +++ b/raps/validators.py @@ -0,0 +1,12 @@ +def recompute_power(nodes, running_jobs, current_time): + node_power = {n['id']: 0.0 for n in nodes} + for j in running_jobs: + idx = max(0, current_time - j.start_time) + # Clamp index + idx = min(idx, len(j.cpu_trace)-1) + cpu_p = j.cpu_trace[idx] + gpu_p = j.gpu_trace[idx] if j.gpu_trace else 0 + nid = j.scheduled_nodes[0] + node_power[nid] += cpu_p + gpu_p + total = sum(node_power.values()) + return node_power, total -- GitLab From cd5baf873cd86680e6100be0aa02021a59e5a3bf Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 30 Jul 2025 14:40:47 -0400 Subject: [PATCH 193/388] updated multi-part-sim to fix merge discrepancies. 
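
Each partition now prints its report through the stats helpers rather than a
single engine.get_stats() call. A minimal sketch of the intended pattern (a
sketch only; it assumes get_engine_stats, get_job_stats and get_scheduler_stats
are imported from raps.stats, which the diff below does not show):

    from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats

    for name, lm in layout_managers.items():
        print(f"\n=== Partition: {name} ===")
        for stats in (get_engine_stats(lm.engine),
                      get_job_stats(lm.engine),
                      get_scheduler_stats(lm.engine)):
            for key, value in stats.items():
                print(f"{key.replace('_', ' ').title()}: {value}")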
--- multi-part-sim.py | 35 ++++++++++++++++++++++++++--------- raps/stats.py | 10 +++++----- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/multi-part-sim.py b/multi-part-sim.py index a7c55ba..de9e68b 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -6,7 +6,7 @@ import os import random import sys -from args import args +from raps.args import args from raps.config import ConfigManager, CONFIG_PATH from raps.schedulers.default import PolicyType from raps.ui import LayoutManager @@ -141,10 +141,11 @@ for timestep in range(timesteps): sys_power = 0 for name, lm in layout_managers.items(): sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else (0, 0.0) - allocated_cores = lm.engine.resource_manager.allocated_cpu_cores - print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} -", - f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ", - f"Power: {lm.engine.sys_power:.1f}kW", flush=True) + if hasattr(lm.engine.resource_manager,'allocated_cpu_cores'): + allocated_cores = lm.engine.resource_manager.allocated_cpu_cores + print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} -", + f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ", + f"Power: {lm.engine.sys_power:.1f}kW", flush=True) sys_power += lm.engine.sys_power print(f"system power: {sys_power:.1f}kW", flush=True) @@ -152,8 +153,24 @@ print("Simulation complete.", flush=True) # Print statistics for each partition for name, lm in layout_managers.items(): - print(f"\n--- Simulation Report for Partition: {name} ---") - simulation_stats = lm.engine.get_stats() - for key, value in simulation_stats.items(): + print(f"\n=== Partition: {name} ===") + + engine_stats = get_engine_stats(lm.engine) + job_stats = get_job_stats(lm.engine) + scheduler_stats = get_scheduler_stats(lm.engine) + if args.simulate_network: + network_stats = get_network_stats(lm.engine) + + # Print a formatted report + print("\n--- Simulation Report ---") + for key, value in engine_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print("-------------------------\n") + print("\n--- Job Stat Report ---") + for key, value in job_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print("-------------------------\n") + print("\n--- Scheduler Report ---") + for key, value in scheduler_stats.items(): print(f"{key.replace('_', ' ').title()}: {value}") - print("--------------------------------------------------") + print("-------------------------") diff --git a/raps/stats.py b/raps/stats.py index d56c405..2bc558d 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -39,20 +39,20 @@ def get_engine_stats(engine: Engine): 'total cost': f'${total_cost:.2f}' } - if self.config['multitenant']: + if engine.config['multitenant']: # Multitenancy Stats - total_jobs_loaded = self.total_initial_jobs # Assuming this is passed to __init__ + total_jobs_loaded = engine.total_initial_jobs # Assuming this is passed to __init__ stats['total jobs loaded'] = total_jobs_loaded - stats['jobs completed percentage'] = f"{(self.jobs_completed / total_jobs_loaded * 100):.2f}%" + stats['jobs completed percentage'] = f"{(engine.jobs_completed / total_jobs_loaded * 100):.2f}%" - if self.node_occupancy_history: + if engine.node_occupancy_history: # Calculate average concurrent jobs per node (average density across all nodes and timesteps) total_jobs_running_timesteps = 0 max_concurrent_jobs_per_node = 0 sum_jobs_per_active_node = 0 
# New: Sum of (jobs / active_nodes) for each timestep count_active_timesteps_for_avg_active = 0 # New: Count of timesteps with active nodes - for occupancy_dict in self.node_occupancy_history: + for occupancy_dict in engine.node_occupancy_history: current_timestep_total_occupancy = sum(occupancy_dict.values()) total_jobs_running_timesteps += current_timestep_total_occupancy -- GitLab From e0b42cd0fd6d108be6ef6161a98cf04455a0b09a Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 30 Jul 2025 14:44:13 -0400 Subject: [PATCH 194/388] Comma typo in pyproject.toml --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ed213b1..fa2287e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,6 @@ dependencies = [ "uncertainties>=3.2.1", "requests>=2.32.3", "fsspec>=2025.5.1", - "gcsfs>=2025.5.1" - "networkx>=3.5", + "gcsfs>=2025.5.1", + "networkx>=3.5" ] -- GitLab From 9b40fa3c1fa6272e20b99015bc0750c5f6afc37b Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 30 Jul 2025 18:21:46 -0400 Subject: [PATCH 195/388] Fix to the cooling model refactor --- raps/cooling.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/raps/cooling.py b/raps/cooling.py index 7ffe6eb..99a3aac 100644 --- a/raps/cooling.py +++ b/raps/cooling.py @@ -16,6 +16,8 @@ from fmpy import read_model_description, extract from fmpy.fmi2 import FMU2Slave from datetime import timedelta +from raps.policy import PolicyType + def get_matching_variables(variables, pattern): # Regex pattern to match strings containing .summary pattern = re.compile(pattern) @@ -126,7 +128,7 @@ class ThermoFluidsModel: self.fmu.enterInitializationMode() self.fmu.exitInitializationMode() - def generate_runtime_values(self, cdu_power, sc) -> dict: + def generate_runtime_values(self, cdu_power, engine) -> dict: """ Generate the runtime values for the FMU inputs dynamically. 
@@ -147,9 +149,9 @@ class ThermoFluidsModel:
         temperature = self.config['WET_BULB_TEMP']
 
         # If replay mode is on and weather data is available
-        if sc.replay and self.weather and self.weather.start is not None and self.weather.has_coords:
+        if engine.scheduler.policy == PolicyType.REPLAY and self.weather and self.weather.start is not None and self.weather.has_coords:
             # Convert total seconds to timedelta object
-            delta = timedelta(seconds=sc.current_time)
+            delta = timedelta(seconds=engine.current_time)
             target_datetime = self.weather.start + delta
 
             # Get temperature from weather data
@@ -315,13 +317,14 @@
         # Cleanup - at the end of the simulation
         shutil.rmtree(self.unzipdir, ignore_errors=True)
 
-    def simulate_cooling(self, rack_power):
+    def simulate_cooling(self, rack_power, engine):
         cdu_power = rack_power.T[-1] * 1000
-        runtime_values = self.cooling_model.generate_runtime_values(cdu_power, self)
+        runtime_values = self.generate_runtime_values(cdu_power, engine)
         # FMU inputs are N powers and the wetbulb temp
-        fmu_inputs = self.cooling_model.generate_fmu_inputs(runtime_values,
-                                                            uncertainties=self.power_manager.uncertainties)
-        cooling_inputs, cooling_outputs = self.cooling_model.step(self.current_time,
-                                                                  fmu_inputs,
-                                                                  self.config['POWER_UPDATE_FREQ'])
+        fmu_inputs = self.generate_fmu_inputs(runtime_values,
+                                              uncertainties=engine.power_manager.uncertainties)
+        cooling_inputs, cooling_outputs = self.step(engine.current_time,
+                                                    fmu_inputs,
+                                                    engine.config['POWER_UPDATE_FREQ'])
+        return cooling_inputs, cooling_outputs
-- 
GitLab


From 9b40fa3c1fa6272e20b99015bc0750c5f6afc37b Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 30 Jul 2025 18:21:46 -0400
Subject: [PATCH 196/388] Fixes to cooling refactor

---
 raps/cooling.py | 2 +-
 raps/engine.py  | 3 ++-
 raps/ui.py      | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/raps/cooling.py b/raps/cooling.py
index 99a3aac..27031e6 100644
--- a/raps/cooling.py
+++ b/raps/cooling.py
@@ -317,7 +317,7 @@ class ThermoFluidsModel:
         # Cleanup - at the end of the simulation
         shutil.rmtree(self.unzipdir, ignore_errors=True)
 
-    def simulate_cooling(self, rack_power, engine):
+    def simulate_cooling(self, *, rack_power, engine):
         cdu_power = rack_power.T[-1] * 1000
         runtime_values = self.generate_runtime_values(cdu_power, engine)
 
diff --git a/raps/engine.py b/raps/engine.py
index 34e15f9..df6e25b 100644
--- a/raps/engine.py
+++ 
b/raps/engine.py @@ -453,7 +453,7 @@ class Engine: if self.debug: print(f"[DEBUG] run_simulation: Initial jobs count: {len(jobs)}") if jobs: - print(f"[DEBUG] run_simulation: First job submit_time: {jobs[0]['submit_time']}, start_time: {jobs[0]['start_time']}") + print(f"[DEBUG] run_simulation: First job submit_time: {jobs[0].submit_time}, start_time: {jobs[0].start_time}") # Place jobs that are currently running, onto the system. self.prepare_system_state(jobs, timestep_start, timestep_end, replay) diff --git a/raps/ui.py b/raps/ui.py index cf9bce4..0905af4 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -505,14 +505,17 @@ class LayoutManager: def run(self, jobs, timestep_start, timestep_end, time_delta): """ Runs the UI, blocking until the simulation is complete """ if not self.debug: - context = Live(self.layout, refresh_per_second=5) + context = Live(self.layout, auto_refresh=True, refresh_per_second=3) else: context = nullcontext() - with context: - for data in self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta, autoshutdown=True): + with context as ctx: + last_i=0 + for i,data in enumerate(self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta, autoshutdown=True)): if data: self.update_full_layout(data,time_delta) - self.update_progress_bar(1) + self.update_progress_bar(i-last_i) + last_i=i + #ctx.refresh() # For test with manual update def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta): -- GitLab From cbb9cb83844cf0b89db68dd0d8f54cea8da58132 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Thu, 31 Jul 2025 20:45:15 -0400 Subject: [PATCH 198/388] Added the --noui to disable ui without having to enable debug --- raps/args.py | 3 +++ raps/ui.py | 26 ++++++++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/raps/args.py b/raps/args.py index ffbac3d..41b18cc 100644 --- a/raps/args.py +++ b/raps/args.py @@ -12,6 +12,9 @@ parser.add_argument('-x', '--partitions', nargs='+', default=None, help='List of parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model') parser.add_argument('-net', '--simulate-network', default=False, action='store_true', help='Include Network model') +parser.add_argument('--noui', default=False, action='store_true', help='Run without UI') + + # Simulation runtime options parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)') parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') diff --git a/raps/ui.py b/raps/ui.py index 0905af4..fc857a9 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -21,19 +21,24 @@ from raps.engine import TickData, Engine class LayoutManager: def __init__(self, layout_type, engine: Engine, total_timesteps=0, debug=None, args_dict=None, **config): + self.debug = debug + self.noui = args_dict['noui'] self.engine = engine self.config = config self.topology = self.engine.config.get("TOPOLOGY", "none") self.simulate_network = args_dict.get("simulate_network") - self.console = Console() - self.layout = Layout() self.hascooling = layout_type == "layout2" - self.debug = debug - self.setup_layout(layout_type) self.power_df_header = self.config['POWER_DF_HEADER'] self.racks_per_cdu = self.config['RACKS_PER_CDU'] self.power_column = self.power_df_header[self.racks_per_cdu + 1] self.loss_column = self.power_df_header[-1] + + if self.debug or self.noui: + return + + self.console = Console() + 
self.layout = Layout() + self.setup_layout(layout_type) self.progress = Progress( TextColumn("Progress: [progress.percentage]{task.percentage:>3.0f}%"), BarColumn(bar_width=None), @@ -504,18 +509,19 @@ class LayoutManager: def run(self, jobs, timestep_start, timestep_end, time_delta): """ Runs the UI, blocking until the simulation is complete """ - if not self.debug: + if not self.debug and not self.noui: context = Live(self.layout, auto_refresh=True, refresh_per_second=3) else: context = nullcontext() - with context as ctx: + with context: last_i=0 for i,data in enumerate(self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta, autoshutdown=True)): - if data: + if data and (not self.debug and not self.noui): self.update_full_layout(data,time_delta) - self.update_progress_bar(i-last_i) - last_i=i - #ctx.refresh() # For test with manual update + #self.update_progress_bar(i-last_i) + #last_i=i + if not self.debug and not self.noui: + self.update_progress_bar(1) def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta): -- GitLab From bd44956010566f274c370bdbd5430486fd18479b Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 1 Aug 2025 11:02:49 -0400 Subject: [PATCH 199/388] Added better error message if system is not found. --- raps/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/raps/config.py b/raps/config.py index 269353f..863014f 100644 --- a/raps/config.py +++ b/raps/config.py @@ -15,6 +15,9 @@ class ConfigManager: def load_system_config(self, system_name: str) -> None: base_path = CONFIG_PATH / system_name + if not os.path.isdir(base_path): + raise FileNotFoundError(f"\"{system_name}\" not found in {CONFIG_PATH}.", + f"Valid systems are:{os.listdir(CONFIG_PATH)}") config_files = ['system.json', 'power.json', 'scheduler.json'] optional_files = ['cooling.json', 'uq.json', 'network.json'] -- GitLab From 60ed6b33c303faa32e1795d9715bf59304b5a1d4 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 1 Aug 2025 11:41:52 -0400 Subject: [PATCH 200/388] Removed double reporting at end of simulation and added simulation time --- main.py | 28 +++++++++------------------- raps/stats.py | 4 +++- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/main.py b/main.py index 003e4e3..55af274 100644 --- a/main.py +++ b/main.py @@ -138,39 +138,29 @@ print(f'Simulation time delta: {time_delta}s, Telemetry trace quanta: {jobs[0].t layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, args_dict=args_dict, **config) layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) -engine_stats = get_engine_stats(sc) -job_stats = get_job_stats(sc) -scheduler_stats = get_scheduler_stats(sc) -if args.simulate_network: - network_stats = get_network_stats(sc) -# Following b/c we get the following error when we use PM100 telemetry dataset -# TypeError: Object of type int64 is not JSON serializable -try: - print(json.dumps(engine_stats, indent=4)) - print(json.dumps(job_stats, indent=4)) - print(json.dumps(scheduler_stats, indent=4)) - if args.simulate_network: - print(json.dumps(network_stats, indent=4)) -except: - print(engine_stats) - print(job_stats) - print(scheduler_stats) - if args.simulate_network: - print(network_stats) # Print a formatted report print("\n--- Simulation Report ---") +engine_stats = get_engine_stats(sc) for key, value in engine_stats.items(): print(f"{key.replace('_', ' ').title()}: {value}") print("-------------------------\n") 
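# The same key/value loop is repeated for the job, scheduler, and (optional) network reports below.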
print("\n--- Job Stat Report ---") +job_stats = get_job_stats(sc) for key, value in job_stats.items(): print(f"{key.replace('_', ' ').title()}: {value}") print("-------------------------\n") print("\n--- Scheduler Report ---") +scheduler_stats = get_scheduler_stats(sc) for key, value in scheduler_stats.items(): print(f"{key.replace('_', ' ').title()}: {value}") print("-------------------------") +if args.simulate_network: + print("\n--- Network Report ---") + network_stats = get_network_stats(sc) + for key, value in network_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print("-------------------------") if args.plot: diff --git a/raps/stats.py b/raps/stats.py index 2bc558d..d449d3c 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -7,7 +7,7 @@ the jobs Both could be part of the engine or jobs class, but as the are very verbose, try to keep statistics consolidated in this file. """ import sys -from .utils import sum_values, min_value, max_value +from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss from .engine import Engine @@ -16,6 +16,7 @@ def get_engine_stats(engine: Engine): """ Return engine statistics """ num_samples = len(engine.power_manager.history) if engine.power_manager else 0 + time_simulated = convert_seconds_to_hhmmss(engine.timesteps) average_power_mw = sum_values(engine.power_manager.history) / num_samples / 1000 if num_samples else 0 average_loss_mw = sum_values(engine.power_manager.loss_history) / num_samples / 1000 if num_samples else 0 min_loss_mw = min_value(engine.power_manager.loss_history) / 1000 if num_samples else 0 @@ -28,6 +29,7 @@ def get_engine_stats(engine: Engine): total_cost = total_energy_consumed * 1000 * engine.config.get('POWER_COST', 0) # Total cost in dollars stats = { + 'time simulated': time_simulated, 'num_samples': num_samples, 'average power': f'{average_power_mw:.2f} MW', 'min loss': f'{min_loss_mw:.2f} MW', -- GitLab From 1793c4e738a3f3e9a4db99b9f88dce181fafb759 Mon Sep 17 00:00:00 2001 From: "Maiterth, Matthias" Date: Tue, 5 Aug 2025 14:22:37 +0000 Subject: [PATCH 201/388] Added pytest --- .gitignore | 3 + Makefile | 2 + main.py | 4 +- pyproject.toml | 5 +- pytest.ini | 28 ++++ raps/stats.py | 10 +- raps/ui.py | 2 +- tests/__init__.py | 0 tests/conftest.py | 14 ++ tests/smoke.py | 1 + tests/systems/__init__.py | 0 tests/systems/conftest.py | 156 ++++++++++++++++++ tests/systems/test_main_basic_run.py | 26 +++ tests/systems/test_main_cooling_run.py | 29 ++++ .../test_main_cooling_uncertainty_run.py | 31 ++++ tests/systems/test_main_fastforward_run.py | 36 ++++ tests/systems/test_main_noui_run.py | 27 +++ tests/systems/test_main_time_delta_run.py | 39 +++++ tests/systems/test_main_time_ff_delta_run.py | 40 +++++ tests/systems/test_main_time_run.py | 36 ++++ tests/systems/test_main_uncertainty_run.py | 30 ++++ tests/systems/test_main_withdata_run.py | 33 ++++ .../systems/test_multi_part_sim_basic_run.py | 29 ++++ tests/test_main.py | 47 ++++++ tests/unit/__init__.py | 0 tests/util.py | 25 +++ 26 files changed, 647 insertions(+), 6 deletions(-) create mode 100644 pytest.ini create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/systems/__init__.py create mode 100644 tests/systems/conftest.py create mode 100644 tests/systems/test_main_basic_run.py create mode 100644 tests/systems/test_main_cooling_run.py create mode 100644 tests/systems/test_main_cooling_uncertainty_run.py create mode 100644 tests/systems/test_main_fastforward_run.py create mode 100644 
tests/systems/test_main_noui_run.py create mode 100644 tests/systems/test_main_time_delta_run.py create mode 100644 tests/systems/test_main_time_ff_delta_run.py create mode 100644 tests/systems/test_main_time_run.py create mode 100644 tests/systems/test_main_uncertainty_run.py create mode 100644 tests/systems/test_main_withdata_run.py create mode 100644 tests/systems/test_multi_part_sim_basic_run.py create mode 100644 tests/test_main.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/util.py diff --git a/.gitignore b/.gitignore index fc862bb..74a41d8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ __pycache__ .venv venv +*.npz +*.prof +simulation_results/ diff --git a/Makefile b/Makefile index 99a1adb..a2f4211 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,8 @@ pip: run: python3 ./main.py +test: + pytest -n 8 IMAGE_NAME = raps diff --git a/main.py b/main.py index 55af274..c831989 100644 --- a/main.py +++ b/main.py @@ -88,11 +88,11 @@ else: # Synthetic jobs td = Telemetry(**args_dict) td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname) -if args.fastforward: +if args.fastforward is not None: args.fastforward = convert_to_seconds(args.fastforward) timestep_start = args.fastforward -if args.time: +if args.time is not None: timestep_end = timestep_start + convert_to_seconds(args.time) diff --git a/pyproject.toml b/pyproject.toml index fa2287e..2cb56d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,5 +23,8 @@ dependencies = [ "requests>=2.32.3", "fsspec>=2025.5.1", "gcsfs>=2025.5.1", - "networkx>=3.5" + "networkx>=3.5", + "pytest", + "pytest-order", + "pytest-xdist" ] diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..4de6475 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,28 @@ +[pytest] +testpaths = tests +#python_paths = . 
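+# Example invocations (assuming the markers and the --runlong option added in this patch):
+#   pytest -m "system and not long"
+#   pytest -m frontier --runlong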
+markers =
+    long: mark a test as long (skipped if not run with --runlong)
+    system: mark a test as system (integration) test
+    unit: mark a test as a unit test
+    withdata: marks tests that require external data
+    nodata: marks tests that can run without external data
+    cooling: cooling argument test
+    uncertainty: uncertainty argument test
+    time: time argument test
+    fastforward: fastforward argument test
+    time_delta: time delta argument test
+
+    40frontiers: System test
+    adastraMI250: System test
+    frontier: System test
+    fugaku: System test
+    gcloudv2: System test
+    lassen: System test
+    marconi100: System test
+    mit_supercloud: System test
+    setonix: System test
+    summit: System test
+
+
+addopts = -ra
diff --git a/raps/stats.py b/raps/stats.py
index d449d3c..f4692e6 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -246,8 +246,14 @@ def get_job_stats(engine: Engine):
         avg_ntx_u = sum_ntx_u / len(engine.job_history_dict)
         avg_nrx_u = sum_nrx_u / len(engine.job_history_dict)
 
-        avg_awrt = sum_awrt / sum_agg_node_hours
-        psf = (3 * sum_psf_partial_num) / (4 * sum_psf_partial_den)
+        if sum_agg_node_hours != 0:
+            avg_awrt = sum_awrt / sum_agg_node_hours
+        else:
+            avg_awrt = 0
+        if sum_psf_partial_den != 0:
+            psf = (3 * sum_psf_partial_num) / (4 * sum_psf_partial_den)
+        else:
+            psf = 0
     else:
         # Set these to -1 to indicate nothing ran
         min_job_size, max_job_size, avg_job_size = -1,-1,-1
diff --git a/raps/ui.py b/raps/ui.py
index fc857a9..58a8941 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -514,7 +514,7 @@ class LayoutManager:
         else:
             context = nullcontext()
         with context:
-            last_i=0
+            #last_i = 0
             for i,data in enumerate(self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta, autoshutdown=True)):
                 if data and (not self.debug and not self.noui):
                     self.update_full_layout(data,time_delta)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..5084620
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,14 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--runlong", action="store_true", default=False, help="Run long-running tests"
+    )
+
+
+def pytest_runtest_setup(item):
+    if "long" in item.keywords and not item.config.getoption("--runlong"):
+        #reason = f"Skipping {item.nodeid} because it requires --runlong"
+        reason = "Skipping test because it requires --runlong"
+        pytest.skip(reason)
diff --git a/tests/smoke.py b/tests/smoke.py
index 609f46e..d3c7dcb 100644
--- a/tests/smoke.py
+++ b/tests/smoke.py
@@ -84,5 +84,6 @@ def main():
     if system_tests:
         execute_system_tests(system_tests)
 
+
 if __name__ == "__main__":
     main()
diff --git a/tests/systems/__init__.py b/tests/systems/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py
new file mode 100644
index 0000000..bbdd2eb
--- /dev/null
+++ b/tests/systems/conftest.py
@@ -0,0 +1,156 @@
+import pytest
+
+
+@pytest.fixture(params=[
+    pytest.param("40frontiers", marks=pytest.mark.long),  # All these tests are long running as the system is large.
+    "adastraMI250",
+    "frontier",
+    "fugaku",
+    "gcloudv2",
+    "lassen",
+    "marconi100",
+    "mit_supercloud",
+    "setonix",
+    "summit"
+])
+def system(request):
+    return request.param
+
+
+# Add markers to each test for the system.
+# Similar to pytest -m marker.
+# These are explicitly defined in pytest.ini, to avoid warnings.
+# This way you can run test with: pytest -m systemname +def pytest_collection_modifyitems(config, items): + for item in items: + system = item.callspec.params.get("system") if hasattr(item, "callspec") else None + if system: + item.add_marker(getattr(pytest.mark, system)) + + +# #Define tests to run here! +@pytest.fixture +def system_config(system): + # Defaults for systems not listed explicitly + default_config = {} # No defaults! + + configs = { + "40frontiers": { + "basic": True, + "multi-part-sim": False, + "withdata": False, + "cooling": False, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + }, + "adastraMI250": { + "basic": True, + "multi-part-sim": False, + "withdata": True, + "cooling": False, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + }, + "frontier": { + "basic": True, + "multi-part-sim": False, + "withdata": True, + "cooling": True, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + }, + "fugaku": { + "basic": True, + "multi-part-sim": False, + "withdata": True, + "cooling": False, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + }, + "gcloudv2": { + "basic": False, + "multi-part-sim": False, + "withdata": False, + "cooling": False, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + }, + "lassen":{ + "basic": True, + "multi-part-sim": False, + "withdata": True, + "cooling": False, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + }, + "marconi100":{ + "basic": True, + "multi-part-sim": False, + "withdata": True, + "cooling": False, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + }, + "mit_supercloud": { + "basic": False, + "multi-part-sim": True, + "withdata": False, + "cooling": False, + "uncertainty": False, + "time": False, + "fastforward": False, + "time_delta": False, + }, + "setonix": { + "basic": False, + "multi-part-sim": True, + "withdata": False, + "cooling": False, + "uncertainty": True, + "time": False, + "fastforward": False, + "time_delta": False, + }, + "summit": { + "basic": True, + "multi-part-sim": False, + "withdata": False, + "cooling": False, + "uncertainty": True, + "time": True, + "fastforward": True, + "time_delta": True, + } + } + return configs.get(system, default_config) + + +@pytest.fixture +def system_file(system): + files = { + "40frontiers":[], + "adastraMI250":["AdastaJobsMI250_15days.parquet"], + "frontier":["slurm/joblive/date=2024-01-18/","jobprofile/date=2024-01-18/"], + "fugaku":["21_04.parquet"], + "gcloudv2":["/v2/google_cluster_data_2011_sample"], + "lassen":["Lassen-Supercomputer-Job-Dataset"], + "marconi100":["job_table.parquet"], + "mit_supercloud":[""], + "setonix":[""], + "summit":[] + } + return files.get(system,files) diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py new file mode 100644 index 0000000..da9b651 --- /dev/null +++ b/tests/systems/test_main_basic_run.py @@ -0,0 +1,26 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_main_run(system, system_config): + if not system_config.get("basic", False): + pytest.skip(f"{system} does not support basic main run.") + + os.chdir(PROJECT_ROOT) + result = subprocess.run([ + "python", "main.py", + "--time", "1m", + "--system", system + ], capture_output=True, 
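        # stdin is redirected from DEVNULL so the spawned simulation can never block waiting for input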
text=True, stdin=subprocess.DEVNULL) + assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + del result + gc.collect() diff --git a/tests/systems/test_main_cooling_run.py b/tests/systems/test_main_cooling_run.py new file mode 100644 index 0000000..6932d9c --- /dev/null +++ b/tests/systems/test_main_cooling_run.py @@ -0,0 +1,29 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata, + pytest.mark.cooling, +] + + +def test_main_run(system, system_config): + if not system_config.get("cooling", False): + pytest.skip(f"{system} does not support cooling.") + + os.chdir(PROJECT_ROOT) + result = subprocess.run([ + "python", "main.py", + "--time", "1h", + "--system", system, + "-c", + "--noui" + ], capture_output=True, text=True, stdin=subprocess.DEVNULL) + assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + del result + gc.collect() diff --git a/tests/systems/test_main_cooling_uncertainty_run.py b/tests/systems/test_main_cooling_uncertainty_run.py new file mode 100644 index 0000000..507e15d --- /dev/null +++ b/tests/systems/test_main_cooling_uncertainty_run.py @@ -0,0 +1,31 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata, + pytest.mark.cooling, + pytest.mark.uncertainty +] + + +def test_main_run(request, system, system_config): + print(f"Markexpr: {request.config.option.markexpr}") + if not system_config.get("uncertainty", False) or not system_config.get("cooling", False): + pytest.skip(f"{system} does not support cooling or uncertainty.") + + os.chdir(PROJECT_ROOT) + result = subprocess.run([ + "python", "main.py", + "--time", "3m", + "--system", system, + "-c", + "-u", + "--noui" + ], capture_output=True, text=True, stdin=subprocess.DEVNULL) + assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + del result + gc.collect() diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py new file mode 100644 index 0000000..88657b8 --- /dev/null +++ b/tests/systems/test_main_fastforward_run.py @@ -0,0 +1,36 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata, + pytest.mark.fastforward +] + + +@pytest.mark.parametrize("ff_arg", [ + "0", "1", "3600", "7200", "43200", + "0s", "1s", "3600s", "7200s", "43200s", + "0m", "1m", "60m", + "0h", "1h", "6h", +]) +def test_main_fastforward_run(system, system_config, ff_arg): + if not system_config.get("fastforward", False): + pytest.skip(f"{system} does not support basic main run.") + + os.chdir(PROJECT_ROOT) + result = subprocess.run([ + "python", "main.py", + "-t 1", + "--fastforward", ff_arg, + "--system", system, + #--"-f", system_file, + "--noui" + ], capture_output=True, text=True, stdin=subprocess.DEVNULL) + assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + del result + gc.collect() diff --git a/tests/systems/test_main_noui_run.py b/tests/systems/test_main_noui_run.py new file mode 100644 index 0000000..556efa3 --- /dev/null +++ b/tests/systems/test_main_noui_run.py @@ -0,0 +1,27 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_main_run(system, system_config): + if not system_config.get("basic", False): 
+        pytest.skip(f"{system} does not support basic main run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", "1m",
+        "--system", system,
+        "--noui"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py
new file mode 100644
index 0000000..6128e4c
--- /dev/null
+++ b/tests/systems/test_main_time_delta_run.py
@@ -0,0 +1,39 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata,
+    pytest.mark.time_delta
+]
+
+
+@pytest.mark.parametrize("time_arg, tdelta_arg", [
+    ("100", "1"),
+    ("100", "1s"),
+    ("100", "10s"),
+    ("10m", "1m"),
+    ("10h", "1h"),
+    ("10h", "3h"),
+    ("3d", "1d")
+], ids=["1","1s","10s","1m","1h","3h","1d"])
+def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg):
+    if not system_config.get("time_delta", False):
+        pytest.skip(f"{system} does not support time_delta run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "-t", time_arg,
+        "--time-delta", tdelta_arg,
+        "--system", system,
+        # "-f", system_file,
+        "--noui"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py
new file mode 100644
index 0000000..9301c70
--- /dev/null
+++ b/tests/systems/test_main_time_ff_delta_run.py
@@ -0,0 +1,40 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata,
+    pytest.mark.time_delta
+]
+
+
+@pytest.mark.parametrize("time_arg, tdelta_arg, ff_arg", [
+    ("100", "1", "103"),
+    ("100", "1s", "2s"),
+    ("100", "10s", "10s"),
+    ("10m", "1m", "1m"),
+    ("10h", "1h", "2h"),
+    ("10h", "3h", "1h"),
+    pytest.param("3d", "1d", "1d", marks=pytest.mark.long, id="1d (long)"),
+], ids=["1","1s","10s","1m","1h","3h","1d"])
+def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, ff_arg):
+    if not system_config.get("time_delta", False):
+        pytest.skip(f"{system} does not support time_delta run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "-t", time_arg,
+        "-ff", ff_arg,
+        "--time-delta", tdelta_arg,
+        "--system", system,
+        # "-f", system_file,
+        "--noui"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/systems/test_main_time_run.py b/tests/systems/test_main_time_run.py
new file mode 100644
index 0000000..dc72b52
--- /dev/null
+++ b/tests/systems/test_main_time_run.py
@@ -0,0 +1,36 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata,
+    pytest.mark.time
+]
+
+
+@pytest.mark.parametrize("time_args", [
+    "0", "1", "3600", "7200", "43200",
+    "0s", "1s", "3600s", "7200s", "43200s",
+    "0m", "1m", "60m",
+    "0h", "1h",
+    pytest.param("6h", marks=pytest.mark.long),  # mark this one as long
+])
+def test_main_time_run(system, system_config, time_args):
+    if not system_config.get("time", False):
+        pytest.skip(f"{system} does not support time run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", time_args,
+        "--system", system,
+        # "-f", system_file,
+        "--noui"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/systems/test_main_uncertainty_run.py b/tests/systems/test_main_uncertainty_run.py
new file mode 100644
index 0000000..a02cc13
--- /dev/null
+++ b/tests/systems/test_main_uncertainty_run.py
@@ -0,0 +1,30 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata,
+    pytest.mark.uncertainty,
+    pytest.mark.long
+]
+
+
+def test_main_uncertainty_run(system, system_config):
+    if not system_config.get("uncertainty", False):
+        pytest.skip(f"{system} does not support uncertainty.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", "3m",
+        "--system", system,
+        "-u",
+        "--noui"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py
new file mode 100644
index 0000000..2fe2234
--- /dev/null
+++ b/tests/systems/test_main_withdata_run.py
@@ -0,0 +1,33 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT, DATA_PATH
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.withdata,
+    pytest.mark.long
+]
+
+
+def test_main_withdata_run(system, system_config, system_file):
+    if not system_config.get("withdata", False):
+        pytest.skip(f"{system} does not support basic run with data.")
+    if isinstance(system_file, list):
+        file_list = [DATA_PATH / system / x for x in system_file]
+    else:
+        file_list = [DATA_PATH / system / system_file]
+    for file in file_list:
+        assert os.path.isfile(file) or os.path.isdir(file), "File does not exist. Does ./data exist, or is RAPS_DATA_DIR set?"
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", "1m",
+        "--system", system,
+        "-f", *file_list,
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py
new file mode 100644
index 0000000..24a671e
--- /dev/null
+++ b/tests/systems/test_multi_part_sim_basic_run.py
@@ -0,0 +1,29 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata
+]
+
+
+def test_multi_part_sim_run(system, system_config):
+
+    if not system_config.get("multi-part-sim", False):
+        pytest.skip(f"{system} does not support basic multi-part-sim run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "multi-part-sim.py",
+        "--time", "1h",
+        "--system", system,
+        "-x", f"{system}/*",
+        #"--noui"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/test_main.py b/tests/test_main.py
new file mode 100644
index 0000000..4b98263
--- /dev/null
+++ b/tests/test_main.py
@@ -0,0 +1,47 @@
+from tests.smoke import main
+
+import subprocess
+import os
+from pathlib import Path
+
+import pytest
+pytestmark = pytest.mark.nodata
+
+PROJECT_ROOT = Path(__file__).resolve().parent.parent  # adjust if needed
+
+
+@pytest.mark.order(1)
+def test_main_withui():
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", "1h",
+    ], capture_output=True,
+        text=True
+    )
+    assert result.returncode == 0
+
+
+@pytest.mark.order(2)
+def test_main_noui():
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", "1h",
+        "--noui"
+    ], capture_output=True,
+        text=True
+    )
+    assert result.returncode == 0
+
+
+@pytest.mark.long
+@pytest.mark.order(3)
+def test_main_long():
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+    ], capture_output=True,
+        text=True
+    )
+    assert result.returncode == 0
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/util.py b/tests/util.py
new file mode 100644
index 0000000..96609c7
--- /dev/null
+++ b/tests/util.py
@@ -0,0 +1,25 @@
+import os
+from pathlib import Path
+
+
+def find_project_root():
+    path = Path(__file__).resolve()
+    while not (path / "main.py").exists():
+        if path.parent == path:
+            raise RuntimeError("Could not find project root.")
+        path = path.parent
+    return path
+
+
+PROJECT_ROOT = find_project_root()
+CONFIG_PATH = PROJECT_ROOT / "config"
+DATA_PATH = Path(os.getenv("RAPS_DATA_DIR", PROJECT_ROOT / "data")).resolve()
+
+# Maybe useful, but for now all systems are listed explicitly!
+system_list = [entry for entry in os.listdir(CONFIG_PATH) if os.path.isfile(os.path.join(CONFIG_PATH, entry, 'system.json'))]
+
+
+def requires_all_markers(request, required_markers):
+    markexpr = getattr(request.config.option, "markexpr", "")
+    selected = set(part.strip() for part in markexpr.split("and"))
+    return required_markers.issubset(selected)
-- 
GitLab


From 7c6e9a5336d68c9dbc0182536b3be3b5d05156d5 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 5 Aug 2025 14:57:03 -0400
Subject: [PATCH 202/388] Small fixes found by testing. Removed 40frontiers cooling model.
Removed invalid jobs from frontier. (start > end ...)
Removed uncertainty tests from systems that do not support them.
---
 config/40frontiers/cooling.json | 25 -------------------------
 raps/dataloaders/frontier.py | 3 +++
 raps/job.py | 4 ++++
 tests/systems/conftest.py | 12 ++++++------
 4 files changed, 13 insertions(+), 31 deletions(-)
 delete mode 100644 config/40frontiers/cooling.json

diff --git a/config/40frontiers/cooling.json b/config/40frontiers/cooling.json
deleted file mode 100644
index 778a56d..0000000
--- a/config/40frontiers/cooling.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-    "COOLING_EFFICIENCY": 0.945,
-    "WET_BULB_TEMP": 290.0,
-    "ZIP_CODE": 37831,
-    "COUNTRY_CODE": "US",
-    "FMU_PATH": "models/Simulator_olcf5_base.fmu",
-    "FMU_COLUMN_MAPPING": {
-        "T_sec_r_C": "Rack Return Temperature (\u00b0C)",
-        "T_sec_s_C": "Rack Supply Temperature (\u00b0C)",
-        "p_sec_r_psig": "Rack Supply Pressure (psig)",
-        "p_sec_s_psig": "Rack Return Pressure (psig)",
-        "V_flow_sec_GPM": "Rack Flowrate (gpm)",
-        "T_prim_r_C": "Facility Return Temperature (\u00b0C)",
-        "T_prim_s_C": "Facility Supply Temperature (\u00b0C)",
-        "p_prim_s_psig": "Facility Supply Pressure (psig)",
-        "p_prim_r_psig": "Facility Return Pressure (psig)",
-        "V_flow_prim_GPM": "Facility Flowrate (gpm)",
-        "W_flow_CDUP_kW": "Work Done By CDUP (kW)"
-    },
-    "TEMPERATURE_KEY": "simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_Towb",
-    "W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW",
-    "W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW",
-    "W_CTs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW"
-
-}
diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py
index 4a8bf42..e158f2c 100644
--- a/raps/dataloaders/frontier.py
+++ b/raps/dataloaders/frontier.py
@@ -227,6 +227,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
         end_time_timestamp = jobs_df.loc[jidx, 'time_end']
         diff = end_time_timestamp - telemetry_start_timestamp
         end_time = diff.total_seconds()
+        if not start_time <= end_time or np.isnan(end_time):
+            continue  # start_time is after end_time, or end_time is not valid (NaN).
+            # Skip this entry.
 
         wall_time = end_time - start_time
         if np.isnan(wall_time):
diff --git a/raps/job.py b/raps/job.py
index 1893526..1f89bae 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -156,6 +156,10 @@ class Job:
             else:
                 # Type is not as expected!
                 raise ValueError(f"type: self.scheduled_nodes:{type(self.scheduled_nodes)}, with {type(self.scheduled_nodes[0])}")
+        assert isinstance(self.submit_time,(int,float))
+        assert isinstance(self.start_time,(int,float))
+        assert isinstance(self.end_time,(int,float))
+        assert self.start_time <= self.end_time, f"{self.start_time} <= {self.end_time}"
 
     def __repr__(self):
         """Return a string representation of the job."""
diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py
index bbdd2eb..b57a948 100644
--- a/tests/systems/conftest.py
+++ b/tests/systems/conftest.py
@@ -70,7 +70,7 @@ def system_config(system):
         "multi-part-sim": False,
         "withdata": True,
         "cooling": False,
-        "uncertainty": True,
+        "uncertainty": False,
         "time": True,
         "fastforward": True,
         "time_delta": True,
@@ -80,7 +80,7 @@ def system_config(system):
         "multi-part-sim": False,
         "withdata": False,
         "cooling": False,
-        "uncertainty": True,
+        "uncertainty": False,
         "time": True,
         "fastforward": True,
         "time_delta": True,
@@ -90,7 +90,7 @@ def system_config(system):
         "multi-part-sim": False,
         "withdata": True,
         "cooling": False,
-        "uncertainty": True,
+        "uncertainty": False,
         "time": True,
         "fastforward": True,
         "time_delta": True,
@@ -100,7 +100,7 @@ def system_config(system):
         "multi-part-sim": False,
         "withdata": True,
         "cooling": False,
-        "uncertainty": True,
+        "uncertainty": False,
         "time": True,
         "fastforward": True,
         "time_delta": True,
@@ -120,7 +120,7 @@ def system_config(system):
         "multi-part-sim": True,
         "withdata": False,
         "cooling": False,
-        "uncertainty": True,
+        "uncertainty": False,
         "time": False,
         "fastforward": False,
         "time_delta": False,
@@ -130,7 +130,7 @@ def system_config(system):
         "multi-part-sim": False,
         "withdata": False,
         "cooling": False,
-        "uncertainty": True,
+        "uncertainty": False,
         "time": True,
         "fastforward": True,
         "time_delta": True,
-- 
GitLab


From 8186119ebc06e4f8cd809faee3810285378fb270 Mon Sep 17 00:00:00 2001
From: "Maiterth, Matthias"
Date: Wed, 6 Aug 2025 19:48:58 +0000
Subject: [PATCH 203/388] Added a downscale factor to be able to simulate below one second.
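
The downscale factor turns sub-second units into an integer tick rate: instead
of converting to (fractional) seconds, a value such as "250ms" is kept as a raw
count plus a downscale divisor, so one simulated tick represents 1/downscale of
a second. A minimal sketch of the intended parsing (illustrative only; it
mirrors the convert_to_seconds() change below, and the helper name parse_time
plus the restriction to plain seconds are assumptions of this sketch):

    DOWNSCALE = {'ms': 1000, 'cs': 100, 'ds': 10}

    def parse_time(s: str):
        # "250ms" -> (250, 1000): 250 ticks of 1/1000 s each
        if s[-2:] in DOWNSCALE:
            return int(s[:-2]), DOWNSCALE[s[-2:]]
        if s.endswith('s'):   # plain seconds, e.g. "30s"
            return int(s[:-1]), 1
        return int(s), 1      # bare numbers default to seconds

    assert parse_time("250ms") == (250, 1000)
    assert parse_time("3ds") == (3, 10)
    assert parse_time("60") == (60, 1)

post_process_args() then rescales --time, --time-delta, and --fastforward to
the largest of the three downscale factors, so all values share one tick unit.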
---
 main.py                                    | 37 ++++++++-------
 pytest.ini                                 |  2 +
 raps/args.py                               | 27 +++++++++--
 raps/engine.py                             |  9 ++--
 raps/job.py                                |  7 ++-
 raps/stats.py                              |  3 +-
 raps/telemetry.py                          |  8 ++--
 raps/ui.py                                 | 36 ++++++++------
 raps/utils.py                              | 30 ++++++++++--
 raps/workload.py                           | 28 +++++++----
 tests/systems/test_main_time_delta_run.py  |  4 ++
 .../test_main_time_delta_sub_second_run.py | 47 +++++++++++++++++++
 12 files changed, 179 insertions(+), 59 deletions(-)
 create mode 100644 tests/systems/test_main_time_delta_sub_second_run.py

diff --git a/main.py b/main.py
index c831989..a2277b5 100644
--- a/main.py
+++ b/main.py
@@ -88,13 +88,16 @@ else: # Synthetic jobs
     td = Telemetry(**args_dict)
     td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname)
 
-if args.fastforward is not None:
-    args.fastforward = convert_to_seconds(args.fastforward)
+if args.fastforward:
     timestep_start = args.fastforward
 
-if args.time is not None:
-    timestep_end = timestep_start + convert_to_seconds(args.time)
+if args.time:
+    timestep_end = timestep_start + args.time
 
+if args.time_delta:
+    time_delta = args.time_delta
+else:
+    time_delta = 1
 
 sc = Engine(
     power_manager=power_manager,
@@ -128,14 +131,12 @@ if args.verbose:
     print(jobs)
 
 total_timesteps = timestep_end - timestep_start
-if args.time_delta:
-    time_delta = convert_to_seconds(args.time_delta)
-else:
-    time_delta = 1 # config['TRACE_QUANTA']
 
-print(f'Simulating {len(jobs)} jobs for {total_timesteps} seconds from {timestep_start} to {timestep_end}.')
-print(f'Simulation time delta: {time_delta}s, Telemetry trace quanta: {jobs[0].trace_quanta}s.')
-layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, args_dict=args_dict, **config)
+downscale = args.downscale
+downscale_str = "" if downscale == 1 else f"/{downscale}"
+print(f'Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str} seconds from {timestep_start} to {timestep_end}.')
+print(f'Simulation time delta: {time_delta}{downscale_str} s, Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.')
+layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config)
 layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta)
 
@@ -162,29 +163,31 @@ if args.simulate_network:
         print(f"{key.replace('_', ' ').title()}: {value}")
     print("-------------------------")
 
+if downscale_str:
+    downscale_str = "1" + downscale_str
 
 if args.plot:
     if 'power' in args.plot:
-        pl = Plotter('Time (s)', 'Power (kW)', 'Power History', \
+        pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History', \
                      OPATH / f'power.{args.imtype}', \
                      uncertainties=args.uncertainties)
         x, y = zip(*power_manager.history)
        pl.plot_history(x, y)
 
     if 'util' in args.plot:
-        pl = Plotter('Time (s)', 'System Utilization (%)', \
+        pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)', \
                      'System Utilization History', OPATH / f'util.{args.imtype}')
         x, y = zip(*sc.sys_util_history)
         pl.plot_history(x, y)
 
     if 'loss' in args.plot:
-        pl = Plotter('Time (s)', 'Power Losses (kW)', 'Power Loss History', \
+        pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History', \
                      OPATH / f'loss.{args.imtype}', \
                      uncertainties=args.uncertainties)
         x, y = zip(*power_manager.loss_history)
         pl.plot_history(x, y)
 
-        pl = Plotter('Time (s)', 'Power Losses (%)', 'Power Loss History', \
+        pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History', \
                      OPATH / f'loss_pct.{args.imtype}', \
                      uncertainties=args.uncertainties)
         x, y = zip(*power_manager.loss_history_percentage)
@@ -194,7 +197,7 @@ if args.plot:
     if cooling_model:
         ylabel = 'pue'
         title = 'FMU ' + ylabel + 'History'
-        pl = Plotter('Time (s)', ylabel, title, OPATH / f'pue.{args.imtype}', \
+        pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / f'pue.{args.imtype}', \
                      uncertainties=args.uncertainties)
         df = pd.DataFrame(cooling_model.fmu_history)
         df.to_parquet('cooling_model.parquet', engine='pyarrow')
@@ -206,7 +209,7 @@ if args.plot:
     if cooling_model:
         ylabel = 'Tr_pri_Out[1]'
         title = 'FMU ' + ylabel + 'History'
-        pl = Plotter('Time (s)', ylabel, title, OPATH / 'temp.svg')
+        pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / 'temp.svg')
         df = pd.DataFrame(cooling_model.fmu_history)
         df.to_parquet('cooling_model.parquet', engine='pyarrow')
         pl.plot_compare(df['time'], df[ylabel])
diff --git a/pytest.ini b/pytest.ini
index 4de6475..7209cf8 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,6 +3,7 @@ testpaths = tests
 #python_paths = .
 markers =
     long: mark a test as long (skipped if not run with --runlong)
+
     system: mark a test as system (integration) test
     unit: mark a test as a unit test
     withdata: marks tests that require external data
@@ -12,6 +13,7 @@ markers =
     time: time argument test
     fastforward: fastforward argument test
     time_delta: time delta argument test
+    time_delta_sub_second: sub second time delta argument test
 
     40frontiers: System test
     adastraMI250: System test
diff --git a/raps/args.py b/raps/args.py
index 41b18cc..ca0351e 100644
--- a/raps/args.py
+++ b/raps/args.py
@@ -19,7 +19,7 @@ parser.add_argument('--noui', default=False, action='store_true', help='Run with
 parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)')
 parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
 #parser.add_argument("--time-delta", type=str, default=None, help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. If not set "TRACE_QUANTA" is used.)') # This seems sensible, but 1s is the previous default before introducing this change!
-parser.add_argument("--time-delta", type=str, default="1s", help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. Default value: 1s.)')
+parser.add_argument("--time-delta", type=str, default="1s", help='Time delta for simulation steps, e.g. 15, 15s, 1m, 1h, 3d, 1ms. (Default unit in seconds.
Default value: 1s.)') parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout') parser.add_argument('-n', '--numjobs', type=int, default=100, help='Number of jobs to schedule') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') @@ -76,10 +76,31 @@ parser.add_argument('--accounts-json', type=str, help='Json of account stats gen def post_process_args(args): + if args.time_delta: + time_delta_raw, time_delta_downscale_raw = convert_to_seconds(args.time_delta) + else: + time_delta_raw, time_delta_downscale_raw = None, 1 + + if args.time: + time_raw, time_downscale_raw = convert_to_seconds(args.time) + else: + time_raw, time_downscale_raw = None, 1 + if args.fastforward: - args.fastforward = convert_to_seconds(args.fastforward) + ff_raw, ff_downscale_raw = convert_to_seconds(args.fastforward) + else: + ff_raw, ff_downscale_raw = None, 1 + + max_downscale = max(time_delta_downscale_raw, time_downscale_raw, ff_downscale_raw) + args.downscale = max_downscale + + if args.time_delta: + args.time_delta = int((time_delta_raw / time_delta_downscale_raw) * max_downscale) if args.time: - args.time = convert_to_seconds(args.time) + args.time = int((time_raw / time_downscale_raw) * max_downscale) + if args.fastforward: + args.fastforward = int((ff_raw / ff_downscale_raw) * max_downscale) + return args diff --git a/raps/engine.py b/raps/engine.py index c82c55e..512e8c8 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -74,6 +74,7 @@ class Engine: self.debug = kwargs.get('debug') self.output = kwargs.get('output') self.replay = kwargs.get('replay') + self.downscale = kwargs.get('downscale',1) # Factor to downscale the 1s timesteps (power of 10) self.simulate_network = kwargs.get('simulate_network') self.sys_util_history = [] self.scheduler_queue_history = [] @@ -384,10 +385,10 @@ class Engine: # Continue with System Simulation # Calculate node occupancy - node_occupancy = {node['id']: 0 for node in self.resource_manager.nodes} # Initialize even if no running jobs + node_occupancy = {node['id']: 0 for node in self.resource_manager.nodes} # Initialize even if no running jobs for job in self.running: if job.scheduled_nodes: - node_id = job.scheduled_nodes[0] # Assuming one node per job for multitenancy + node_id = job.scheduled_nodes[0] # Assuming one node per job for multitenancy node_occupancy[node_id] += 1 self.node_occupancy_history.append(node_occupancy) @@ -443,7 +444,7 @@ class Engine: def run_simulation(self, jobs, timestep_start, timestep_end, time_delta=1, autoshutdown=False): """Generator that yields after each simulation tick.""" - self.timesteps = timestep_end - timestep_start # Where is this used? + self.timesteps = (timestep_end - timestep_start) # Where is this used? if self.scheduler.policy == PolicyType.REPLAY: replay = True @@ -489,7 +490,7 @@ class Engine: # 4. 
Run tick only at specified time_delta if 0 == (timestep % time_delta) and \ - ((time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1): + ((time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or (time_delta != 1 or self.downscale != 1)): tick_data = self.tick(time_delta=time_delta) tick_data.completed = completed_jobs else: diff --git a/raps/job.py b/raps/job.py index 1f89bae..2bfd4b2 100644 --- a/raps/job.py +++ b/raps/job.py @@ -26,7 +26,9 @@ def job_dict(*, nodes_required, name, account, start_time=0, end_time=0, wall_time=0, trace_time=0, trace_start_time=0, trace_end_time=0, trace_quanta=None, - trace_missing_values=False): + trace_missing_values=False, + downscale=1 + ): """ Return job info dictionary """ return { 'nodes_required': nodes_required, @@ -59,7 +61,8 @@ def job_dict(*, nodes_required, name, account, 'trace_end_time': trace_end_time, 'trace_quanta': trace_quanta, 'trace_missing_values': trace_missing_values, - 'dilated': False + 'dilated': False, + 'downscale': downscale } diff --git a/raps/stats.py b/raps/stats.py index f4692e6..ad3a67f 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -15,8 +15,7 @@ from .engine import Engine def get_engine_stats(engine: Engine): """ Return engine statistics """ num_samples = len(engine.power_manager.history) if engine.power_manager else 0 - - time_simulated = convert_seconds_to_hhmmss(engine.timesteps) + time_simulated = convert_seconds_to_hhmmss(engine.timesteps / engine.downscale) average_power_mw = sum_values(engine.power_manager.history) / num_samples / 1000 if num_samples else 0 average_loss_mw = sum_values(engine.power_manager.loss_history) / num_samples / 1000 if num_samples else 0 min_loss_mw = min_value(engine.power_manager.loss_history) / 1000 if num_samples else 0 diff --git a/raps/telemetry.py b/raps/telemetry.py index e232898..792f4fe 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -67,7 +67,7 @@ class Telemetry: list_of_job_dicts.append(job.__dict__) np.savez_compressed(filename, jobs=list_of_job_dicts, timestep_start=timestep_start, timestep_end=timestep_end, args=args) - def load_snapshot(self, snapshot: str) -> list: + def load_snapshot(self, snapshot: str, downscale=1) -> list: """Reads a snapshot from a compressed file and return 4 values: joblist, timestep_start, timestep_end and args. :param str snapshot: Filename @@ -145,11 +145,11 @@ class Telemetry: def load_data(self, files): """Load telemetry data using custom data loaders.""" - return self.dataloader.load_data(files, **self.kwargs) + return self.dataloader.load_data(files, downscale, **self.kwargs) def load_data_from_df(self, *args, **kwargs): """Load telemetry data using custom data loaders.""" - return self.dataloader.load_data_from_df(*args, **kwargs) + return self.dataloader.load_data_from_df(*args, downscale, **kwargs) def load_data_from_csv(self, file, *args, **kwargs): jobs = [] @@ -195,7 +195,7 @@ class Telemetry: """ Return (row, col) tuple for a cdu index """ return self.dataloader.cdu_pos(index, config=self.config) - def load_jobs_times_args_from_files(self,*,files, args): + def load_jobs_times_args_from_files(self,*,files, args, downscale=1): """ Load all files as combined jobs """ # Read telemetry data (either npz file or via custom data loader) # TODO: Merge args? 
See main.py:79 diff --git a/raps/ui.py b/raps/ui.py index 58a8941..a0400c6 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -22,11 +22,15 @@ from raps.engine import TickData, Engine class LayoutManager: def __init__(self, layout_type, engine: Engine, total_timesteps=0, debug=None, args_dict=None, **config): self.debug = debug - self.noui = args_dict['noui'] + if args_dict is not None: + self.noui = args_dict.get("noui") + self.simulate_network = args_dict.get("simulate_network") + else: + self.noui = False + self.simulate_network = False self.engine = engine self.config = config self.topology = self.engine.config.get("TOPOLOGY", "none") - self.simulate_network = args_dict.get("simulate_network") self.hascooling = layout_type == "layout2" self.power_df_header = self.config['POWER_DF_HEADER'] self.racks_per_cdu = self.config['RACKS_PER_CDU'] @@ -125,11 +129,10 @@ class LayoutManager: if show_slowdown: columns.append("SLOW DOWN") else: - #columns.append("NODE SEGMENTS") - columns.append("SEGMENT") - - #if show_nodes: - # columns.append("NODELIST") + if show_nodes: + columns.append("NODELIST") + else: + columns.append("SEGMENT") # NODE SEGMENTS columns.append("TIME") @@ -174,14 +177,22 @@ class LayoutManager: col_nodelist = col_slow # This logic is a bit flawed... nodes_display = col_nodelist - # Build the row + if self.engine.downscale != 1: + running_time_str = convert_seconds_to_hhmmss(job.running_time // self.engine.downscale) + \ + f" +{job.running_time % self.engine.downscale}/{self.engine.downscale}s" + else: + running_time_str = convert_seconds_to_hhmm(job.running_time) + row = [ str(job.id).zfill(5), - convert_seconds_to_hhmm(job.wall_time), + convert_seconds_to_hhmm(job.wall_time // self.engine.downscale), + ##str(job.wall_time), str(job.name), str(job.account), job.state.value, str(job.nodes_required), + nodes_display, + running_time_str ] row.append(nodes_display) @@ -232,7 +243,7 @@ class LayoutManager: # Add data row with white values row = [ - convert_seconds_to_hhmmss(time), + convert_seconds_to_hhmmss(time // self.engine.downscale), str(nrun), str(nqueue), str(active_nodes), @@ -298,7 +309,6 @@ class LayoutManager: return df - def update_powertemp_array(self, power_df, cooling_outputs, pflops, gflop_per_watt, system_util, uncertainties=False): """ Updates the displayed power and temperature table with the provided data. 
@@ -376,7 +386,7 @@ class LayoutManager: total_power_str, str(f"{pflops:.2f}"), str(f"{gflop_per_watt:.1f}"), - total_loss_str + " (" + percent_loss_str+ ")", + total_loss_str + " (" + percent_loss_str + ")", f"{cooling_outputs['pue']:.2f}", style="white" # Apply white style to all elements in the row ) @@ -483,7 +493,6 @@ class LayoutManager: self.update_pressflow_array(data.fmu_outputs) self.update_scheduled_jobs(data.running + data.queue) - self.update_status( data.current_time, len(data.running), len(data.queue), data.num_active_nodes, data.num_free_nodes, data.down_nodes, data.avg_net_util, data.slowdown_per_job @@ -523,7 +532,6 @@ class LayoutManager: if not self.debug and not self.noui: self.update_progress_bar(1) - def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta): """ Prepares the UI and returns a generator for the simulation """ return self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) diff --git a/raps/utils.py b/raps/utils.py index 81ed4a0..2ef6fdd 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -116,6 +116,13 @@ def truncated_weibull(scale, shape, min, max): if min < number <= max: return int(number) +def truncated_weibull_float(scale, shape, min, max): + while True: + number = random.weibullvariate(scale, shape) + if min < number <= max: + return float(number) + + def return_nearest_power_of(*,number,base): if base == 1: @@ -418,11 +425,14 @@ def create_dir_indexed(dir:str, path:str = None) -> str: def next_arrival_byconfargs(config,args,reset=False): arrival_rate = 1 arrival_time = config['JOB_ARRIVAL_TIME'] + time_delta = args.time_delta + downscale = args.downscale + if args.job_arrival_rate: arrival_rate = args.job_arrival_rate if args.job_arrival_time: arrival_time = args.job_arrival_time - return next_arrival(arrival_rate / arrival_time, reset) + return next_arrival(arrival_rate / (arrival_time * downscale), reset) def next_arrival_byconfkwargs(config,kwargs,reset=False): @@ -456,6 +466,11 @@ def convert_to_seconds(time_str): 's': 1, # 1 second = 1 second '': 1 # empty string = 1 second } + downscale_factors = { + 'ms': 1000, + 'cs': 100, + 'ds': 10 + } # Check if the input string ends with a unit or is purely numeric # and extract the numeric part and the time unit @@ -463,8 +478,12 @@ def convert_to_seconds(time_str): unit = '' num_str = time_str[:] else: - unit = time_str[-1] - num_str = time_str[:-1] + if time_str[-2].isdigit(): + unit = time_str[-1] + num_str = time_str[:-1] + else: + unit = time_str[-2:] + num_str = time_str[:-2] index = num_str.find(".") # convert int or float string if index != -1: @@ -476,7 +495,10 @@ def convert_to_seconds(time_str): # Convert to seconds using the conversion factors if unit in time_factors: - return num * time_factors[unit] + return num * time_factors[unit], 1 + elif unit in downscale_factors: + downscale = downscale_factors[unit] + return num, downscale else: raise ValueError(f"Unknown time unit: {unit}") diff --git a/raps/workload.py b/raps/workload.py index 964d670..7ca78f5 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -30,7 +30,7 @@ import numpy as np import matplotlib.pyplot as plt from raps.telemetry import Telemetry from raps.job import job_dict, Job -from raps.utils import create_file_indexed, create_dir_indexed +from raps.utils import create_file_indexed, create_dir_indexed, convert_to_seconds JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",\ @@ -44,7 +44,7 @@ ACCT_NAMES = ["ACT01", 
"ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07",\ MAX_PRIORITY = 500000 -from raps.utils import truncated_normalvariate_int, truncated_normalvariate_float, determine_state, next_arrival, next_arrival_byconfargs, truncated_weibull +from raps.utils import truncated_normalvariate_int, truncated_normalvariate_float, determine_state, next_arrival, next_arrival_byconfargs, truncated_weibull, truncated_weibull_float class Workload: @@ -128,7 +128,7 @@ class Workload: return truncated_normalvariate_float(args.cpuutil_normal_mean, args.cpuutil_normal_stddev,0.0, config['CPUS_PER_NODE']) def cpu_utilization_distribution_draw_weibull(self,args,config): - return truncated_weibull(args.cpuutil_normal_mean, args.cpuutil_normal_stddev,0.0, config['CPUS_PER_NODE']) + return truncated_weibull_float(args.cpuutil_weibull_scale, args.cpuutil_weibull_shape,0.0, config['CPUS_PER_NODE']) def gpu_utilization_distribution_draw_uniform(self,args,config): return random.uniform(0.0, config['GPUS_PER_NODE']) @@ -137,7 +137,7 @@ class Workload: return truncated_normalvariate_float(args.gpuutil_normal_mean, args.gpuutil_normal_stddev,0.0, config['GPUS_PER_NODE']) def gpu_utilization_distribution_draw_weibull(self,args,config): - return truncated_weibull(args.gpuutil_normal_mean, args.gpuutil_normal_stddev,0.0, config['GPUS_PER_NODE']) + return truncated_weibull_float(args.gpuutil_weibull_scale, args.gpuutil_weibull_shape,0.0 , config['GPUS_PER_NODE']) def wall_time_distribution_draw_uniform(self,args,config): return random.uniform(config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) @@ -160,7 +160,7 @@ class Workload: partition = random.choice(self.partitions) config = self.config_map[partition] for job_index in range(args.numjobs): - submit_time = job_arrival_distribution_to_draw_from(args,config) + submit_time = int(job_arrival_distribution_to_draw_from(args,config)) start_time = submit_time nodes_required = job_size_distribution_to_draw_from(args,config) name = random.choice(JOB_NAMES) @@ -279,6 +279,11 @@ class Workload: partition = random.choice(self.partitions) config = self.config_map[partition] + time_delta = args.time_delta + downscale = args.downscale + + config['MIN_WALL_TIME'] = config['MIN_WALL_TIME'] * downscale + config['MAX_WALL_TIME'] = config['MAX_WALL_TIME'] * downscale jobs = [] for job_index in range(args.numjobs): # Randomly select a partition @@ -290,15 +295,19 @@ class Workload: gpu_util = random.random() * config['GPUS_PER_NODE'] mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 - wall_time = truncated_normalvariate_int(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // 3600 * 3600 - time_limit = truncated_normalvariate_int(mu, sigma, wall_time, config['MAX_WALL_TIME']) // 3600 * 3600 + wall_time = (truncated_normalvariate_int(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // (3600*downscale) * (3600*downscale)) + time_limit = (truncated_normalvariate_int(mu, sigma, wall_time, config['MAX_WALL_TIME']) // (3600*downscale) * (3600*downscale)) + #print(f"wall_time: {wall_time//downscale}") + # print(f"time_limit: {time_limit//downscale}") end_state = determine_state(config['JOB_END_PROBS']) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) priority = random.randint(0, MAX_PRIORITY) net_tx, net_rx = None, None # Jobs arrive according to Poisson process - time_to_next_job = next_arrival_byconfargs(config,args) + time_to_next_job = 
int(next_arrival_byconfargs(config,args))
+            #wall_time = wall_time * downscale
+            #time_limit = time_limit * downscale
 
             job_info = job_dict(nodes_required=nodes_required,
                                 name=name, account=account,
                                 cpu_trace=cpu_trace,
@@ -312,7 +321,8 @@ class Workload:
                                 end_time=time_to_next_job + wall_time,
                                 wall_time=wall_time, trace_time=wall_time,
                                 trace_start_time=0, trace_end_time=wall_time,
-                                trace_quanta=config['TRACE_QUANTA']
+                                trace_quanta=config['TRACE_QUANTA'] * downscale,
+                                downscale=downscale
                                 )
             job = Job(job_info)
             jobs.append(job)
diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py
index 6128e4c..c1aee66 100644
--- a/tests/systems/test_main_time_delta_run.py
+++ b/tests/systems/test_main_time_delta_run.py
@@ -3,6 +3,8 @@ import subprocess
 import gc
 import pytest
 from tests.util import PROJECT_ROOT
+from raps.utils import convert_seconds_to_hhmmss
+from raps.utils import convert_to_seconds
 
 
 pytestmark = [
@@ -35,5 +37,7 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg):
         "--noui"
     ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
     assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    time, downscale = convert_to_seconds(time_arg)
+    assert f"Time Simulated: {convert_seconds_to_hhmmss(time // downscale)}" in result.stdout
     del result
     gc.collect()
diff --git a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py
new file mode 100644
index 0000000..bcadd41
--- /dev/null
+++ b/tests/systems/test_main_time_delta_sub_second_run.py
@@ -0,0 +1,47 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT
+from raps.utils import convert_seconds_to_hhmmss
+from raps.utils import convert_to_seconds
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata,
+    pytest.mark.time_delta
+]
+
+
+@pytest.mark.parametrize("time_arg, tdelta_arg", [
+    ("10", "1ds"),
+    ("60", "3ds"),
+    ("1", "1cs"),
+    ("1", "1ms"),
+    ("10ds", "1cs"),
+    ("10cs", "1ms"),
+    ("100ms", "1ms"),
+    ("100ms", "1s"),
+], ids=["1ds","3ds","1cs","1ms","1cs-for-10ds","1ms-for-10cs","1ms-for-100ms","1s-for-100ms"])
+def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_arg):
+    if not system_config.get("time_delta", False):
+        pytest.skip(f"{system} does not support time_delta run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "-t", time_arg,
+        "--time-delta", tdelta_arg,
+        "--system", system,
+        # "-f", system_file,
+        "--noui"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    time, downscale = convert_to_seconds(time_arg)
+    td, td_ds = convert_to_seconds(tdelta_arg)
+    #assert f"Time Simulated: {convert_seconds_to_hhmmss(int((time / td_ds) * downscale))}" in result.stdout
+    assert f"Time Simulated: {convert_seconds_to_hhmmss(time / downscale)}" in result.stdout
+
+    del result
+    gc.collect()
-- 
GitLab


From dbcd7f20e38ba6eca19ee3afdecb283ba5cc6e77 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 6 Aug 2025 16:16:51 -0400
Subject: [PATCH 204/388] Added pytests for network and fixed a minor issue. (The tests only check for failure via the return code; output is not validated!)
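
For reference, the new tests hang off the `net` marker registered in
pytest.ini, so they can be selected with the usual marker expressions
(usage sketch; data paths are resolved through RAPS_DATA_DIR as in
tests/util.py):

    pytest -m net tests/systems
    pytest -m "net and withdata" --runlong tests/systems

As with the other system tests, the only failure condition checked is a
non-zero return code from main.py.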
--- pytest.ini | 1 + raps/telemetry.py | 4 +- tests/systems/conftest.py | 11 ++++++ tests/systems/test_main_network_run.py | 27 ++++++++++++++ .../systems/test_main_network_withdata_run.py | 37 +++++++++++++++++++ 5 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 tests/systems/test_main_network_run.py create mode 100644 tests/systems/test_main_network_withdata_run.py diff --git a/pytest.ini b/pytest.ini index 7209cf8..e7a522a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -14,6 +14,7 @@ markers = fastforward: fastforward argument test time_delta: time delta argument test time_delta_sub_second: sub second time delta argument test + net: network model test 40frontiers: System test adastraMI250: System test diff --git a/raps/telemetry.py b/raps/telemetry.py index 792f4fe..8cd976c 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -145,11 +145,11 @@ class Telemetry: def load_data(self, files): """Load telemetry data using custom data loaders.""" - return self.dataloader.load_data(files, downscale, **self.kwargs) + return self.dataloader.load_data(files, **self.kwargs) def load_data_from_df(self, *args, **kwargs): """Load telemetry data using custom data loaders.""" - return self.dataloader.load_data_from_df(*args, downscale, **kwargs) + return self.dataloader.load_data_from_df(*args, **kwargs) def load_data_from_csv(self, file, *args, **kwargs): jobs = [] diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index b57a948..250774c 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -44,6 +44,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": False, }, "adastraMI250": { "basic": True, @@ -54,6 +55,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": False, }, "frontier": { "basic": True, @@ -64,6 +66,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": False, }, "fugaku": { "basic": True, @@ -74,6 +77,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": False, }, "gcloudv2": { "basic": False, @@ -84,6 +88,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": False, }, "lassen":{ "basic": True, @@ -94,6 +99,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": True, }, "marconi100":{ "basic": True, @@ -104,6 +110,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": False, }, "mit_supercloud": { "basic": False, @@ -114,6 +121,8 @@ def system_config(system): "time": False, "fastforward": False, "time_delta": False, + "net": False, + "net-multi-sim": True, }, "setonix": { "basic": False, @@ -124,6 +133,7 @@ def system_config(system): "time": False, "fastforward": False, "time_delta": False, + "net": False, }, "summit": { "basic": True, @@ -134,6 +144,7 @@ def system_config(system): "time": True, "fastforward": True, "time_delta": True, + "net": False, } } return configs.get(system, default_config) diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py new file mode 100644 index 0000000..4872f53 --- /dev/null +++ b/tests/systems/test_main_network_run.py @@ -0,0 +1,27 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_main_run(system, system_config): + if not system_config.get("net", 
False):
+        pytest.skip(f"{system} does not support basic net run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", "1m",
+        "--system", system,
+        "-net"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py
new file mode 100644
index 0000000..e481d38
--- /dev/null
+++ b/tests/systems/test_main_network_withdata_run.py
@@ -0,0 +1,37 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT, DATA_PATH
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata,
+    pytest.mark.withdata,
+    pytest.mark.long
+]
+
+
+def test_main_run(system, system_config, system_file):
+    if not system_config.get("net", False):
+        pytest.skip(f"{system} does not support basic net run.")
+
+    if isinstance(system_file, list):
+        file_list = [DATA_PATH / system / x for x in system_file]
+    else:
+        file_list = [DATA_PATH / system / system_file]
+    for file in file_list:
+        assert os.path.isfile(file) or os.path.isdir(file), "File does not exist. Does ./data exist, or is RAPS_DATA_DIR set?"
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "--time", "1m",
+        "--system", system,
+        "-f", *file_list,
+        "-net"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    del result
+    gc.collect()
-- 
GitLab


From 53365e0cbdd298cbaef6a34c5ad3d35df58edd31 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Thu, 7 Aug 2025 11:23:56 -0400
Subject: [PATCH 205/388] Removed tick setting job state. This is only done in prepare_timestep.

---
 raps/engine.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index 512e8c8..8859b8c 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -271,10 +271,6 @@
 
         slowdown_factors = []
 
-        for job in self.running:
-            if job.end_time == self.current_time:
-                job.state = JobState.COMPLETED
-
         for job in self.running:
             if self.debug:
                 print(f"JobID: {job.id}")
-- 
GitLab


From afb17c7b23ca259f33eeaee39362962fab611a63 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Fri, 8 Aug 2025 14:11:21 -0400
Subject: [PATCH 206/388] Fixed integration of multi_part_sim + system tests. Utilization needs to be validated! Utilization is computed too low.
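
The wildcard handling in multi-part-sim.py now strips stray quote characters
from the partition pattern and infers the system name from the first matched
partition directory, so a quoted glob works from the shell (usage sketch,
mirroring tests/systems/test_multi_part_sim_basic_run.py):

    python multi-part-sim.py --time 1h --system mit_supercloud -x 'mit_supercloud/*'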
--- multi-part-sim.py | 10 ++- raps/args.py | 2 +- raps/dataloaders/frontier.py | 2 +- raps/dataloaders/mit_supercloud/loader.py | 90 ++++++++++--------- raps/engine.py | 4 +- raps/job.py | 5 +- raps/workload.py | 10 ++- tests/systems/conftest.py | 4 +- tests/systems/test_main_withdata_run.py | 4 +- .../test_multi_part_sim_withdata_run.py | 37 ++++++++ 10 files changed, 110 insertions(+), 58 deletions(-) create mode 100644 tests/systems/test_multi_part_sim_withdata_run.py diff --git a/multi-part-sim.py b/multi-part-sim.py index de9e68b..06e1e1c 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -24,9 +24,11 @@ partition_names = args.partitions print(args.partitions) if '*' in args.partitions[0]: - paths = glob.glob(os.path.join(CONFIG_PATH, args.partitions[0])) + paths = glob.glob(os.path.join(CONFIG_PATH, args.partitions[0].replace("'",""))) partition_names = [os.path.join(*p.split(os.sep)[-2:]) for p in paths] + args.system = partition_names[0].split(os.sep)[0] + configs = [ConfigManager(system_name=partition).get_config() for partition in partition_names] args_dicts = [ {**vars(args), 'config': config, 'partition': partition_names[i]} @@ -60,9 +62,11 @@ if args.replay: part = ad['partition'] td = Telemetry(**ad) print(f"\n[{part}] loading traces from {args.replay[0]} …") - jobs_part, t0, t1, args_from_file = td.load_data(args.replay) + jobs_part, t0, t1 = td.load_data(args.replay) jobs_by_partition[part] = jobs_part - td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1]) + #td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1]) + # Check if args need to be extracted or merged! Not implemented yet! + td.save_snapshot(jobs=jobs_part, timestep_start=t0, timestep_end=t1, filename=part.split('/')[-1],args=args) # --- report how many jobs per partition --- for part, jl in jobs_by_partition.items(): diff --git a/raps/args.py b/raps/args.py index ca0351e..18da481 100644 --- a/raps/args.py +++ b/raps/args.py @@ -56,7 +56,7 @@ parser = add_workload_to_parser(parser) #parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') # Scheduling options -choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux', 'experimental'] +choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux', 'experimental', 'multitenant'] parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler') choices = [policy.value for policy in PolicyType] parser.add_argument('--policy', type=str, default=None, help='Schedule policy to use, e.g.:' + str(choices) + " or extended policies") diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index e158f2c..495de2e 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -124,7 +124,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar telemetry_stop but may different due to missing data, for each job. The returned values are these three: - - The list of parsed jobs. (as a job_dict) + - The list of parsed jobs. (as a Job object) - telemetry_start: int (in seconds) - telemetry_end: int (in seconds) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index c42168d..3e84efd 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -9,17 +9,17 @@ and gpu directories. 
The main paper associated with the MIT Supercloud Dataset is available here: https://arxiv.org/abs/2108.02037. There is more information available here: https://dcc.mit.edu/ -Note, that quite a bit of filtering is done with sanity checks to make sure +Note, that quite a bit of filtering is done with sanity checks to make sure the the CPU traces match the GPU traces, etc. At this point it's not uncommon -if there may be 1569 total jobs in the time range, only 834 cpu jobs and 128 +if there may be 1569 total jobs in the time range, only 834 cpu jobs and 128 gpu jobs (962 total) are able to be replayed. This is an issue which will likely have to be improved in the future. --------------------------------------------------------------------------- -Understanding some of the errors. We track the different reasons that -less than the total number of jobs in the slurm log actually run in the +Understanding some of the errors. We track the different reasons that +less than the total number of jobs in the slurm log actually run in the simulator. This is not so much an issue for the CPU partition, but for -the GPU partition, where we have to combine traces extracted from both +the GPU partition, where we have to combine traces extracted from both CPU trace files and GPU trace files. At the beginning of the GPU partition analysis, we give an analysis such as: @@ -34,7 +34,7 @@ At the beginning of the GPU partition analysis, we give an analysis such as: * 128 jobs have BOTH CPU and GPU traces. ---------------------------------------------------- -We give a summary report at the end of the data loading process. An +We give a summary report at the end of the data loading process. An example report is shown for the range `--start 2021-05-21T00:00 --end 2021-05-22T00:00` Skipped jobs summary: @@ -48,29 +48,29 @@ example report is shown for the range `--start 2021-05-21T00:00 --end 2021-05-22 [INFO] Partition 'mit_supercloud/part-cpu': 834 jobs loaded [INFO] Partition 'mit_supercloud/part-gpu': 128 jobs loaded -We explain each of these stats here. +We explain each of these stats here. - - `nodes_alloc > 480`: the number of jobs that are thrown out because + - `nodes_alloc > 480`: the number of jobs that are thrown out because they request more than 480 nodes. - - `pruned_nodes`: the number of jobs thrown out because the node was + - `pruned_nodes`: the number of jobs thrown out because the node was listed in `prune_list.txt`. - - `no_trace_file`: the number of jobs that were found in the Slurm log - for the correct time window and partition, but for which not a single + - `no_trace_file`: the number of jobs that were found in the Slurm log + for the correct time window and partition, but for which not a single corresponding trace file (neither CPU nor GPU) could be found on the filesystem. - - `no_cpu_trace_for_gpu_job`: The number of jobs that had a GPU trace file - but were discarded because they were missing their required corresponding + - `no_cpu_trace_for_gpu_job`: The number of jobs that had a GPU trace file + but were discarded because they were missing their required corresponding CPU trace file. - - `final_gpu_none_mixed`: The number of jobs in a GPU partition run that had + - `final_gpu_none_mixed`: The number of jobs in a GPU partition run that had a CPU trace but were missing the final, processed GPU trace data. 
- - `final_cpu_none_mixed`: The number of jobs in a GPU partition run that were + - `final_cpu_none_mixed`: The number of jobs in a GPU partition run that were missing the essential CPU trace data during the final job construction phase. -Now, we work on debugging some of these. For example, for `no_cpu_trace_for_gpu_job`, +Now, we work on debugging some of these. For example, for `no_cpu_trace_for_gpu_job`, we can take the jid from the warning message: [WARNING] → no cpu trace for gpu! (jid=4074251073298) SKIPPING @@ -93,12 +93,12 @@ Summary of node filtering: Filtering steps: -1. Jobs with `nodes_alloc > 480` were excluded, based on the assumption that - such large allocations span across GPU nodes. This removed 413 nodes, +1. Jobs with `nodes_alloc > 480` were excluded, based on the assumption that + such large allocations span across GPU nodes. This removed 413 nodes, leaving 494 candidate CPU-only nodes. -2. To reach the target of 480 CPU nodes, we analyzed job frequency per node - and pruned the 14 least-used nodes (those with only 1–26 jobs). +2. To reach the target of 480 CPU nodes, we analyzed job frequency per node + and pruned the 14 least-used nodes (those with only 1–26 jobs). These pruned nodes are listed in `prune_list.txt`. The final list of CPU-only nodes is stored in `cpu_nodes.txt`, and the list @@ -144,7 +144,7 @@ def parse_tres_alloc(tres_str: Union[str, None], stats: Counter = None) -> Dict[Union[int, str], int]: """ Parse a Slurm tres_alloc/tres_req field like: '1=20,2=170000,4=1,5=20' - + Parameters ---------- tres_str : str | None @@ -213,7 +213,7 @@ def load_data(local_dataset_path, **kwargs): """ debug = kwargs.get("debug") NL_PATH = os.path.dirname(__file__) - + skip_counts = Counter() # unpack @@ -224,7 +224,7 @@ def load_data(local_dataset_path, **kwargs): # slurm log -> DataFrame slurm_path = None - for root, _, files in os.walk(local_dataset_path): + for root, _, files in os.walk(os.path.expanduser(local_dataset_path)): if "slurm-log.csv" in files: slurm_path = os.path.join(root, "slurm-log.csv") break @@ -239,7 +239,7 @@ def load_data(local_dataset_path, **kwargs): # date window start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) - + mask = (sl.time_submit >= start_ts) & (sl.time_submit < end_ts) sl = sl[mask] @@ -247,7 +247,7 @@ def load_data(local_dataset_path, **kwargs): print(f"[DEBUG] After time filtering: {len(sl)} jobs") hits = sl.loc[mask] lines = hits["__line__"].tolist() - print(f"data sourced from {len(lines)} records in slurm-log.csv. Line number ranges:", + print(f"data sourced from {len(lines)} records in slurm-log.csv. 
Line number ranges:", summarize_ranges(lines)) # --- prune out oversized jobs and known under‑used hosts --- @@ -255,7 +255,7 @@ def load_data(local_dataset_path, **kwargs): pruned = set() with open(os.path.join(NL_PATH, "prune_list.txt")) as pf: pruned = {l.strip() for l in pf if l.strip()} - + before_prune = len(sl) # only keep jobs requesting <= 480 nodes sl = sl[ sl.nodes_alloc <= 480 ] @@ -380,7 +380,7 @@ def load_data(local_dataset_path, **kwargs): data = {} - + traced_jobs = all_trace_ids untraced_jobs = job_ids - traced_jobs skip_counts['no_trace_file'] += len(untraced_jobs) @@ -399,7 +399,7 @@ def load_data(local_dataset_path, **kwargs): if debug: tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid} (No slurm info found)") continue - + job_row = job_info.iloc[0] if debug: start_time = job_row.get('time_start', 'N/A') @@ -440,7 +440,7 @@ def load_data(local_dataset_path, **kwargs): for p in gpu_files[:10]: print(" ", p) - # data from the cpu processes are all stored under the `data` dictionary + # data from the cpu processes are all stored under the `data` dictionary # according to their respective jid key #print("******", data.keys()) @@ -515,7 +515,7 @@ def load_data(local_dataset_path, **kwargs): # build final job_dicts jobs_list = [] - + # Get CPUS_PER_NODE and GPUS_PER_NODE from config config = kwargs.get('config', {}) cpus_per_node = config.get('CPUS_PER_NODE') @@ -571,12 +571,12 @@ def load_data(local_dataset_path, **kwargs): gpu_units_req = math.ceil(total_gpu / nr) # sometimes there are spurious large values for cpu util - set max limit based on peak - cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node + cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node # Is this per CPU? cpu_tr = [min(x/cores_per_cpu/cpus_per_node, cpu_peak) for x in cpu_tr] submit_time = rec.get("time_submit", t0) - start_ts - job = job_dict( + current_job_dict = job_dict( nodes_required = nr, cpu_cores_required = cpu_cores_req, gpu_units_required = gpu_units_req, @@ -597,22 +597,26 @@ def load_data(local_dataset_path, **kwargs): wall_time = max(0, t1-t0), trace_time = len(cpu_tr)*quanta, trace_start_time = 0, - trace_end_time = len(cpu_tr)*quanta + trace_end_time = len(cpu_tr)*quanta, + trace_quanta = quanta ) + job = Job(current_job_dict) jobs_list.append(job) # Calculate min_overall_utime and max_overall_utime - min_overall_utime = int(sl.time_submit.min()) - max_overall_utime = int(sl.time_submit.max()) - - args_namespace = SimpleNamespace( - fastforward=min_overall_utime, - system='mit_supercloud', - time=max_overall_utime - ) - + telemetry_start = int(sl.time_start.min()) + telemetry_end = int(sl.time_end.max()) + #min_overall_utime = int(sl.time_submit.min()) + #max_overall_utime = int(sl.time_submit.max()) + + #args_namespace = SimpleNamespace( + # fastforward=min_overall_utime, + # system='mit_supercloud', + # time=max_overall_utime + #) + print("\nSkipped jobs summary:") for reason, count in skip_counts.items(): print(f"- {reason}: {count}") - return jobs_list, min_overall_utime, max_overall_utime, args_namespace + return jobs_list, telemetry_start, telemetry_end # min_overall_utime, max_overall_utime, args_namespace diff --git a/raps/engine.py b/raps/engine.py index 8859b8c..6885e50 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -275,8 +275,7 @@ class Engine: if self.debug: print(f"JobID: {job.id}") - if job.state == JobState.RUNNING: - job.running_time = self.current_time - job.start_time + job.running_time = self.current_time - job.start_time if job.state != 
JobState.RUNNING: raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}") @@ -389,7 +388,6 @@ class Engine: self.node_occupancy_history.append(node_occupancy) - tick_data = TickData( current_time=self.current_time, completed=None, diff --git a/raps/job.py b/raps/job.py index 2bfd4b2..845f928 100644 --- a/raps/job.py +++ b/raps/job.py @@ -160,8 +160,9 @@ class Job: # Type is not as expected! raise ValueError(f"type: self.scheduled_nodes:{type(self.scheduled_nodes)}, with {type(self.scheduled_nodes[0])}") assert isinstance(self.submit_time,(int,float)) - assert isinstance(self.start_time,(int,float)) - assert isinstance(self.end_time,(int,float)) + assert isinstance(self.wall_time,(int,float,np.int64,np.double)) + assert isinstance(self.start_time,(int,float,np.int64,np.double,type(None))) + assert isinstance(self.end_time,(int,float,np.int64,np.double,type(None))) assert self.start_time <= self.end_time, f"{self.start_time} <= {self.end_time}" def __repr__(self): diff --git a/raps/workload.py b/raps/workload.py index 7ca78f5..4dec570 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -166,7 +166,11 @@ class Workload: name = random.choice(JOB_NAMES) account = random.choice(ACCT_NAMES) cpu_util = cpu_util_distribution_to_draw_from(args,config) + if "CORES_PER_CPU" in config: + cpu_cores_required = random.randint(0, config["CORES_PER_CPU"]) gpu_util = gpu_util_distribution_to_draw_from(args,config) + if "GPUS_PER_NODE" in config: + gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"],math.ceil(max(gpu_util)))) wall_time = wall_time_distribution_to_draw_from(args,config) end_time = start_time + wall_time time_limit = max(wall_time,wall_time_distribution_to_draw_from(args,config)) @@ -187,6 +191,8 @@ class Workload: end_time=end_time, wall_time=wall_time, trace_time=wall_time, trace_start_time=0, trace_end_time=wall_time, + cpu_cores_required=cpu_cores_required, + gpu_units_required=gpu_units_required, trace_quanta=config['TRACE_QUANTA'] ) job = Job(job_info) @@ -295,8 +301,8 @@ class Workload: gpu_util = random.random() * config['GPUS_PER_NODE'] mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 - wall_time = (truncated_normalvariate_int(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // (3600*downscale) * (3600*downscale)) - time_limit = (truncated_normalvariate_int(mu, sigma, wall_time, config['MAX_WALL_TIME']) // (3600*downscale) * (3600*downscale)) + wall_time = (truncated_normalvariate_int(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) + time_limit = (truncated_normalvariate_int(mu, sigma, wall_time, config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) #print(f"wall_time: {wall_time//downscale}") # print(f"time_limit: {time_limit//downscale}") end_state = determine_state(config['JOB_END_PROBS']) diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index 250774c..9324ac1 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -115,7 +115,7 @@ def system_config(system): "mit_supercloud": { "basic": False, "multi-part-sim": True, - "withdata": False, + "withdata": True, "cooling": False, "uncertainty": False, "time": False, @@ -160,7 +160,7 @@ def system_file(system): "gcloudv2":["/v2/google_cluster_data_2011_sample"], "lassen":["Lassen-Supercomputer-Job-Dataset"], "marconi100":["job_table.parquet"], - "mit_supercloud":[""], + 
"mit_supercloud":["202201"], "setonix":[""], "summit":[] } diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 2fe2234..2bb337c 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -13,6 +13,8 @@ pytestmark = [ def test_main_withdata_run(system, system_config, system_file): + if not system_config.get("basic", False): + pytest.skip(f"{system} does not support basic run even without data.") if not system_config.get("withdata", False): pytest.skip(f"{system} does not support basic run with data.") if isinstance(system_file, list): @@ -20,7 +22,7 @@ def test_main_withdata_run(system, system_config, system_file): else: file_list = [DATA_PATH / system / system_file] for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), "File does not exist. does ./data exist or is RAPS_DATA_DIR set?" + assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" os.chdir(PROJECT_ROOT) result = subprocess.run([ "python", "main.py", diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py new file mode 100644 index 0000000..90ffc09 --- /dev/null +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -0,0 +1,37 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT, DATA_PATH + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_multi_part_sim_run(system, system_config, system_file): + if not system_config.get("multi-part-sim", False): + pytest.skip(f"{system} does not support basic multi-part-sim run even without data.") + if not system_config.get("withdata", False): + pytest.skip(f"{system} does not support multi-part-sim run with data.") + if isinstance(system_file, list): + file_list = [DATA_PATH / system / x for x in system_file] + else: + file_list = [DATA_PATH / system / system_file] + for file in file_list: + assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" 
+ + + os.chdir(PROJECT_ROOT) + result = subprocess.run([ + "python", "multi-part-sim.py", + "--time", "1h", + "-x", f"{system}/*", + "-f", *file_list, + #"--noui" + ], capture_output=True, text=True, stdin=subprocess.DEVNULL) + assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + del result + gc.collect() -- GitLab From 732e8e1db79e383a1834e51e4509c9f8eadaa5b3 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 8 Aug 2025 14:25:43 -0400 Subject: [PATCH 207/388] multi_part_sim_network basic is working even if no example is there (copy network.json config for testsing) --- raps/network.py | 5 ++- raps/workload.py | 19 ++++++----- tests/systems/test_main_network_run.py | 5 ++- .../test_multi_part_sim_network_run.py | 33 +++++++++++++++++++ 4 files changed, 52 insertions(+), 10 deletions(-) create mode 100644 tests/systems/test_multi_part_sim_network_run.py diff --git a/raps/network.py b/raps/network.py index fef4cc4..fc103be 100644 --- a/raps/network.py +++ b/raps/network.py @@ -220,7 +220,10 @@ def link_loads_for_job(G, job_hosts, tx_volume_bytes): loads = {edge: 0.0 for edge in G.edges()} # each host sends tx_volume_bytes to each of the (N-1) peers for src in job_hosts: - per_peer = tx_volume_bytes / (len(job_hosts)-1) + if len(job_hosts) >= 2: + per_peer = tx_volume_bytes / (len(job_hosts)-1) + else: + per_peer = 0 # find paths where src is the sender for (s, d, p) in paths: if s != src: continue diff --git a/raps/workload.py b/raps/workload.py index 4dec570..fd545f8 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -856,7 +856,8 @@ if __name__ == "__main__": wall_time=wall_time, trace_time=wall_time, trace_start_time=0, - trace_end_time=wall_time + trace_end_time=wall_time, + trace_quanta=config['TRACE_QUANTA'] )) job_id_ctr += 1 @@ -887,12 +888,13 @@ if __name__ == "__main__": submit_time=0, time_limit=wall_time, start_time=0, - end_time=wall_time, - wall_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time - )) + end_time=wall_time, + wall_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=config['TRACE_QUANTA'] + )) job_id_ctr += 1 elif mode == 'STAGGERED_JOBS_PER_NODE': @@ -924,7 +926,8 @@ if __name__ == "__main__": wall_time=wall_time, trace_time=wall_time, trace_start_time=0, - trace_end_time=wall_time + trace_end_time=wall_time, + trace_quanta=config['TRACE_QUANTA'] )) job_id_ctr += 1 else: diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index 4872f53..8ec2180 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -12,8 +12,11 @@ pytestmark = [ def test_main_run(system, system_config): + if not system_config.get("basic", False): + pytest.skip(f"{system} does not support basic run.") + if not system_config.get("net", False): - pytest.skip(f"{system} does not support basic net run.") + pytest.skip(f"{system} does not support network run.") os.chdir(PROJECT_ROOT) result = subprocess.run([ diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py new file mode 100644 index 0000000..3a19dc8 --- /dev/null +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -0,0 +1,33 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_multi_part_sim_run(system, system_config): + + if not 
system_config.get("multi-part-sim", False): + pytest.skip(f"{system} does not support basic multi-part-sim run.") + + if not system_config.get("net", False): + pytest.skip(f"{system} does not support network run.") + + os.chdir(PROJECT_ROOT) + result = subprocess.run([ + "python", "multi-part-sim.py", + "--time", "1h", + "--system", system, + "-x", f"{system}/*", + "-net", + #"--noui" + ], capture_output=True, text=True, stdin=subprocess.DEVNULL) + assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + del result + gc.collect() -- GitLab From 4a86bd1d115a30419fa9864217dc1bb9b08fa307 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 8 Aug 2025 16:54:46 -0400 Subject: [PATCH 208/388] Augmented Argument checks and added marconi100 cooling tests. Added argument checks to set args.time properly if -t 0 is set! Added cooling tests for marconi100 and updated tests accordingly. --- main.py | 6 +++--- tests/systems/conftest.py | 6 +++--- tests/systems/test_main_network_run.py | 2 +- tests/systems/test_main_time_run.py | 6 ++++-- tests/systems/test_multi_part_sim_withdata_run.py | 4 ++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index a2277b5..e86c8fa 100644 --- a/main.py +++ b/main.py @@ -88,13 +88,13 @@ else: # Synthetic jobs td = Telemetry(**args_dict) td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname) -if args.fastforward: +if args.fastforward is not None: timestep_start = args.fastforward -if args.time: +if args.time is not None: timestep_end = timestep_start + args.time -if args.time_delta: +if args.time_delta is not None: time_delta = args.time_delta else: time_delta = 1 diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index 9324ac1..6c52c3b 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -94,7 +94,7 @@ def system_config(system): "basic": True, "multi-part-sim": False, "withdata": True, - "cooling": False, + "cooling": True, "uncertainty": False, "time": True, "fastforward": True, @@ -105,7 +105,7 @@ def system_config(system): "basic": True, "multi-part-sim": False, "withdata": True, - "cooling": False, + "cooling": True, "uncertainty": False, "time": True, "fastforward": True, @@ -139,7 +139,7 @@ def system_config(system): "basic": True, "multi-part-sim": False, "withdata": False, - "cooling": False, + "cooling": True, "uncertainty": False, "time": True, "fastforward": True, diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index 8ec2180..6e29162 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_run(system, system_config): +def test_main_network_run(system, system_config): if not system_config.get("basic", False): pytest.skip(f"{system} does not support basic run.") diff --git a/tests/systems/test_main_time_run.py b/tests/systems/test_main_time_run.py index dc72b52..7a574a8 100644 --- a/tests/systems/test_main_time_run.py +++ b/tests/systems/test_main_time_run.py @@ -13,8 +13,10 @@ pytestmark = [ @pytest.mark.parametrize("time_args", [ - "0", "1", "3600", "7200", "43200", - "0s", "1s", "3600s", "7200s", "43200s", + "0", "1", "3600", "7200", + pytest.param("43200", marks=pytest.mark.long), # mark this one as long + "0s", "1s", "3600s", "7200s", + pytest.param("43200s", marks=pytest.mark.long), # mark this one as long "0m", "1m", "60m", "0h", "1h", pytest.param("6h", 
marks=pytest.mark.long), # mark this one as long diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index 90ffc09..f93e07e 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -7,7 +7,8 @@ from tests.util import PROJECT_ROOT, DATA_PATH pytestmark = [ pytest.mark.system, - pytest.mark.nodata + pytest.mark.withdata, + pytest.mark.long ] @@ -23,7 +24,6 @@ def test_multi_part_sim_run(system, system_config, system_file): for file in file_list: assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" - os.chdir(PROJECT_ROOT) result = subprocess.run([ "python", "multi-part-sim.py", -- GitLab From ef5915592eedcc0fb05c53472ee5cfc57270c3fb Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 8 Aug 2025 17:04:53 -0400 Subject: [PATCH 209/388] Updated Readme and CONTRIBUTORS for Lumi and initial multi-part-sim pytest. --- CONTRIBUTORS.txt | 2 +- README.md | 5 +++++ tests/systems/conftest.py | 22 +++++++++++++++++++--- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 046df5b..e351ce9 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -10,4 +10,4 @@ Jake Webb (webbtj@ornl.gov), Oak Ridge National Laboratory Rashadul Kabir (rashadul.kabir@colostate.edu), Colorado State University Bertrand Cirou (cirou@cines.fr), Centre Informatique National de l’Enseignement Supérieur Kevin Menear (kmenear@nrel.gov), National Renewable Energy Laboratory - +Tim Dykes (tim.dykes@hpe.com), Hewlett Packard Enterprise diff --git a/README.md b/README.md index 37a38c9..2b816fe 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,11 @@ For MIT Supercloud # Synthetic tests for verification studies: python multi-part-sim.py -x 'mit_supercloud/*' -w multitenant +For Lumi + + # Synthetic test for lumi multi-part-sim: + python multi-part-sim.py -x lumi/* + ## Perform Network Simulation Lassen is one of the few datasets that has networking data. 
See `raps/dataloaders/lassen.py` for how to diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index 6c52c3b..c564e7d 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -11,7 +11,8 @@ import pytest "marconi100", "mit_supercloud", "setonix", - "summit" + "summit", + "lumi" ]) def system(request): return request.param @@ -145,7 +146,21 @@ def system_config(system): "fastforward": True, "time_delta": True, "net": False, - } + }, + "lumi": { + "basic": False, + "multi-part-sim": True, + "withdata": False, + "cooling": False, + "uncertainty": False, + "time": False, + "fastforward": False, + "time_delta": False, + "net": False, + "net-multi-sim": False + }, + + } return configs.get(system, default_config) @@ -162,6 +177,7 @@ def system_file(system): "marconi100":["job_table.parquet"], "mit_supercloud":["202201"], "setonix":[""], - "summit":[] + "summit":[], + "lumi":[] } return files.get(system,files) -- GitLab From 4e334818f0eaaaee1b96a55f7081a11d8f9e6742 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 8 Aug 2025 17:50:34 -0400 Subject: [PATCH 210/388] Fixed renamed argument arrival in telemetry --- raps/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 8cd976c..6ca591b 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -233,7 +233,7 @@ class Telemetry: job['nodes_required'] = random.randint(1, args.scale) job['scheduled_nodes'] = None # Setting to None triggers scheduler to assign nodes - if hasattr(args,'policy') and args.policy == 'poisson': + if hasattr(args,'arrival') and args.arrival == 'poisson': print("available nodes:", config['AVAILABLE_NODES']) for job in tqdm(jobs, desc="Rescheduling jobs"): job['scheduled_nodes'] = None -- GitLab From d34305221b6eea8c06304e91c60e5299c470a99e Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 8 Aug 2025 17:55:11 -0400 Subject: [PATCH 211/388] Updated the arrival parameter for loaded datasets, wrt lassen and fixed the README. --- README.md | 2 +- raps/dataloaders/lassen.py | 2 +- raps/utils.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2b816fe..e7350a9 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ For Lumi Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. 
To run a network simulation, use the following command: - python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson + python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -ff 365d -t 12h --arrival poisson ## Snapshot of extracted workload data diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index d13da3e..0a1993d 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -193,7 +193,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): if arrival == 'poisson': # Modify the submit times according to Poisson process scheduled_nodes = None - submit_time = next_arrival_byconfkwargs(config,kwargs) + submit_time = fastforward + next_arrival_byconfkwargs(config,kwargs) start_time = submit_time # Pretend Job could start immediately # Alternative: None end_time = submit_time + wall_time # Alternative: None else: # Prescribed replay diff --git a/raps/utils.py b/raps/utils.py index 2ef6fdd..3c7dbed 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -425,7 +425,6 @@ def create_dir_indexed(dir:str, path:str = None) -> str: def next_arrival_byconfargs(config,args,reset=False): arrival_rate = 1 arrival_time = config['JOB_ARRIVAL_TIME'] - time_delta = args.time_delta downscale = args.downscale if args.job_arrival_rate: -- GitLab From 8661274ffc123e40778e751adc64daed9a093d35 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 11 Aug 2025 10:14:30 -0400 Subject: [PATCH 212/388] Minor fixes in main LayoutManager, Fugaku dataloader and scheduleflow scheduler. --- main.py | 2 +- raps/dataloaders/fugaku.py | 2 ++ raps/schedulers/scheduleflow.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index e86c8fa..44042f0 100644 --- a/main.py +++ b/main.py @@ -136,7 +136,7 @@ downscale = args.downscale downscale_str = ""if downscale == 1 else f"/{downscale}" print(f'Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str} seconds from {timestep_start} to {timestep_end}.') print(f'Simulation time delta: {time_delta}{downscale_str} s, Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.') -layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, **config) +layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, args_dict=args_dict, **config) layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 11bb13d..4e3a1ad 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -125,6 +125,7 @@ def load_data_from_df(df, **kwargs): trace_start_time = start_time trace_end_time = end_time trace_missing_values = False # Sane Choice? + trace_quanta = config['TRACE_QUANTA'] # Should we still have this? 
# if arrival == 'poisson': # Modify the arrival times of according to Poisson distribution @@ -142,6 +143,7 @@ def load_data_from_df(df, **kwargs): gpu_trace=gpu_trace, ntx_trace=[], nrx_trace=[], + trace_quanta=trace_quanta, end_state=end_state, scheduled_nodes=scheduled_nodes, id=job_id, diff --git a/raps/schedulers/scheduleflow.py b/raps/schedulers/scheduleflow.py index a586d28..c510030 100644 --- a/raps/schedulers/scheduleflow.py +++ b/raps/schedulers/scheduleflow.py @@ -16,7 +16,7 @@ class Scheduler: """ def __init__(self, config, policy, bfpolicy, resource_manager, jobs): - self.sorted_priorities = sorted([x['priority'] for x in jobs]) + self.sorted_priorities = sorted([x.priority for x in jobs]) num_prios = len(self.sorted_priorities) # self.sf_queue = [] self.queue = [] # track submitted jobs @@ -95,7 +95,7 @@ class Scheduler: for sf_app in start_jobs: job = _match_sf_app_and_job(sf_app,queue,start_jobs) queue.remove(job) - self.resource_manager.assign_nodes_to_job(job, current_time) + self.resource_manager.assign_nodes_to_job(job, current_time, self.policy) running.append(job) -- GitLab From 75c6e81e51babead7f53a0257c57aee76644e5ec Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 11 Aug 2025 10:43:30 -0400 Subject: [PATCH 213/388] Fix on workload generator for cpu cores and gpu units. --- raps/workload.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/raps/workload.py b/raps/workload.py index fd545f8..8c18b77 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -168,9 +168,14 @@ class Workload: cpu_util = cpu_util_distribution_to_draw_from(args,config) if "CORES_PER_CPU" in config: cpu_cores_required = random.randint(0, config["CORES_PER_CPU"]) + else: + cpu_cores_required = None gpu_util = gpu_util_distribution_to_draw_from(args,config) if "GPUS_PER_NODE" in config: - gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"],math.ceil(max(gpu_util)))) + if isinstance(gpu_util,list): + gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"],math.ceil(max(gpu_util)))) + else: + gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"],math.ceil(gpu_util))) wall_time = wall_time_distribution_to_draw_from(args,config) end_time = start_time + wall_time time_limit = max(wall_time,wall_time_distribution_to_draw_from(args,config)) -- GitLab From ee60ce0993437b9bc09d462bc0b1004588b3cbec Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 11 Aug 2025 17:19:12 -0400 Subject: [PATCH 214/388] Added a __name__ == "__main__" to main.py, workload and telemetry and moved print_formatted_report to stats --- main.py | 419 +++++++++++++++++++++++----------------------- raps/config.py | 3 +- raps/stats.py | 32 ++++ raps/telemetry.py | 6 +- raps/workload.py | 8 +- 5 files changed, 259 insertions(+), 209 deletions(-) diff --git a/main.py b/main.py index 44042f0..7345828 100644 --- a/main.py +++ b/main.py @@ -17,246 +17,255 @@ from raps.cooling import ThermoFluidsModel from raps.ui import LayoutManager from raps.flops import FLOPSManager from raps.plotting import Plotter -from raps.power import PowerManager, compute_node_power, compute_node_power_validate -from raps.power import compute_node_power_uncertainties, compute_node_power_validate_uncertainties +from raps.power import ( + PowerManager, + compute_node_power, + compute_node_power_validate +) +from raps.power import ( + compute_node_power_uncertainties, + compute_node_power_validate_uncertainties +) from raps.engine import Engine from raps.job import Job from 
raps.telemetry import Telemetry from raps.workload import Workload from raps.account import Accounts from raps.weather import Weather -from raps.utils import convert_to_seconds, write_dict_to_file -from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats +from raps.utils import write_dict_to_file +from raps.stats import ( + get_engine_stats, + get_job_stats, + get_scheduler_stats, + get_network_stats, + print_formatted_report +) from raps.args import args, args_dict -if args.verbose or args.debug: - print(args) -config = ConfigManager(system_name=args.system).get_config() +def main(): + if args.verbose or args.debug: + print(args) -if args.seed: - random.seed(SEED) - np.random.seed(SEED) + config = ConfigManager(system_name=args.system).get_config() -if args.cooling: - cooling_model = ThermoFluidsModel(**config) - cooling_model.initialize() - args.layout = "layout2" + if args.seed: + random.seed(SEED) + np.random.seed(SEED) - if args_dict['start']: - cooling_model.weather = Weather(args_dict['start'], config=config) -else: - cooling_model = None + if args.cooling: + cooling_model = ThermoFluidsModel(**config) + cooling_model.initialize() + args.layout = "layout2" -if args.validate: - if args.uncertainties: - power_manager = PowerManager(compute_node_power_validate_uncertainties, **config) + if args_dict['start']: + cooling_model.weather = Weather(args_dict['start'], config=config) else: - power_manager = PowerManager(compute_node_power_validate, **config) -else: - if args.uncertainties: - power_manager = PowerManager(compute_node_power_uncertainties, **config) + cooling_model = None + + if args.validate: + if args.uncertainties: + power_manager = PowerManager(compute_node_power_validate_uncertainties, **config) + else: + power_manager = PowerManager(compute_node_power_validate, **config) else: - power_manager = PowerManager(compute_node_power, **config) -args_dict['config'] = config -flops_manager = FLOPSManager(**args_dict) + if args.uncertainties: + power_manager = PowerManager(compute_node_power_uncertainties, **config) + else: + power_manager = PowerManager(compute_node_power, **config) + args_dict['config'] = config + flops_manager = FLOPSManager(**args_dict) + if args.replay: -if args.replay: + td = Telemetry(**args_dict) + jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files(files=args.replay, args=args) + # TODO: Merge args and args_from_files? see telemetry.py:97 - td = Telemetry(**args_dict) - jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files(files=args.replay, args=args) - # TODO: Merge args and args_from_files? 
see telemetry.py:97 + else: # Synthetic jobs + wl = Workload(config) + jobs = getattr(wl, args.workload)(args=args) -else: # Synthetic jobs - wl = Workload(config) - jobs = getattr(wl, args.workload)(args=args) + if args.verbose: + for job_vector in jobs: + job = Job(job_vector) + print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace), '\twall_time(s):', job.wall_time) + time.sleep(2) - if args.verbose: - for job_vector in jobs: - job = Job(job_vector) - print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace), '\twall_time(s):', job.wall_time) - time.sleep(2) - - timestep_start = 0 - if hasattr(jobs[0],'end_time'): - timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) - else: - timestep_end = 88200 # 24 hours + timestep_start = 0 + if hasattr(jobs[0],'end_time'): + timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) + else: + timestep_end = 88200 # 24 hours - td = Telemetry(**args_dict) - td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname) + td = Telemetry(**args_dict) + td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname) -if args.fastforward is not None: - timestep_start = args.fastforward + if args.fastforward is not None: + timestep_start = args.fastforward -if args.time is not None: - timestep_end = timestep_start + args.time + if args.time is not None: + timestep_end = timestep_start + args.time -if args.time_delta is not None: - time_delta = args.time_delta -else: - time_delta = 1 + if args.time_delta is not None: + time_delta = args.time_delta + else: + time_delta = 1 + + sc = Engine( + power_manager=power_manager, + flops_manager=flops_manager, + cooling_model=cooling_model, + jobs=jobs, + **args_dict, + ) + + DIR_NAME = td.dirname + OPATH = OUTPUT_PATH / DIR_NAME + print("Output directory is: ", OPATH) + sc.opath = OPATH + + if args.accounts: + job_accounts = Accounts(jobs) + if args.accounts_json: + loaded_accounts = Accounts.from_json_filename(args.accounts_json) + accounts = Accounts.merge(loaded_accounts, job_accounts) + else: + accounts = job_accounts + sc.accounts = accounts -sc = Engine( - power_manager=power_manager, - flops_manager=flops_manager, - cooling_model=cooling_model, - jobs=jobs, - **args_dict, -) + if args.plot or args.output: + try: + os.makedirs(OPATH) + except OSError as error: + print(f"Error creating directory: {error}") + + if args.verbose: + print(jobs) -DIR_NAME = td.dirname -OPATH = OUTPUT_PATH / DIR_NAME -print("Output directory is: ", OPATH) -sc.opath = OPATH + total_timesteps = timestep_end - timestep_start -if args.accounts: - job_accounts = Accounts(jobs) - if args.accounts_json: - loaded_accounts = Accounts.from_json_filename(args.accounts_json) - accounts = Accounts.merge(loaded_accounts, job_accounts) - else: - accounts = job_accounts - sc.accounts = accounts - -if args.plot or args.output: - try: - os.makedirs(OPATH) - except OSError as error: - print(f"Error creating directory: {error}") - -if args.verbose: - print(jobs) - -total_timesteps = timestep_end - timestep_start - -downscale = args.downscale -downscale_str = ""if downscale == 1 else f"/{downscale}" -print(f'Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str} seconds from {timestep_start} to {timestep_end}.') -print(f'Simulation time delta: {time_delta}{downscale_str} s, Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.') -layout_manager = LayoutManager(args.layout, engine=sc, 
debug=args.debug, total_timesteps=total_timesteps, args_dict=args_dict, **config) -layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) - - -# Print a formatted report -print("\n--- Simulation Report ---") -engine_stats = get_engine_stats(sc) -for key, value in engine_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") -print("-------------------------\n") -print("\n--- Job Stat Report ---") -job_stats = get_job_stats(sc) -for key, value in job_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") -print("-------------------------\n") -print("\n--- Scheduler Report ---") -scheduler_stats = get_scheduler_stats(sc) -for key, value in scheduler_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") -print("-------------------------") -if args.simulate_network: - print("\n--- Network Report ---") - network_stats = get_network_stats(sc) - for key, value in network_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") - print("-------------------------") - -if downscale_str: - downscale_str = "1" + downscale_str - -if args.plot: - if 'power' in args.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History', \ - OPATH / f'power.{args.imtype}', \ - uncertainties=args.uncertainties) - x, y = zip(*power_manager.history) - pl.plot_history(x, y) - - if 'util' in args.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)', \ - 'System Utilization History', OPATH / f'util.{args.imtype}') - x, y = zip(*sc.sys_util_history) - pl.plot_history(x, y) - - if 'loss' in args.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History', \ - OPATH / f'loss.{args.imtype}', \ - uncertainties=args.uncertainties) - x, y = zip(*power_manager.loss_history) - pl.plot_history(x, y) - - pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History', \ - OPATH / f'loss_pct.{args.imtype}', \ - uncertainties=args.uncertainties) - x, y = zip(*power_manager.loss_history_percentage) - pl.plot_history(x, y) - - if 'pue' in args.plot: - if cooling_model: - ylabel = 'pue' - title = 'FMU ' + ylabel + 'History' - pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / f'pue.{args.imtype}', \ - uncertainties=args.uncertainties) - df = pd.DataFrame(cooling_model.fmu_history) - df.to_parquet('cooling_model.parquet', engine='pyarrow') - pl.plot_history(df['time'], df[ylabel]) - else: - print('Cooling model not enabled... skipping output of plot') - - if 'temp' in args.plot: - if cooling_model: - ylabel = 'Tr_pri_Out[1]' - title = 'FMU ' + ylabel + 'History' - pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / 'temp.svg') - df = pd.DataFrame(cooling_model.fmu_history) - df.to_parquet('cooling_model.parquet', engine='pyarrow') - pl.plot_compare(df['time'], df[ylabel]) - else: - print('Cooling model not enabled... 
skipping output of plot') + downscale = args.downscale + downscale_str = ""if downscale == 1 else f"/{downscale}" + print(f'Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str} seconds from {timestep_start} to {timestep_end}.') + print(f'Simulation time delta: {time_delta}{downscale_str} s, Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.') + layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, args_dict=args_dict, **config) + layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) -if args.output: - if args.uncertainties: - # Parquet cannot handle annotated ufloat format AFAIK - print('Data dump not implemented using uncertainties!') + engine_stats = get_engine_stats(sc) + job_stats = get_job_stats(sc) + scheduler_stats = get_scheduler_stats(sc) + if sc.simulate_network: + network_stats = get_network_stats(sc) else: - if cooling_model: - df = pd.DataFrame(cooling_model.fmu_history) - df.to_parquet(OPATH / 'cooling_model.parquet', engine='pyarrow') + network_stats = None + + print_formatted_report(engine_stats=engine_stats, + job_stats=job_stats, + scheduler_stats=scheduler_stats, + network_stats=network_stats + ) + + if downscale_str: + downscale_str = "1" + downscale_str - df = pd.DataFrame(power_manager.history) - df.to_parquet(OPATH / 'power_history.parquet', engine='pyarrow') + if args.plot: + if 'power' in args.plot: + pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History', \ + OPATH / f'power.{args.imtype}', \ + uncertainties=args.uncertainties) + x, y = zip(*power_manager.history) + pl.plot_history(x, y) + + if 'util' in args.plot: + pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)', \ + 'System Utilization History', OPATH / f'util.{args.imtype}') + x, y = zip(*sc.sys_util_history) + pl.plot_history(x, y) + + if 'loss' in args.plot: + pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History', \ + OPATH / f'loss.{args.imtype}', \ + uncertainties=args.uncertainties) + x, y = zip(*power_manager.loss_history) + pl.plot_history(x, y) - df = pd.DataFrame(power_manager.loss_history) - df.to_parquet(OPATH / 'loss_history.parquet', engine='pyarrow') + pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History', \ + OPATH / f'loss_pct.{args.imtype}', \ + uncertainties=args.uncertainties) + x, y = zip(*power_manager.loss_history_percentage) + pl.plot_history(x, y) + + if 'pue' in args.plot: + if cooling_model: + ylabel = 'pue' + title = 'FMU ' + ylabel + 'History' + pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / f'pue.{args.imtype}', \ + uncertainties=args.uncertainties) + df = pd.DataFrame(cooling_model.fmu_history) + df.to_parquet('cooling_model.parquet', engine='pyarrow') + pl.plot_history(df['time'], df[ylabel]) + else: + print('Cooling model not enabled... skipping output of plot') + + if 'temp' in args.plot: + if cooling_model: + ylabel = 'Tr_pri_Out[1]' + title = 'FMU ' + ylabel + 'History' + pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / 'temp.svg') + df = pd.DataFrame(cooling_model.fmu_history) + df.to_parquet('cooling_model.parquet', engine='pyarrow') + pl.plot_compare(df['time'], df[ylabel]) + else: + print('Cooling model not enabled... 
skipping output of plot') + + if args.output: + + if args.uncertainties: + # Parquet cannot handle annotated ufloat format AFAIK + print('Data dump not implemented using uncertainties!') + else: + if cooling_model: + df = pd.DataFrame(cooling_model.fmu_history) + df.to_parquet(OPATH / 'cooling_model.parquet', engine='pyarrow') - df = pd.DataFrame(sc.sys_util_history) - df.to_parquet(OPATH / 'util.parquet', engine='pyarrow') + df = pd.DataFrame(power_manager.history) + df.to_parquet(OPATH / 'power_history.parquet', engine='pyarrow') - # Schedule history - job_history = pd.DataFrame(sc.get_job_history_dict()) - job_history.to_csv(OPATH / "job_history.csv", index=False) + df = pd.DataFrame(power_manager.loss_history) + df.to_parquet(OPATH / 'loss_history.parquet', engine='pyarrow') - scheduler_running_history = pd.DataFrame(sc.get_scheduler_running_history()) - job_history.to_csv(OPATH / "running_history.csv", index=False) - scheduler_queue_history = pd.DataFrame(sc.get_scheduler_running_history()) - job_history.to_csv(OPATH / "queue_history.csv", index=False) + df = pd.DataFrame(sc.sys_util_history) + df.to_parquet(OPATH / 'util.parquet', engine='pyarrow') + + # Schedule history + job_history = pd.DataFrame(sc.get_job_history_dict()) + job_history.to_csv(OPATH / "job_history.csv", index=False) + + scheduler_running_history = pd.DataFrame(sc.get_scheduler_running_history()) + scheduler_running_history.to_csv(OPATH / "running_history.csv", index=False) + scheduler_queue_history = pd.DataFrame(sc.get_scheduler_running_history()) + scheduler_queue_history.to_csv(OPATH / "queue_history.csv", index=False) - try: - with open(OPATH / 'stats.out', 'w') as f: - json.dump(engine_stats, f, indent=4) - json.dump(job_stats, f, indent=4) - except TypeError: # Is this the correct error code? - write_dict_to_file(engine_stats, OPATH / 'stats.out') - write_dict_to_file(job_stats, OPATH / 'stats.out') - - if args.accounts: try: - with open(OPATH / 'accounts.json', 'w') as f: - json_string = json.dumps(sc.accounts.to_dict()) - f.write(json_string) - except TypeError: - write_dict_to_file(sc.accounts.to_dict(), OPATH / 'accounts.json') - print("Output directory is: ", OPATH) # If output is enabled, the user wants this information as last output + with open(OPATH / 'stats.out', 'w') as f: + json.dump(engine_stats, f, indent=4) + json.dump(job_stats, f, indent=4) + except TypeError: # Is this the correct error code? 
+ write_dict_to_file(engine_stats, OPATH / 'stats.out') + write_dict_to_file(job_stats, OPATH / 'stats.out') + + if args.accounts: + try: + with open(OPATH / 'accounts.json', 'w') as f: + json_string = json.dumps(sc.accounts.to_dict()) + f.write(json_string) + except TypeError: + write_dict_to_file(sc.accounts.to_dict(), OPATH / 'accounts.json') + print("Output directory is: ", OPATH) # If output is enabled, the user wants this information as last output + + +if __name__ == "__main__": + main() diff --git a/raps/config.py b/raps/config.py index 863014f..c616a8b 100644 --- a/raps/config.py +++ b/raps/config.py @@ -3,7 +3,8 @@ import os from typing import Dict, Any from pathlib import Path -CONFIG_PATH = Path(os.environ.get("RAPS_CONFIG", 'config')).resolve() +ROOT_DIR = os.path.dirname(os.path.split(__file__)[0]) +CONFIG_PATH = Path(os.environ.get("RAPS_CONFIG", ROOT_DIR + '/config')).resolve() class ConfigManager: diff --git a/raps/stats.py b/raps/stats.py index ad3a67f..ea6d0e5 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -326,3 +326,35 @@ def get_job_stats(engine: Engine): 'priority_weighted_specific_response_time': psf } return job_stats + + +def print_formatted_report(engine_stats=None, + job_stats=None, + scheduler_stats=None, + network_stats=None + ): + # Print a formatted report + if engine_stats: + rep_str = "--- Simulation Report ---" + print(f"\n{rep_str}") + for key, value in engine_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print(f"{'-' * len(rep_str)}\n") + if job_stats: + rep_str = "--- Job Stat Report ---" + print(f"\n{rep_str}") + for key, value in job_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print(f"{'-' * len(rep_str)}\n") + if scheduler_stats: + rep_str = "--- Scheduler Report ---" + print(f"\n{rep_str}") + for key, value in scheduler_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print(f"{'-' * len(rep_str)}\n") + if network_stats: + rep_str = "--- Network Report ---" + print(f"\n{rep_str}") + for key, value in network_stats.items(): + print(f"{key.replace('_', ' ').title()}: {value}") + print(f"{'-' * len(rep_str)}\n") diff --git a/raps/telemetry.py b/raps/telemetry.py index 6ca591b..a46fdbb 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -271,7 +271,7 @@ class Telemetry: return jobs, timestep_start, timestep_end, args -if __name__ == "__main__": +def run_telemetry(): config = ConfigManager(system_name=args.system).get_config() args_dict['config'] = config td = Telemetry(**args_dict) @@ -353,3 +353,7 @@ if __name__ == "__main__": net_means = [tx + rx for tx, rx in zip(ntx_means, nrx_means)] plot_network_histogram(ax=ax,data=net_means) plt.show() + + +if __name__ == "__main__": + run_telemetry() diff --git a/raps/workload.py b/raps/workload.py index 8c18b77..0d4c1d2 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -32,6 +32,7 @@ from raps.telemetry import Telemetry from raps.job import job_dict, Job from raps.utils import create_file_indexed, create_dir_indexed, convert_to_seconds + JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",\ "ABINIT", "Cactus", "Charm++", "NWChem", "STAR-CCM+",\ @@ -775,8 +776,7 @@ def check_workload_args(args): exit(1) -if __name__ == "__main__": - +def run_workload(): from raps.args import args, args_dict from raps.config import ConfigManager config = ConfigManager(system_name=args.system).get_config() @@ -939,3 +939,7 @@ if __name__ == "__main__": raise 
ValueError(f"Unknown multitenant mode: {mode}") return jobs + + +if __name__ == "__main__": + run_workload() -- GitLab From 4151caf60a135cc4c9933b5f0cd69a8c061301cf Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 11 Aug 2025 21:03:09 -0400 Subject: [PATCH 215/388] Added telemtry tests --- pytest.ini | 5 +++ raps/telemetry.py | 1 + raps/workload.py | 13 +++++--- tests/systems/conftest.py | 33 ++++++++++++------- tests/systems/test_main_basic_run.py | 2 +- tests/systems/test_main_network_run.py | 4 +-- tests/systems/test_main_noui_run.py | 2 +- tests/systems/test_main_time_run.py | 2 +- tests/systems/test_main_withdata_run.py | 6 ++-- tests/systems/test_telemetry_withdata_run.py | 34 ++++++++++++++++++++ 10 files changed, 78 insertions(+), 24 deletions(-) create mode 100644 tests/systems/test_telemetry_withdata_run.py diff --git a/pytest.ini b/pytest.ini index e7a522a..ac546d7 100644 --- a/pytest.ini +++ b/pytest.ini @@ -4,6 +4,10 @@ testpaths = tests markers = long: mark a test as long (skipped if not run iwth --runlong) + main: raps basic main. + telemetry: raps telemetry analysis + workload: raps workload generation + system: mark a test as system (integration) test unit: mark a test as a unit test withdata: marks tests that require external data @@ -26,6 +30,7 @@ markers = mit_supercloud: System test setonix: System test summit: System test + lumi: System test addopts = -ra diff --git a/raps/telemetry.py b/raps/telemetry.py index a46fdbb..8227893 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -280,6 +280,7 @@ def run_telemetry(): else: parser.print_help() + exit() timesteps = timestep_end - timestep_start diff --git a/raps/workload.py b/raps/workload.py index 0d4c1d2..baa13f4 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -566,7 +566,7 @@ class Workload: return jobs -def plot_job_hist(jobs,config=None,dist_split=None): +def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): # put args.multimodal in dist_split! 
split = [1.0] num_dist = 1 @@ -614,10 +614,14 @@ def plot_job_hist(jobs,config=None,dist_split=None): axs[1][0].scatter(x, y,zorder=3) cpu_util = [x.cpu_trace for x in jobs] - if isinstance(cpu_util[0],(np.ndarray, list)): + if isinstance(cpu_util[0], np.ndarray): + cpu_util = np.concatenate(cpu_util).ravel() + elif isinstance(cpu_util[0], list): cpu_util = [sum(part) / len(part) for part in cpu_util] gpu_util = [x.gpu_trace for x in jobs] - if isinstance(gpu_util[0],(np.ndarray, list)): + if isinstance(gpu_util[0], np.ndarray): + gpu_util = np.concatenate(gpu_util).ravel() + elif isinstance(gpu_util[0], list): gpu_util = [sum(part) / len(part) for part in gpu_util] if not all([x == 0 for x in gpu_util]): axs[0][1].scatter(cpu_util,gpu_util,zorder=2,marker='.',s=0.2) @@ -677,7 +681,6 @@ def plot_job_hist(jobs,config=None,dist_split=None): offset = 0 split_index = 0 split_offset = math.floor(len(x) * split[split_index]) - gantt_nodes = args.gantt_nodes if gantt_nodes: if split[0] == 0.0: ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5) @@ -786,7 +789,7 @@ def run_workload(): else: workload = Workload(config) jobs = getattr(workload, args.workload)(args=args) - plot_job_hist(jobs, config=config, dist_split=args.multimodal) + plot_job_hist(jobs, config=config, dist_split=args.multimodal, gantt_nodes=args.gantt_nodes) if args.output: timestep_start = min([x.submit_time for x in jobs]) timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.wall_time for x in jobs])) diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index c564e7d..741820e 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -37,7 +37,8 @@ def system_config(system): configs = { "40frontiers": { - "basic": True, + "main": True, + "telemetry": False, "multi-part-sim": False, "withdata": False, "cooling": False, @@ -48,7 +49,8 @@ def system_config(system): "net": False, }, "adastraMI250": { - "basic": True, + "main": True, + "telemetry": True, "multi-part-sim": False, "withdata": True, "cooling": False, @@ -59,7 +61,8 @@ def system_config(system): "net": False, }, "frontier": { - "basic": True, + "main": True, + "telemetry": True, "multi-part-sim": False, "withdata": True, "cooling": True, @@ -70,7 +73,8 @@ def system_config(system): "net": False, }, "fugaku": { - "basic": True, + "main": True, + "telemetry": True, "multi-part-sim": False, "withdata": True, "cooling": False, @@ -81,7 +85,8 @@ def system_config(system): "net": False, }, "gcloudv2": { - "basic": False, + "main": False, + "telemetry": False, "multi-part-sim": False, "withdata": False, "cooling": False, @@ -92,7 +97,8 @@ def system_config(system): "net": False, }, "lassen":{ - "basic": True, + "main": True, + "telemetry": False, # Takes very long! 
"multi-part-sim": False, "withdata": True, "cooling": True, @@ -103,7 +109,8 @@ def system_config(system): "net": True, }, "marconi100":{ - "basic": True, + "main": True, + "telemetry": True, "multi-part-sim": False, "withdata": True, "cooling": True, @@ -114,7 +121,8 @@ def system_config(system): "net": False, }, "mit_supercloud": { - "basic": False, + "main": False, + "telemetry": False, "multi-part-sim": True, "withdata": True, "cooling": False, @@ -126,7 +134,8 @@ def system_config(system): "net-multi-sim": True, }, "setonix": { - "basic": False, + "main": False, + "telemetry": True, "multi-part-sim": True, "withdata": False, "cooling": False, @@ -137,7 +146,8 @@ def system_config(system): "net": False, }, "summit": { - "basic": True, + "main": True, + "telemetry": False, "multi-part-sim": False, "withdata": False, "cooling": True, @@ -148,7 +158,8 @@ def system_config(system): "net": False, }, "lumi": { - "basic": False, + "main": False, + "telemetry": False, "multi-part-sim": True, "withdata": False, "cooling": False, diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index da9b651..c3f919a 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -12,7 +12,7 @@ pytestmark = [ def test_main_run(system, system_config): - if not system_config.get("basic", False): + if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") os.chdir(PROJECT_ROOT) diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index 6e29162..ce7d467 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -12,8 +12,8 @@ pytestmark = [ def test_main_network_run(system, system_config): - if not system_config.get("basic", False): - pytest.skip(f"{system} does not support basic run.") + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main run.") if not system_config.get("net", False): pytest.skip(f"{system} does not support network run.") diff --git a/tests/systems/test_main_noui_run.py b/tests/systems/test_main_noui_run.py index 556efa3..4549ac3 100644 --- a/tests/systems/test_main_noui_run.py +++ b/tests/systems/test_main_noui_run.py @@ -12,7 +12,7 @@ pytestmark = [ def test_main_run(system, system_config): - if not system_config.get("basic", False): + if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") os.chdir(PROJECT_ROOT) diff --git a/tests/systems/test_main_time_run.py b/tests/systems/test_main_time_run.py index 7a574a8..b702fa4 100644 --- a/tests/systems/test_main_time_run.py +++ b/tests/systems/test_main_time_run.py @@ -22,7 +22,7 @@ pytestmark = [ pytest.param("6h", marks=pytest.mark.long), # mark this one as long ]) def test_main_time_run(system, system_config, time_args): - if not system_config.get("basic", False): + if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") os.chdir(PROJECT_ROOT) diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 2bb337c..ffaf501 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -13,10 +13,10 @@ pytestmark = [ def test_main_withdata_run(system, system_config, system_file): - if not system_config.get("basic", False): - pytest.skip(f"{system} does not support basic run even without data.") + if not system_config.get("main", False): + 
pytest.skip(f"{system} does not support basic main even without data.") if not system_config.get("withdata", False): - pytest.skip(f"{system} does not support basic run with data.") + pytest.skip(f"{system} does not support basic main with data.") if isinstance(system_file, list): file_list = [DATA_PATH / system / x for x in system_file] else: diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py new file mode 100644 index 0000000..707f39a --- /dev/null +++ b/tests/systems/test_telemetry_withdata_run.py @@ -0,0 +1,34 @@ +import os +import subprocess +import gc +import pytest +from tests.util import PROJECT_ROOT, DATA_PATH + + +pytestmark = [ + pytest.mark.system, + pytest.mark.withdata +] + + +def test_main_withdata_run(system, system_config, system_file): + if not system_config.get("telemetry", False): + pytest.skip(f"{system} does not support telemetry run.") + if not system_config.get("withdata", False): + pytest.skip(f"{system} does not support telemetry run with data.") + + if isinstance(system_file, list): + file_list = [DATA_PATH / system / x for x in system_file] + else: + file_list = [DATA_PATH / system / system_file] + for file in file_list: + assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" + os.chdir(PROJECT_ROOT) + result = subprocess.run([ + "python", "raps/telemetry.py", + "--system", system, + "-f", *file_list, + ], capture_output=True, text=True, stdin=subprocess.DEVNULL) + assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + del result + gc.collect() -- GitLab From 3f3c860ffe490ff1f508d3870dca1a32344982cd Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 12 Aug 2025 16:47:34 -0400 Subject: [PATCH 216/388] Added an experimental pytest for workload generation script. As Workloads plot pyplots this is tested with a timeout. If no other error occured it. Success is assumed. In general: To test everything e.g. for adastsraMI250 pytest -m "adastraMI250 and not cooling and not withdata" -n auto -x To test everything e.g. 
for frontier pytest -m "frontier and not cooling and not withdata" -n auto -x --- raps/telemetry.py | 7 +- tests/systems/conftest.py | 3 + tests/systems/test_workload_synthetic.py | 114 +++++++++++++++++++++++ 3 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 tests/systems/test_workload_synthetic.py diff --git a/raps/telemetry.py b/raps/telemetry.py index 8227893..d80c69e 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -30,6 +30,7 @@ if __name__ == "__main__": choices = ['prescribed', 'poisson'] parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') + parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.') args = parser.parse_args() args_dict = vars(args) @@ -353,7 +354,11 @@ def run_telemetry(): # combine into total per‐job traffic net_means = [tx + rx for tx, rx in zip(ntx_means, nrx_means)] plot_network_histogram(ax=ax,data=net_means) - plt.show() + if args.output: + plt.savefig(f'{args.output}') + print(f"Saved to: {args.output}") + else: + plt.show() if __name__ == "__main__": diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index 741820e..bcde029 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -39,6 +39,7 @@ def system_config(system): "40frontiers": { "main": True, "telemetry": False, + "workload": False, "multi-part-sim": False, "withdata": False, "cooling": False, @@ -51,6 +52,7 @@ def system_config(system): "adastraMI250": { "main": True, "telemetry": True, + "workload": True, "multi-part-sim": False, "withdata": True, "cooling": False, @@ -63,6 +65,7 @@ def system_config(system): "frontier": { "main": True, "telemetry": True, + "workload": True, "multi-part-sim": False, "withdata": True, "cooling": True, diff --git a/tests/systems/test_workload_synthetic.py b/tests/systems/test_workload_synthetic.py new file mode 100644 index 0000000..2e4c4a1 --- /dev/null +++ b/tests/systems/test_workload_synthetic.py @@ -0,0 +1,114 @@ +import subprocess +import gc +import pytest +import shlex + + +pytestmark = [ + pytest.mark.system, + pytest.mark.workload, +] + + +def flatten(dist): + name, args = dist + return [name, *args] + +def _build_args(dist_name, params): + return [dist_name, *params] + + +jobdist_case = [ + ("weibull", ["--jobsize-weibull-shape", "0.75", "--jobsize-weibull-scale", "16"]), + ("normal", ["--jobsize-normal-stddev", "100", "--jobsize-normal-mean", "16"]), + ("uniform",[]), +] +cpudist_case = [ + ("weibull", ["--cpuutil-weibull-shape", "0.75", "--cpuutil-weibull-scale", "16"]), + ("normal", ["--cpuutil-normal-stddev", "100", "--cpuutil-normal-mean", "16"]), + ("uniform",[]), +] +gpudist_case = [ + ("weibull", ["--gpuutil-weibull-shape", "0.75", "--gpuutil-weibull-scale", "16"]), + ("normal", ["--gpuutil-normal-stddev", "100", "--gpuutil-normal-mean", "16"]), + ("uniform",[]), +] +wtimedist_case = [ + ("weibull", ["--walltime-weibull-shape", "0.75", "--walltime-weibull-scale", "16"]), + ("normal", ["--walltime-normal-stddev", "100", "--walltime-normal-mean", "16"]), + ("uniform",[]), +] +additional_params_cases = [ + "", # nothing + ["--jobsize-is-of-degree", "2"], + ["--jobsize-is-of-degree", "3"], + ["--jobsize-is-power-of", "2"], + ["--jobsize-is-power-of", "3"], +] + + +@pytest.mark.parametrize( + "jobdist", jobdist_case, ids=lambda d:d[0] +) 
+@pytest.mark.parametrize(
+    "cpudist", cpudist_case, ids=lambda d: d[0]
+)
+@pytest.mark.parametrize(
+    "gpudist", gpudist_case, ids=lambda d: d[0]
+)
+@pytest.mark.parametrize(
+    "wtimedist", wtimedist_case, ids=lambda d: d[0]
+)
+@pytest.mark.parametrize(
+    "additional_params", additional_params_cases, ids=lambda p: (p or "none")
+)
+def test_workload_synthetic_run(
+    system, system_config, jobdist, cpudist, gpudist, wtimedist, additional_params
+):
+    """Run the real synthetic workload generator with every combination of
+    job-size, CPU, GPU, and wall-time distributions plus optional extra
+    flags. This is a smoke test: the generator must start cleanly, and a
+    run that is still alive when the 1-second timeout fires counts as a
+    pass.
+    """
+
+    if not system_config.get("workload", False):
+        pytest.skip(f"{system} does not support workload run.")
+
+    # Build the command line. Each distribution tuple expands into:
+    #   <dist_name>, <param-flag>, <value>, ...
+    cmd = [
+        "python", "raps/workload.py",
+        "--system", system,
+        "-w", "synthetic",
+        "--jobsize-distribution", *flatten(jobdist),
+        "--cpuutil-distribution", *flatten(cpudist),
+        "--gpuutil-distribution", *flatten(gpudist),
+        "--walltime-distribution", *flatten(wtimedist),
+    ]
+
+    # Add any extra parameters if present (the empty-string case adds none).
+    if additional_params:
+        cmd.extend(additional_params)
+
+    # Warm-up run so interpreter start-up cost is not charged to the
+    # 1-second timeout below; it also provides a returncode-0 result
+    # object to fall back on.
+    cmd1 = ["python", "-c", "exit()"]
+    result = subprocess.run(cmd1, capture_output=True, text=True, stdin=subprocess.DEVNULL)
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            stdin=subprocess.DEVNULL,
+            timeout=1
+        )
+    except subprocess.TimeoutExpired:
+        # Still running after 1 s means argument handling succeeded, which
+        # is all this smoke test checks; keep the warm-up result (rc 0).
+        result.returncode = 0
+
+    assert result.returncode == 0, (
+        f"Failed on {system} with {jobdist[0]}, {cpudist[0]}, "
+        f"{gpudist[0]}, {wtimedist[0]}: {result.stderr}"
+    )
+
+    # Explicitly delete the result to avoid hitting
+    # "Too many open file descriptors" on slow CI machines.
+    del result
+    gc.collect()
-- 
GitLab


From 0e02e220d56ec7233493f8fff4966d7a1602c809 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 12 Aug 2025 16:02:33 -0400
Subject: [PATCH 217/388] Force upgrade to python3.11 to support "match" and networkx>=3.5 (for network model)

---
 README.md       |  2 +-
 pyproject.toml  |  2 +-
 raps/helpers.py | 21 ++++++++++++++++++---
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index e7350a9..4c82072 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Instructions for setup and usage are given below. An online documentation of Exa

 ## Setup environment

-Note: Requires python3.9 or greater.
+Note: Requires python3.11 or greater.

     pip install -e .
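+
+Python 3.11 is the new floor because the version check in `raps/helpers.py`
+now reads `requires-python` from `pyproject.toml` with `tomllib`, which only
+entered the standard library in Python 3.11. A minimal sketch of that
+mechanism:
+
+    import tomllib  # ModuleNotFoundError on Python <= 3.10
+    with open("pyproject.toml", "rb") as f:
+        print(tomllib.load(f)["project"]["requires-python"])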
diff --git a/pyproject.toml b/pyproject.toml index 2cb56d5..fd7ddbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "raps" version = "0.0.1" -requires-python = ">=3.9" +requires-python = ">=3.11" description = "RAPS" readme = "README.md" # license = {file = "LICENSE.txt"} diff --git a/raps/helpers.py b/raps/helpers.py index bff066a..dc3e3ac 100644 --- a/raps/helpers.py +++ b/raps/helpers.py @@ -1,9 +1,24 @@ import sys +import tomllib +from pathlib import Path def check_python_version(): - # Check for the required Python version - required_major, required_minor = 3, 9 + # Load pyproject.toml + pyproject_path = Path(__file__).parent.parent / "pyproject.toml" + with open(pyproject_path, "rb") as f: + pyproject_data = tomllib.load(f) + # Extract required python version (e.g., ">=3.11") + requires_python = pyproject_data["project"]["requires-python"] + + # Get the minimum major/minor from the string + # This assumes format like ">=3.11" + version_str = requires_python.lstrip(">=").strip() + required_major, required_minor = map(int, version_str.split(".")[:2]) + + # Compare if sys.version_info < (required_major, required_minor): - sys.stderr.write(f"Error: RAPS requires Python {required_major}.{required_minor} or greater\n") + sys.stderr.write( + f"Error: RAPS requires Python {required_major}.{required_minor} or greater\n" + ) sys.exit(1) -- GitLab From 74663d788a8e813a91fc3e064fcb23da2d2c3fe1 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 13 Aug 2025 16:33:14 -0400 Subject: [PATCH 218/388] Add/fix docstrings to RAPS main drivers. Add -net to lassen example in README.md --- README.md | 2 +- main.py | 9 ++++++++- multi-part-sim-mpi.py | 8 ++++++++ multi-part-sim.py | 9 +++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4c82072..304f6f6 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ For Lumi Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. To run a network simulation, use the following command: - python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -ff 365d -t 12h --arrival poisson + python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -ff 365d -t 12h --arrival poisson -net ## Snapshot of extracted workload data diff --git a/main.py b/main.py index 7345828..8c46c8b 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,11 @@ -""" Shortest-job first (SJF) job schedule simulator """ +""" +Main driver for simulating the RAPS single-partition (homogeneous) +system in the ExaDigiT digital twin. Supports synthetic workload +generation or telemetry replay, dynamic power modeling (including +conversion losses), and optional coupling to a thermo-fluids cooling +model. Produces performance, utilization, and energy metrics, with +optional plots and output files for analysis and validation. +""" import json import numpy as np diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py index 7929680..e183a7e 100644 --- a/multi-part-sim-mpi.py +++ b/multi-part-sim-mpi.py @@ -1,3 +1,11 @@ +""" +MPI-enabled driver for simulating multi-partition RAPS systems. +Distributes partitions across ranks with mpi4py for parallel run. +Supports telemetry replay or synthetic workloads with per-rank +power, FLOPS, and scheduling models. 
Outputs debug and summary +stats for heterogeneous systems (e.g., LUMI, Setonix, Adastra). +""" + from raps.helpers import check_python_version check_python_version() diff --git a/multi-part-sim.py b/multi-part-sim.py index 06e1e1c..bd9ce3e 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -1,3 +1,12 @@ +""" +Main driver for simulating multi-partition (heterogeneous) systems in the RAPS +module of ExaDigiT. Supports replaying telemetry or generating synthetic +workloads across CPU-only, GPU, and mixed partitions. Initializes per- +partition power, FLOPS, and scheduling models, then advances simulations in +lockstep. Outputs per-partition performance, utilization, and energy +statistics for systems such as MIT Supercloud, Setonix, Adastra, and LUMI. +""" + from raps.helpers import check_python_version check_python_version() -- GitLab From 042958eaeb00c593abe62b20b0600b4fde9e5cc2 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 13 Aug 2025 17:12:22 -0400 Subject: [PATCH 219/388] Add feature: yaml experiment files --- experiments/frontier.yaml | 4 + experiments/gcloudv2.yaml | 4 + experiments/lassen.yaml | 9 ++ experiments/marconi100.yaml | 3 + experiments/mit.yaml | 6 + pyproject.toml | 3 +- raps/args.py | 292 ++++++++++++++++++++++++++---------- 7 files changed, 242 insertions(+), 79 deletions(-) create mode 100644 experiments/frontier.yaml create mode 100644 experiments/gcloudv2.yaml create mode 100644 experiments/lassen.yaml create mode 100644 experiments/marconi100.yaml create mode 100644 experiments/mit.yaml diff --git a/experiments/frontier.yaml b/experiments/frontier.yaml new file mode 100644 index 0000000..26ce0b4 --- /dev/null +++ b/experiments/frontier.yaml @@ -0,0 +1,4 @@ +system: frontier +replay: + - ~/data/frontier-sample-2024-01-18/slurm/joblive/date=2024-01-18 + - ~/data/frontier-sample-2024-01-18/jobprofile/date=2024-01-18 diff --git a/experiments/gcloudv2.yaml b/experiments/gcloudv2.yaml new file mode 100644 index 0000000..85a1d6c --- /dev/null +++ b/experiments/gcloudv2.yaml @@ -0,0 +1,4 @@ +system: gcloudv2 +replay: + - ~/data/gcloud/v2/google_cluster_data_2011_sample +ff: 600 diff --git a/experiments/lassen.yaml b/experiments/lassen.yaml new file mode 100644 index 0000000..5434a1b --- /dev/null +++ b/experiments/lassen.yaml @@ -0,0 +1,9 @@ +system: lassen +replay: + - ~/data/lassen/Lassen-Supercomputer-Job-Dataset +policy: fcfs +backfill: firstfit +fastforward: 365d +time: 12h +arrival: poisson +simulate_network: true diff --git a/experiments/marconi100.yaml b/experiments/marconi100.yaml new file mode 100644 index 0000000..0568157 --- /dev/null +++ b/experiments/marconi100.yaml @@ -0,0 +1,3 @@ +system: marconi100 +replay: + - ~/data/marconi100/job_table.parquet diff --git a/experiments/mit.yaml b/experiments/mit.yaml new file mode 100644 index 0000000..bc718e4 --- /dev/null +++ b/experiments/mit.yaml @@ -0,0 +1,6 @@ +system: mit_supercloud +partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] +replay: + - ~/data/mit/202201 +start: 2021-05-21T13:00 +end: 2021-05-21T14:00 diff --git a/pyproject.toml b/pyproject.toml index fd7ddbd..2844ca8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,5 +26,6 @@ dependencies = [ "networkx>=3.5", "pytest", "pytest-order", - "pytest-xdist" + "pytest-xdist", + "pyyaml>=6.0.2" ] diff --git a/raps/args.py b/raps/args.py index 18da481..8d31bf5 100644 --- a/raps/args.py +++ b/raps/args.py @@ -1,115 +1,251 @@ -import argparse +import argparse, os, sys, yaml from raps.schedulers.default import PolicyType, 
BackfillType from raps.workload import add_workload_to_parser, check_workload_args from raps.utils import convert_to_seconds -parser = argparse.ArgumentParser(description='Resource Allocator & Power Simulator (RAPS)') -# System configurations -parser.add_argument('--system', type=str, default='frontier', help='System config to use') -parser.add_argument('-x', '--partitions', nargs='+', default=None, help='List of machine configurations to use, e.g., -x setonix-cpu setonix-gpu') -parser.add_argument('-c', '--cooling', action='store_true', help='Include FMU cooling model') -parser.add_argument('-net', '--simulate-network', default=False, action='store_true', help='Include Network model') - -parser.add_argument('--noui', default=False, action='store_true', help='Run without UI') +def load_config(path): + if path and os.path.exists(path): + with open(path, "r") as f: + return yaml.safe_load(f) or {} + return {} + + +def check_partitions(config): + if len(config.get("partitions", [])) > 1 and \ + os.path.basename(sys.argv[0]) == "main.py": + sys.exit("Error: Use multi-part-sim.py for multi-partition runs.") + + +def _expand_path(p): + if isinstance(p, str): + # expand ~ and $VARS + return os.path.expanduser(os.path.expandvars(p)) + return p + + +def apply_config_to_args(cfg, args): + # Merge supported sections or top-level keys + merged = {} + for k, v in (cfg or {}).items(): + if isinstance(v, dict) and k in { + "shared", "simulate", "telemetry", "scheduler", "output" + }: + merged.update(v) + else: + merged[k] = v + + # Apply to argparse namespace + for k, v in merged.items(): + setattr(args, k, v) + + # Coerce certain keys to lists if YAML provided strings + list_keys = { + "cluster_var", "output_vars", "input_vars", "partitions", "plot" + } + for key in list_keys: + if hasattr(args, key): + val = getattr(args, key) + if isinstance(val, str): + setattr(args, key, [val]) + + # Expand paths (tilde + env vars) + for key in ("path", "output_dir", "plot_dir", "config_file"): + if hasattr(args, key): + setattr(args, key, _expand_path(getattr(args, key))) + + # Normalize enums if provided as strings in YAML + if getattr(args, "policy", None): + try: + # Accept exact values or case-insensitive + val = str(args.policy) + opts = {p.value.lower(): p.value for p in PolicyType} + if val.lower() in opts: + args.policy = opts[val.lower()] + except Exception: + pass + + if getattr(args, "backfill", None): + try: + val = str(args.backfill) + opts = {b.value.lower(): b.value for b in BackfillType} + if val.lower() in opts: + args.backfill = opts[val.lower()] + except Exception: + pass + + +parser = argparse.ArgumentParser( + description="Resource Allocator & Power Simulator (RAPS)", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) +parser.add_argument( + "config_file", nargs="?", default=None, + help="YAML config file; overrides defaults/flags." 
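+    # The merge happens after parse_args(), so YAML keys must match the
+    # argparse dest names; e.g. experiments/lassen.yaml uses
+    #   policy: fcfs
+    #   simulate_network: true
+    # and section dicts such as shared:/scheduler:/output: are flattened in.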
+) +# System configurations +parser.add_argument("--system", type=str, default="frontier", + help="System config to use") +parser.add_argument( + "-x", "--partitions", nargs="+", default=None, + help="List of machine configurations, e.g., -x setonix-cpu setonix-gpu" +) +parser.add_argument("-c", "--cooling", action="store_true", + help="Include FMU cooling model") +parser.add_argument("-net", "--simulate-network", default=False, + action="store_true", help="Include Network model") +parser.add_argument("--noui", default=False, action="store_true", + help="Run without UI") # Simulation runtime options -parser.add_argument('-ff', '--fastforward', type=str, default=None, help='Fast-forward by time amount (uses same units as -t)') -parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') -#parser.add_argument("--time-delta", type=str, default=None, help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d. (Default unit in seconds. If not set "TRACE_QUANTA" is used.)') # This seems sensible, but 1s is the previous default before introducing this change! -parser.add_argument("--time-delta", type=str, default="1s", help='Time delta for simulation steps, e.g. 15, 15s 1m, 1h, 3d, 1ms. (Default unit in seconds. Default value: 1s.)') -parser.add_argument('-d', '--debug', action='store_true', help='Enable debug mode and disable rich layout') -parser.add_argument('-n', '--numjobs', type=int, default=100, help='Number of jobs to schedule') -parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') -parser.add_argument('--start', type=str, default='2021-05-21T13:00', help='ISO8061 string for start of simulation') -parser.add_argument('--end', type=str, default='2021-05-21T14:00', help='ISO8061 string for end of simulation') -parser.add_argument('--seed', action='store_true', help='Set random number seed for deterministic simulation') -parser.add_argument('-u', '--uncertainties', action='store_true', - help='Change from floating point units to floating point units with uncertainties.' 
+ \ - ' Very expensive w.r.t simulation time!') - -# User Interface options -choices = ['layout1', 'layout2'] -parser.add_argument('--layout', type=str, choices=choices, default=choices[0], help='Layout of UI') - - -# Output options -parser.add_argument('-o', '--output', action='store_true', help='Output power, cooling, and loss models for later analysis') -parser.add_argument('-p', '--plot', nargs='+', choices=['power', 'loss', 'pue', 'temp', 'util'], - help='Specify one or more types of plots to generate: power, loss, pue, util, temp') -choices = ['png', 'svg', 'jpg', 'pdf', 'eps'] -parser.add_argument('--imtype', type=str, choices=choices, default=choices[0], help='Plot image type') - -# Telemetry data -parser.add_argument('-f', '--replay', nargs='+', type=str, help='Either: path/to/joblive path/to/jobprofile' + \ - ' -or- filename.npz (overrides --workload option)') -parser.add_argument('-e', '--encrypt', action='store_true', help='Encrypt any sensitive data in telemetry') -parser.add_argument('--validate', action='store_true', help='Use node power instead of CPU/GPU utilizations') -parser.add_argument('--jid', type=str, default='*', help='Replay job id') -parser.add_argument('--scale', type=int, default=0, help='Scale telemetry to max nodes specified in order to run telemetry on a smaller smaller target system/partition, e.g., --scale 192') +parser.add_argument("-ff", "--fastforward", type=str, default=None, + help="Fast-forward by time amount (uses same units as -t)") +parser.add_argument("-t", "--time", type=str, default=None, + help="Length of time to simulate, e.g., 123, 27m, 3h, 7d") +parser.add_argument("--time-delta", type=str, default="1s", + help="Step size, e.g., 15s, 1m, 1h, 1ms (default: 1s)") +parser.add_argument("-d", "--debug", action="store_true", + help="Enable debug mode and disable rich layout") +parser.add_argument("-n", "--numjobs", type=int, default=100, + help="Number of jobs to schedule") +parser.add_argument("-v", "--verbose", action="store_true", + help="Enable verbose output") +parser.add_argument("--start", type=str, default="2021-05-21T13:00", + help="ISO8061 start of simulation") +parser.add_argument("--end", type=str, default="2021-05-21T14:00", + help="ISO8061 end of simulation") +parser.add_argument("--seed", action="store_true", + help="Set RNG seed for deterministic simulation") +parser.add_argument( + "-u", "--uncertainties", action="store_true", + help=("Use float-with-uncertainties (much slower).") +) + +# UI +ui_layout_choices = ["layout1", "layout2"] +parser.add_argument("--layout", type=str, choices=ui_layout_choices, + default=ui_layout_choices[0], help="UI layout") + +# Output +parser.add_argument("-o", "--output", action="store_true", + help="Write power/cooling/loss outputs for analysis") +plot_choices = ["power", "loss", "pue", "temp", "util"] +parser.add_argument("-p", "--plot", nargs="+", choices=plot_choices, + help="Plots to generate") +img_choices = ["png", "svg", "jpg", "pdf", "eps"] +parser.add_argument("--imtype", type=str, choices=img_choices, + default=img_choices[0], help="Plot image type") + +# Telemetry +parser.add_argument( + "-f", "--replay", nargs="+", type=str, + help=("Either: path/to/joblive path/to/jobprofile OR filename.npz " + "(overrides --workload)") +) +parser.add_argument("-e", "--encrypt", action="store_true", + help="Encrypt sensitive data in telemetry") +parser.add_argument("--validate", action="store_true", + help="Use node power instead of CPU/GPU utilizations") +parser.add_argument("--jid", type=str, 
default="*", + help="Replay job id") +parser.add_argument("--scale", type=int, default=0, + help=("Scale telemetry to a smaller target system, " + "e.g., --scale 192")) # Synthetic workloads parser = add_workload_to_parser(parser) -#choices = ['random', 'benchmark', 'peak', 'idle','synthetic'] -#parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') - -# Scheduling options -choices = ['default', 'scheduleflow', 'nrel', 'anl', 'flux', 'experimental', 'multitenant'] -parser.add_argument('--scheduler', type=str, choices=choices, default=choices[0], help='Name of scheduler') -choices = [policy.value for policy in PolicyType] -parser.add_argument('--policy', type=str, default=None, help='Schedule policy to use, e.g.:' + str(choices) + " or extended policies") -choices = [policy.value for policy in BackfillType] -parser.add_argument('--backfill', type=str, choices=choices, default=None, help='Backfill Policy') -# Redistribution of job arrival -choices = ['prescribed', 'poisson'] -parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})') -parser.add_argument('--job-arrival-time', type=int, help='Modify job arrival for poisson distribution (in seconds). Overrides config/*/scheduler.json value.') # no defaults as this overrides system config files -parser.add_argument('--job-arrival-rate', type=float, help='Modify arrival rate of poisson distribution (default 1)') # no defaults as this overrides system config files - - -# Account options -parser.add_argument('--accounts', action='store_true', help='Flag indicating if accounts should be tracked') -parser.add_argument('--accounts-json', type=str, help='Json of account stats generated in previous run. see raps/accounts.py') +# Scheduling +sched_choices = ["default", "scheduleflow", "nrel", "anl", "flux", + "experimental", "multitenant"] +parser.add_argument("--scheduler", type=str, choices=sched_choices, + default=sched_choices[0], help="Scheduler name") +parser.add_argument("--policy", type=str, default=None, + help=f"Schedule policy: {[p.value for p in PolicyType]}") +parser.add_argument("--backfill", type=str, default=None, + help=f"Backfill policy: {[b.value for b in BackfillType]}") + +# Arrival +arr_choices = ["prescribed", "poisson"] +parser.add_argument("--arrival", default=arr_choices[0], type=str, + choices=arr_choices, + help=("Modify arrival distribution (poisson) or use " + "original submit times (prescribed)")) +parser.add_argument("--job-arrival-time", type=int, + help=("Poisson arrival (seconds). 
Overrides " + "config/*/scheduler.json")) +parser.add_argument("--job-arrival-rate", type=float, + help="Modify Poisson rate (default 1)") + +# Accounts +parser.add_argument("--accounts", action="store_true", + help="Track accounts") +parser.add_argument("--accounts-json", type=str, + help="Accounts JSON from previous run") def post_process_args(args): if args.time_delta: - time_delta_raw, time_delta_downscale_raw = convert_to_seconds(args.time_delta) + tdelta_raw, tdelta_down = convert_to_seconds(args.time_delta) else: - time_delta_raw, time_delta_downscale_raw = None, 1 + tdelta_raw, tdelta_down = None, 1 if args.time: - time_raw, time_downscale_raw = convert_to_seconds(args.time) + time_raw, time_down = convert_to_seconds(args.time) else: - time_raw, time_downscale_raw = None, 1 + time_raw, time_down = None, 1 if args.fastforward: - ff_raw, ff_downscale_raw = convert_to_seconds(args.fastforward) + ff_raw, ff_down = convert_to_seconds(args.fastforward) else: - ff_raw, ff_downscale_raw = None, 1 + ff_raw, ff_down = None, 1 - max_downscale = max(time_delta_downscale_raw, time_downscale_raw, ff_downscale_raw) - args.downscale = max_downscale + max_down = max(tdelta_down, time_down, ff_down) + args.downscale = max_down if args.time_delta: - args.time_delta = int((time_delta_raw / time_delta_downscale_raw) * max_downscale) + args.time_delta = int((tdelta_raw / tdelta_down) * max_down) if args.time: - args.time = int((time_raw / time_downscale_raw) * max_downscale) + args.time = int((time_raw / time_down) * max_down) if args.fastforward: - args.fastforward = int((ff_raw / ff_downscale_raw) * max_downscale) + args.fastforward = int((ff_raw / ff_down) * max_down) return args -# ### At the end get args and an args_dict. import this if needed. +# ---- Parse + YAML merge ---- args = parser.parse_args() -# Do conversions and checks here if needed + +# Config file existence check +if args.config_file and not os.path.isfile(args.config_file): + print(f"Error: '{args.config_file}' not found.", file=sys.stderr) + sys.exit(1) + +cfg = load_config(args.config_file) + +# If launching multi-partition case with main.py error out +check_partitions(cfg) + +apply_config_to_args(cfg, args) + +# Optional: format fileprefix after config merge (if provided by workload parser) +if hasattr(args, "fileprefix") and isinstance(args.fileprefix, str): + try: + args.fileprefix = args.fileprefix.format(**vars(args)) + except KeyError as e: + print(f"Warning: missing placeholder {e} in fileprefix; skipping.") + +# Prefer replay if both replay and workload got set +if getattr(args, "replay", None) and getattr(args, "workload", None): + print("Info: --replay provided; ignoring --workload.", file=sys.stderr) + +# Validate workload args before time conversions check_workload_args(args) + +# Convert time-like args and compute downscale args = post_process_args(args) -# generate the dictionary + +# Expose dict form args_dict = vars(args) -# #Now import args and args_dict directly if needed.: -# from args import args,args_dict -- GitLab From 1f82af8c80e623114c1c70770c8422c1cfa4d8dc Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 13 Aug 2025 17:43:14 -0400 Subject: [PATCH 220/388] Add some validation checks into args.py --- raps/args.py | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/raps/args.py b/raps/args.py index 8d31bf5..942b54d 100644 --- a/raps/args.py +++ b/raps/args.py @@ -12,12 +12,6 @@ def load_config(path): return {} -def check_partitions(config): - if 
len(config.get("partitions", [])) > 1 and \
-       os.path.basename(sys.argv[0]) == "main.py":
-        sys.exit("Error: Use multi-part-sim.py for multi-partition runs.")
-
-
 def _expand_path(p):
     if isinstance(p, str):
         # expand ~ and $VARS
@@ -113,9 +107,9 @@ parser.add_argument("-n", "--numjobs", type=int, default=100,
 parser.add_argument("-v", "--verbose", action="store_true",
                     help="Enable verbose output")
 parser.add_argument("--start", type=str, default="2021-05-21T13:00",
-                    help="ISO8061 start of simulation")
+                    help="ISO8601 start of simulation")
 parser.add_argument("--end", type=str, default="2021-05-21T14:00",
-                    help="ISO8061 end of simulation")
+                    help="ISO8601 end of simulation")
 parser.add_argument("--seed", action="store_true",
@@ -225,9 +219,6 @@ if args.config_file and not os.path.isfile(args.config_file):

 cfg = load_config(args.config_file)

-# If launching multi-partition case with main.py error out
-check_partitions(cfg)
-
 apply_config_to_args(cfg, args)

@@ -237,9 +228,37 @@ if hasattr(args, "fileprefix") and isinstance(args.fileprefix, str):
     except KeyError as e:
         print(f"Warning: missing placeholder {e} in fileprefix; skipping.")

+# Expand paths inside list fields (e.g., replay)
+if getattr(args, "replay", None):
+    if isinstance(args.replay, str):
+        args.replay = [args.replay]
+    args.replay = [_expand_path(p) for p in args.replay]
+
 # Prefer replay if both replay and workload got set
 if getattr(args, "replay", None) and getattr(args, "workload", None):
     print("Info: --replay provided; ignoring --workload.", file=sys.stderr)
+    args.workload = None
+
+# Enforce valid policy/backfill values (after normalization in apply_config_to_args)
+if getattr(args, "policy", None):
+    _valid_policies = {p.value for p in PolicyType}
+    if args.policy not in _valid_policies:
+        sys.exit(f"Error: Unknown policy '{args.policy}'. "
+                 f"Valid: {sorted(_valid_policies)}")
+if getattr(args, "backfill", None):
+    _valid_backfills = {b.value for b in BackfillType}
+    if args.backfill not in _valid_backfills:
+        sys.exit(f"Error: Unknown backfill '{args.backfill}'. "
+                 f"Valid: {sorted(_valid_backfills)}")
+
+# Multi-partition guard for single-part driver (check merged args incl. CLI)
+if os.path.basename(sys.argv[0]) == "main.py":
+    _parts = args.partitions or []
+    if isinstance(_parts, str):
+        _parts = [_parts]
+    if len(_parts) > 1:
+        sys.exit("Error: Use multi-part-sim.py for multi-partition runs.")

 # Validate workload args before time conversions
 check_workload_args(args)
-- 
GitLab


From e50afd3712e7e9214bccad8b46f851bbf26541f8 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Wed, 13 Aug 2025 17:43:47 -0400
Subject: [PATCH 221/388] Update gcloudv2.py to use Job objects instead of job dicts

---
 raps/dataloaders/gcloudv2.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py
index f0900a3..61f544e 100644
--- a/raps/dataloaders/gcloudv2.py
+++ b/raps/dataloaders/gcloudv2.py
@@ -6,7 +6,8 @@ from typing import List, Optional, Generator, Tuple, Any, Union
 import numpy as np
 import pandas as pd

-from raps.job import job_dict # ensure RAPS is in PYTHONPATH
+from raps.job import job_dict
+from raps.job import Job

 """
 Official instructions are here:
@@ -198,6 +199,7 @@ class GoogleClusterV2DataLoader:


 def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any], float, float]:
+    config = kwargs.get('config')
     # Unpack list
     if isinstance(data_path, list):
         if len(data_path)==1:
@@ -256,6 +258,9 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any
     t0 = df["timestamp"].min()
     t1 = df["timestamp"] - t0

+    # Get trace quanta
+    trace_quanta = config['TRACE_QUANTA']
+
     # Load task usage
     usage_loader = GoogleClusterV2DataLoader(base_path, event_type="task_usage", concatenate=True)
     usage_df = next(iter(usage_loader))
@@ -294,15 +299,20 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any
         if jid_f!='*' and str(jid)!=str(jid_f): continue
         trace = usage_map[jid]
-        # ensure gpu_trace is same length
-        gpu_trace = np.zeros_like(trace)
-        job = job_dict(
-            nodes_required=nodes_required,
+        # ensure gpu_trace is same length as cpu_trace
+        gpu_trace = np.zeros_like(trace, dtype=float)
+
+        # nodes_required should be a positive int
+        nr = int(nodes_required_map.get(jid, 1))
+        if nr < 1:
+            nr = 1
+
+        job_d = job_dict(
+            nodes_required=nr,
             name=f"job_{jid}",
             account=f"user_{row.get('user_name','unknown')}",
             cpu_trace=trace,
-            #gpu_trace=gpu_trace,
-            gpu_trace=0,
+            gpu_trace=gpu_trace,
             nrx_trace=[], ntx_trace=[],
             end_state="UNKNOWN", scheduled_nodes=[],
             id=jid, priority=int(row.get('scheduling_class',0)),
@@ -310,10 +320,11 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any
             submit_time=start, time_limit=0,
             start_time=start, end_time=end,
             wall_time=wall, trace_time=row["timestamp"],
-            trace_start_time=start, trace_end_time=end
+            trace_start_time=start, trace_end_time=end, trace_quanta=trace_quanta
         )
+        # Wrap dict in a real Job so telemetry.save_snapshot() can use __dict__
         #if nodes_required > 0:
-        jobs.append(job)
+        jobs.append(Job(job_d))

     # Compute simulation span: start at t=0, end at the latest job finish
     simulation_start = 0
-- 
GitLab


From 27c4d6c6fe02296922225fac6f20614df149f151 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 13 Aug 2025 17:55:28 -0400
Subject: [PATCH 222/388] Added -o argument and added it to the tests to clean up after testing.
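
The -o flag takes an optional value (nargs="?"), so three states are
possible; a sketch of the intended semantics, given the const=""/default=None
pair in the diff below:

    python main.py --system frontier            # args.output is None -> no output files
    python main.py --system frontier -o         # args.output == ""   -> auto-generated case name
    python main.py --system frontier -o myrun   # args.output == "myrun" -> named outputs

The tests pass a random id via -o so each run's {id}.npz snapshot and
simulation_results/{id} directory can be removed afterwards.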
--- main.py | 4 ++-- pytest.ini | 2 +- raps/args.py | 8 ++++++-- raps/telemetry.py | 20 +++++++++++++------ raps/workload.py | 2 +- tests/conftest.py | 6 ++++++ tests/systems/test_main_basic_run.py | 12 +++++++++-- tests/systems/test_main_cooling_run.py | 12 +++++++++-- .../test_main_cooling_uncertainty_run.py | 12 +++++++++-- tests/systems/test_main_fastforward_run.py | 12 +++++++++-- tests/systems/test_main_network_run.py | 15 +++++++++++--- .../systems/test_main_network_withdata_run.py | 15 +++++++++++--- tests/systems/test_main_noui_run.py | 12 +++++++++-- tests/systems/test_main_time_delta_run.py | 12 +++++++++-- .../test_main_time_delta_sub_second_run.py | 11 ++++++++-- tests/systems/test_main_time_ff_delta_run.py | 13 ++++++++++-- tests/systems/test_main_time_run.py | 12 +++++++++-- tests/systems/test_main_uncertainty_run.py | 12 +++++++++-- tests/systems/test_main_withdata_run.py | 10 +++++++++- .../test_multi_part_sim_network_run.py | 6 +++++- tests/systems/test_telemetry_withdata_run.py | 10 +++++++++- 21 files changed, 177 insertions(+), 41 deletions(-) diff --git a/main.py b/main.py index 8c46c8b..f8de19e 100644 --- a/main.py +++ b/main.py @@ -142,7 +142,7 @@ def main(): accounts = job_accounts sc.accounts = accounts - if args.plot or args.output: + if args.plot or args.output is not None: try: os.makedirs(OPATH) except OSError as error: @@ -228,7 +228,7 @@ def main(): else: print('Cooling model not enabled... skipping output of plot') - if args.output: + if args.output is not None: if args.uncertainties: # Parquet cannot handle annotated ufloat format AFAIK diff --git a/pytest.ini b/pytest.ini index ac546d7..f0e2827 100644 --- a/pytest.ini +++ b/pytest.ini @@ -18,7 +18,7 @@ markers = fastforward: fastforward argument test time_delta: time delta argument test time_delta_sub_second: sub second time delta argument test - net: network model test + network: network model test 40frontiers: System test adastraMI250: System test diff --git a/raps/args.py b/raps/args.py index 942b54d..631fcba 100644 --- a/raps/args.py +++ b/raps/args.py @@ -123,8 +123,12 @@ parser.add_argument("--layout", type=str, choices=ui_layout_choices, default=ui_layout_choices[0], help="UI layout") # Output -parser.add_argument("-o", "--output", action="store_true", - help="Write power/cooling/loss outputs for analysis") +parser.add_argument('-o', '--output', type=str, nargs="?", + const="", # Used if -o is given without a value + default=None, # Used if -o is not provided at all + help=("Output power, cooling, and loss models for later ", + "analysis. 
Argumment specifies name."), + ) plot_choices = ["power", "loss", "pue", "temp", "util"] parser.add_argument("-p", "--plot", nargs="+", choices=plot_choices, help="Plots to generate") diff --git a/raps/telemetry.py b/raps/telemetry.py index d80c69e..12c1bcd 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -55,7 +55,11 @@ class Telemetry: self.kwargs = kwargs self.system = kwargs.get('system') self.config = kwargs.get('config') - self.dirname = create_casename() + outname = kwargs.get('output') + if outname is None or outname == "": + self.dirname = create_casename() + else: + self.dirname = outname try: self.dataloader = importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__) except: @@ -250,8 +254,8 @@ class Telemetry: extracted_date = matched_date.group(0) self.dirname = "sim=" + extracted_date else: - extracted_date = "Date not found" - self.dirname = create_casename() + extracted_date = f"Date not found, dirname is: {self.dirname}" + print(extracted_date) print(*args.replay) try: @@ -354,9 +358,13 @@ def run_telemetry(): # combine into total per‐job traffic net_means = [tx + rx for tx, rx in zip(ntx_means, nrx_means)] plot_network_histogram(ax=ax,data=net_means) - if args.output: - plt.savefig(f'{args.output}') - print(f"Saved to: {args.output}") + if args.output is not None: + if args.output == "": + filename = f"{td.dirname}.svg" + else: + filename = args.output + plt.savefig(f'{filename}') + print(f"Saved to: {filename}") else: plt.show() diff --git a/raps/workload.py b/raps/workload.py index baa13f4..1c1e7c0 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -210,7 +210,7 @@ class Workload: print(args) total_jobs = args.numjobs orig_job_size_distribution = args.jobsize_distribution - orig_wall_time_distribution = args.jobsize_distribution + orig_wall_time_distribution = args.walltime_distribution orig_cpuutil_distribution = args.cpuutil_distribution orig_gpuutil_distribution = args.gpuutil_distribution jobs = [] diff --git a/tests/conftest.py b/tests/conftest.py index 5084620..8f05879 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import pytest +import uuid def pytest_addoption(parser): @@ -12,3 +13,8 @@ def pytest_runtest_setup(item): #reason = f"Skipping {item.nodeid} because it requires --runlong" reason = "Skipping test because it requires --runlong" pytest.skip(reason) + + +@pytest.fixture +def random_id(): + return f"test-{str(uuid.uuid4())[:8]}" diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index c3f919a..8993949 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_run(system, system_config): +def test_main_run(system, system_config,random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -19,8 +19,16 @@ def test_main_run(system, system_config): result = subprocess.run([ "python", "main.py", "--time", "1m", - "--system", system + "--system", system, + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_cooling_run.py b/tests/systems/test_main_cooling_run.py index 6932d9c..79c10b7 100644 --- a/tests/systems/test_main_cooling_run.py +++ 
b/tests/systems/test_main_cooling_run.py @@ -12,7 +12,7 @@ pytestmark = [ ] -def test_main_run(system, system_config): +def test_main_run(system, system_config, random_id): if not system_config.get("cooling", False): pytest.skip(f"{system} does not support cooling.") @@ -22,8 +22,16 @@ def test_main_run(system, system_config): "--time", "1h", "--system", system, "-c", - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_cooling_uncertainty_run.py b/tests/systems/test_main_cooling_uncertainty_run.py index 507e15d..2515325 100644 --- a/tests/systems/test_main_cooling_uncertainty_run.py +++ b/tests/systems/test_main_cooling_uncertainty_run.py @@ -12,7 +12,7 @@ pytestmark = [ ] -def test_main_run(request, system, system_config): +def test_main_run(request, system, system_config, random_id): print(f"Markexpr: {request.config.option.markexpr}") if not system_config.get("uncertainty", False) or not system_config.get("cooling", False): pytest.skip(f"{system} does not support cooling or uncertainty.") @@ -24,8 +24,16 @@ def test_main_run(request, system, system_config): "--system", system, "-c", "-u", - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 88657b8..4b0584b 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -18,7 +18,7 @@ pytestmark = [ "0m", "1m", "60m", "0h", "1h", "6h", ]) -def test_main_fastforward_run(system, system_config, ff_arg): +def test_main_fastforward_run(system, system_config, ff_arg, random_id): if not system_config.get("fastforward", False): pytest.skip(f"{system} does not support basic main run.") @@ -29,8 +29,16 @@ def test_main_fastforward_run(system, system_config, ff_arg): "--fastforward", ff_arg, "--system", system, #--"-f", system_file, - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index ce7d467..f40cc8f 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -7,11 +7,12 @@ from tests.util import PROJECT_ROOT pytestmark = [ pytest.mark.system, - pytest.mark.nodata + pytest.mark.nodata, + pytest.mark.network ] -def test_main_network_run(system, system_config): +def test_main_network_run(system, system_config, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -23,8 +24,16 @@ def test_main_network_run(system, system_config): "python", "main.py", "--time", "1m", "--system", system, - "-net" + "-net", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert 
result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index e481d38..c478e21 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -9,11 +9,12 @@ pytestmark = [ pytest.mark.system, pytest.mark.nodata, pytest.mark.withdata, - pytest.mark.long + pytest.mark.long, + pytest.mark.network ] -def test_main_run(system, system_config, system_file): +def test_main_run(system, system_config, system_file, random_id): if not system_config.get("net", False): pytest.skip(f"{system} does not support basic net run.") @@ -30,8 +31,16 @@ def test_main_run(system, system_config, system_file): "--time", "1m", "--system", system, "-f", *file_list, - "-net" + "-net", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_noui_run.py b/tests/systems/test_main_noui_run.py index 4549ac3..50ca5b0 100644 --- a/tests/systems/test_main_noui_run.py +++ b/tests/systems/test_main_noui_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_run(system, system_config): +def test_main_run(system, system_config, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -20,8 +20,16 @@ def test_main_run(system, system_config): "python", "main.py", "--time", "1m", "--system", system, - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py index c1aee66..f86caa7 100644 --- a/tests/systems/test_main_time_delta_run.py +++ b/tests/systems/test_main_time_delta_run.py @@ -23,7 +23,7 @@ pytestmark = [ ("10h", "3h"), ("3d", "1d") ], ids=["1","1s","10s","1m","1h","3h","1d"]) -def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg): +def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -34,10 +34,18 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg): "--time-delta", tdelta_arg, "--system", system, #--"-f", system_file, - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" time, downscale = convert_to_seconds(time_arg) assert f"Time Simulated: {convert_seconds_to_hhmmss(time // downscale)}" in result.stdout + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index bcadd41..5542549 100644 --- 
a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -24,7 +24,7 @@ pytestmark = [ ("100ms", "1ms"), ("100ms", "1s"), ], ids=["1ds","3ds","1cs","1ms","1cs-for-10ds","1ms-for-10cs","1ms-for-100ms","1s-for-100ms"]) -def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg): +def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -35,7 +35,8 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg): "--time-delta", tdelta_arg, "--system", system, #--"-f", system_file, - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" time, downscale = convert_to_seconds(time_arg) @@ -43,5 +44,11 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg): #assert f"Time Simulated: {convert_seconds_to_hhmmss(int((time / td_ds) * downscale))}" in result.stdout assert f"Time Simulated: {convert_seconds_to_hhmmss(time / downscale)}" in result.stdout + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py index 9301c70..3e46dda 100644 --- a/tests/systems/test_main_time_ff_delta_run.py +++ b/tests/systems/test_main_time_ff_delta_run.py @@ -21,7 +21,8 @@ pytestmark = [ ("10h", "3h", "1h"), pytest.param("3d", "1d", "1d", marks=pytest.mark.long, id="1d (long)"), ], ids=["1","1s","10s","1m","1h","3h","1d"]) -def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, ff_arg): +def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, + ff_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -33,8 +34,16 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, ff_arg "--time-delta", tdelta_arg, "--system", system, #--"-f", system_file, - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_time_run.py b/tests/systems/test_main_time_run.py index b702fa4..e87e331 100644 --- a/tests/systems/test_main_time_run.py +++ b/tests/systems/test_main_time_run.py @@ -21,7 +21,7 @@ pytestmark = [ "0h", "1h", pytest.param("6h", marks=pytest.mark.long), # mark this one as long ]) -def test_main_time_run(system, system_config, time_args): +def test_main_time_run(system, system_config, time_args, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -31,8 +31,16 @@ def test_main_time_run(system, system_config, time_args): "--time", time_args, "--system", system, #--"-f", system_file, - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git 
a/tests/systems/test_main_uncertainty_run.py b/tests/systems/test_main_uncertainty_run.py index a02cc13..815a661 100644 --- a/tests/systems/test_main_uncertainty_run.py +++ b/tests/systems/test_main_uncertainty_run.py @@ -13,7 +13,7 @@ pytestmark = [ ] -def test_main_uncertainty_run(system, system_config): +def test_main_uncertainty_run(system, system_config, random_id): if not system_config.get("uncertainty", False): pytest.skip(f"{system} does not support uncertainty.") @@ -23,8 +23,16 @@ def test_main_uncertainty_run(system, system_config): "--time", "3m", "--system", system, "-u", - "--noui" + "--noui", + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index ffaf501..f2f5f7c 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -12,7 +12,7 @@ pytestmark = [ ] -def test_main_withdata_run(system, system_config, system_file): +def test_main_withdata_run(system, system_config, system_file, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main even without data.") if not system_config.get("withdata", False): @@ -29,7 +29,15 @@ def test_main_withdata_run(system, system_config, system_file): "--time", "1m", "--system", system, "-f", *file_list, + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + shell=True, + check=True + ) + del result gc.collect() diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py index 3a19dc8..643b97a 100644 --- a/tests/systems/test_multi_part_sim_network_run.py +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_multi_part_sim_run(system, system_config): +def test_multi_part_sim_run(system, system_config, random_id): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run.") @@ -29,5 +29,9 @@ def test_multi_part_sim_run(system, system_config): #"--noui" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + #TODO: + #Cleanup files after test! 
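+    # A possible cleanup sketch for the TODO above (the output paths are
+    # assumptions; multi-part-sim's naming is not pinned down in this test):
+    #   import glob, shutil
+    #   for d in glob.glob("simulation_results/sim=*"):
+    #       shutil.rmtree(d, ignore_errors=True)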
+ del result gc.collect() diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py index 707f39a..cac6104 100644 --- a/tests/systems/test_telemetry_withdata_run.py +++ b/tests/systems/test_telemetry_withdata_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_withdata_run(system, system_config, system_file): +def test_main_withdata_run(system, system_config, system_file, random_id): if not system_config.get("telemetry", False): pytest.skip(f"{system} does not support telemetry run.") if not system_config.get("withdata", False): @@ -28,7 +28,15 @@ def test_main_withdata_run(system, system_config, system_file): "python", "raps/telemetry.py", "--system", system, "-f", *file_list, + "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + subprocess.run( + f"rm {random_id}.npz ; rm {random_id}.png", + shell=True, + check=True + ) + del result gc.collect() -- GitLab From e35c2ae7226390abb6ce75cf39f930d4288421b8 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 14 Aug 2025 10:50:14 -0400 Subject: [PATCH 223/388] Update frontier experiment path to ~/data/frontier to be consistent with smoke tests --- experiments/frontier.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/frontier.yaml b/experiments/frontier.yaml index 26ce0b4..f865a19 100644 --- a/experiments/frontier.yaml +++ b/experiments/frontier.yaml @@ -1,4 +1,4 @@ system: frontier replay: - - ~/data/frontier-sample-2024-01-18/slurm/joblive/date=2024-01-18 - - ~/data/frontier-sample-2024-01-18/jobprofile/date=2024-01-18 + - ~/data/frontier/slurm/joblive/date=2024-01-18 + - ~/data/frontier/jobprofile/date=2024-01-18 -- GitLab From f4a2c26e819b9512f1609ca5f6e6be0f7a8f59ea Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 14 Aug 2025 23:11:56 -0400 Subject: [PATCH 224/388] Add bluewaters config --- config/bluewaters/network.json | 9 +++++++++ config/bluewaters/power.json | 19 +++++++++++++++++++ config/bluewaters/scheduler.json | 18 ++++++++++++++++++ config/bluewaters/system.json | 21 +++++++++++++++++++++ 4 files changed, 67 insertions(+) create mode 100644 config/bluewaters/network.json create mode 100644 config/bluewaters/power.json create mode 100644 config/bluewaters/scheduler.json create mode 100644 config/bluewaters/system.json diff --git a/config/bluewaters/network.json b/config/bluewaters/network.json new file mode 100644 index 0000000..5a0f564 --- /dev/null +++ b/config/bluewaters/network.json @@ -0,0 +1,9 @@ +{ + "TOPOLOGY": "fat-tree", + "NETWORK_MAX_BW": 1e9, + "FATTREE_K": 16, + "DRAGONFLY_D": 11, + "DRAGONFLY_A": 9, + "DRAGONFLY_P": 8, + "LATENCY": 1 +} diff --git a/config/bluewaters/power.json b/config/bluewaters/power.json new file mode 100644 index 0000000..fb4d4f2 --- /dev/null +++ b/config/bluewaters/power.json @@ -0,0 +1,19 @@ + +{ + "POWER_GPU_IDLE": 0, + "POWER_GPU_MAX": 0, + "POWER_CPU_IDLE": 38, + "POWER_CPU_MAX": 95, + "POWER_MEM": 74.26, + "POWER_NIC": 20, + "POWER_NVME": 30, + "POWER_SWITCH": 250, + "POWER_CDU": 8473.47, + "POWER_UPDATE_FREQ": 15, + "RECTIFIER_PEAK_THRESHOLD": 13670, + "SIVOC_LOSS_CONSTANT": 13, + "SIVOC_EFFICIENCY": 0.98, + "RECTIFIER_LOSS_CONSTANT": 17, + "RECTIFIER_EFFICIENCY": 0.96, + "POWER_COST": 0.094 +} diff --git a/config/bluewaters/scheduler.json b/config/bluewaters/scheduler.json new file mode 100644 index 0000000..52b97ae --- /dev/null +++ b/config/bluewaters/scheduler.json @@ -0,0 +1,18 @@ + +{ + 
"SEED": 42, + "JOB_ARRIVAL_TIME": 100, + "MTBF": 11, + "TRACE_QUANTA": 15, + "MIN_WALL_TIME": 60, + "MAX_WALL_TIME": 43200, + "UI_UPDATE_FREQ": 900, + "MAX_NODES_PER_JOB": 26884, + "JOB_END_PROBS": { + "COMPLETED": 0.63, + "FAILED": 0.13, + "CANCELLED": 0.12, + "TIMEOUT": 0.11, + "NODE_FAIL": 0.01 + } +} diff --git a/config/bluewaters/system.json b/config/bluewaters/system.json new file mode 100644 index 0000000..336da0b --- /dev/null +++ b/config/bluewaters/system.json @@ -0,0 +1,21 @@ + +{ + "NUM_CDUS": 36, + "RACKS_PER_CDU": 6, + "NODES_PER_RACK": 128, + "RECTIFIERS_PER_RACK": 32, + "CHASSIS_PER_RACK": 8, + "NODES_PER_BLADE": 2, + "SWITCHES_PER_CHASSIS": 4, + "NICS_PER_NODE": 4, + "RECTIFIERS_PER_CHASSIS": 4, + "NODES_PER_RECTIFIER": 4, + "MISSING_RACKS": [], + "DOWN_NODES": [], + "CPUS_PER_NODE": 2, + "GPUS_PER_NODE": 0, + "CPU_PEAK_FLOPS": 2.6496E11, + "GPU_PEAK_FLOPS": 0, + "CPU_FP_RATIO": 0.667, + "GPU_FP_RATIO": 0 +} -- GitLab From 6c85983a1ac545d9737e1815672efdd48080d421 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 15 Aug 2025 13:47:06 -0400 Subject: [PATCH 225/388] Fixed -h option. (Misplaced comma) --- raps/args.py | 4 ++-- tests/systems/test_main_help.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tests/systems/test_main_help.py diff --git a/raps/args.py b/raps/args.py index 631fcba..9f18991 100644 --- a/raps/args.py +++ b/raps/args.py @@ -126,8 +126,8 @@ parser.add_argument("--layout", type=str, choices=ui_layout_choices, parser.add_argument('-o', '--output', type=str, nargs="?", const="", # Used if -o is given without a value default=None, # Used if -o is not provided at all - help=("Output power, cooling, and loss models for later ", - "analysis. Argumment specifies name."), + help=("Output power, cooling, and loss models for later " + "analysis. 
Argument specifies name.")
                     )
 plot_choices = ["power", "loss", "pue", "temp", "util"]
 parser.add_argument("-p", "--plot", nargs="+", choices=plot_choices,
diff --git a/tests/systems/test_main_help.py b/tests/systems/test_main_help.py
new file mode 100644
index 0000000..f84c63d
--- /dev/null
+++ b/tests/systems/test_main_help.py
@@ -0,0 +1,28 @@
+import os
+import subprocess
+import gc
+import pytest
+from tests.util import PROJECT_ROOT
+
+
+pytestmark = [
+    pytest.mark.system,
+    pytest.mark.nodata
+]
+
+
+def test_main_help(system, system_config, random_id):
+    if not system_config.get("main", False):
+        pytest.skip(f"{system} does not support basic main run.")
+
+    os.chdir(PROJECT_ROOT)
+    result = subprocess.run([
+        "python", "main.py",
+        "-h"
+    ], capture_output=True, text=True, stdin=subprocess.DEVNULL)
+
+    assert result.returncode == 0, f"Failed on {system}: {result.stderr}"
+    assert "usage:" in result.stdout
+
+    del result
+    gc.collect()
-- 
GitLab


From b6831041a00fbf52d75c72dd3d870ae97a428269 Mon Sep 17 00:00:00 2001
From: "Maiterth, Matthias"
Date: Fri, 15 Aug 2025 18:25:12 +0000
Subject: [PATCH 226/388] Downtime simulator and continuous job generation

---
 .../frontier-hourly-1year-AI-proxy.yaml       | 12 +++
 main.py                                       | 15 ++--
 multi-part-sim.py                             |  8 +-
 raps/args.py                                  | 33 +++++++-
 raps/downtime.py                              | 81 +++++++++++++++++++
 raps/engine.py                                | 39 ++++++---
 raps/network.py                               |  2 +-
 raps/resmgr/default.py                        |  2 +
 raps/ui.py                                    |  8 --
 raps/workload.py                              | 55 ++++++++++---
 10 files changed, 216 insertions(+), 39 deletions(-)
 create mode 100644 experiments/frontier-hourly-1year-AI-proxy.yaml
 create mode 100644 raps/downtime.py

diff --git a/experiments/frontier-hourly-1year-AI-proxy.yaml b/experiments/frontier-hourly-1year-AI-proxy.yaml
new file mode 100644
index 0000000..c401c95
--- /dev/null
+++ b/experiments/frontier-hourly-1year-AI-proxy.yaml
@@ -0,0 +1,12 @@
+system: frontier
+continuous-job-generation: True
+downtime-first: 8h
+downtime-interval: 7d
+downtime-length: 4h
+time: 356d
+time-delta: 1h
+numjobs: 1
+maxqueue: 30
+workload: randomAI
+policy: fcfs
+backfill: firstfit
diff --git a/main.py b/main.py
index f8de19e..d16c190 100644
--- a/main.py
+++ b/main.py
@@ -91,13 +91,12 @@ def main():
         # TODO: Merge args and args_from_files? see telemetry.py:97
     else: # Synthetic jobs
-        wl = Workload(config)
-        jobs = getattr(wl, args.workload)(args=args)
+        wl = Workload(args,config)
+        jobs = wl.generate_jobs()

     if args.verbose:
-        for job_vector in jobs:
-            job = Job(job_vector)
-            print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace), '\twall_time(s):', job.wall_time)
+        for job in jobs:
+            print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace) if isinstance(job.gpu_trace,list) else job.gpu_trace, '\twall_time(s):', job.wall_time)
         time.sleep(2)

     timestep_start = 0
@@ -120,10 +119,16 @@
     else:
         time_delta = 1

+    if args.continuous_job_generation:
+        continuous_workload = wl
+    else:
+        continuous_workload = None
+
     sc = Engine(
         power_manager=power_manager,
         flops_manager=flops_manager,
         cooling_model=cooling_model,
+        continuous_workload=continuous_workload,
         jobs=jobs,
         **args_dict,
     )
diff --git a/multi-part-sim.py b/multi-part-sim.py
index bd9ce3e..3bc2dc1 100644
--- a/multi-part-sim.py
+++ b/multi-part-sim.py
@@ -1,9 +1,9 @@
 """
 Main driver for simulating multi-partition (heterogeneous) systems in the RAPS
-module of ExaDigiT.
Supports replaying telemetry or generating synthetic workloads across CPU-only, GPU, and mixed partitions. Initializes per- -partition power, FLOPS, and scheduling models, then advances simulations in -lockstep. Outputs per-partition performance, utilization, and energy +partition power, FLOPS, and scheduling models, then advances simulations in +lockstep. Outputs per-partition performance, utilization, and energy statistics for systems such as MIT Supercloud, Setonix, Adastra, and LUMI. """ @@ -102,7 +102,7 @@ if args.replay: job.submit_time = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME']) else: # Synthetic workload - wl = Workload(*configs) + wl = Workload(args,*configs) total_initial_jobs = args.numjobs diff --git a/raps/args.py b/raps/args.py index 631fcba..817d213 100644 --- a/raps/args.py +++ b/raps/args.py @@ -28,7 +28,9 @@ def apply_config_to_args(cfg, args): }: merged.update(v) else: - merged[k] = v + # Enter the commandline argument, but _underscores as the -dashes + # are replaced when reading from the commandline, but not in the yaml. + merged[k.replace('-','_')] = v # Apply to argparse namespace for k, v in merged.items(): @@ -183,6 +185,21 @@ parser.add_argument("--accounts", action="store_true", parser.add_argument("--accounts-json", type=str, help="Accounts JSON from previous run") +# Downtime +parser.add_argument("--downtime-first", type=str, default=None, + help="First downtime, e.g., after 123, 27m, 3h, 7d") +parser.add_argument("--downtime-interval", type=str, default=None, + help="Interval between downtimes, e.g., every 123, 27m, 3h, 7d") +parser.add_argument("--downtime-length", type=str, default=None, + help="Downtime length, e.g., 123, 27m, 3h, 7d") + +# Continous Job Generation +parser.add_argument("--continuous-job-generation", action="store_true", + help="Activate continuous job generation.") +parser.add_argument("--maxqueue", type=int, default=50, + help="Specify the max queue length for continuous job generation.") + + def post_process_args(args): if args.time_delta: @@ -200,6 +217,13 @@ def post_process_args(args): else: ff_raw, ff_down = None, 1 + if args.downtime_first: + dtf_raw, dtf_down = convert_to_seconds(args.downtime_first) + if args.downtime_interval: + dti_raw, dti_down = convert_to_seconds(args.downtime_interval) + if args.downtime_length: + dtl_raw, dtl_down = convert_to_seconds(args.downtime_length) + max_down = max(tdelta_down, time_down, ff_down) args.downscale = max_down @@ -210,6 +234,13 @@ def post_process_args(args): if args.fastforward: args.fastforward = int((ff_raw / ff_down) * max_down) + if args.downtime_first: + args.downtime_first = int((dtf_raw / dtf_down) * max_down) + if args.downtime_interval: + args.downtime_interval = int((dti_raw / dti_down) * max_down) + if args.downtime_length: + args.downtime_length = int((dtl_raw / dtl_down) * max_down) + return args diff --git a/raps/downtime.py b/raps/downtime.py new file mode 100644 index 0000000..d80aba8 --- /dev/null +++ b/raps/downtime.py @@ -0,0 +1,81 @@ +from __future__ import annotations +from typing import TYPE_CHECKING +from raps.job import JobState +from raps.args import args +import numpy as np + + +if TYPE_CHECKING: + from raps.engine import Engine + + +class Downtime: + + def __init__(self,*, + first_downtime, + downtime_interval, + downtime_length, + ): + self.skip = False + if downtime_length == 0 or downtime_interval == 0 or \ + downtime_length is None or downtime_interval is None: + self.skip = True + self.interval:int = downtime_interval + self.length:int = 
downtime_length
+        self.start:int = first_downtime
+        self.end:int = 0
+        self.down:bool = False
+
+    def check_and_trigger(self,*,
+                          timestep:int,
+                          engine:Engine
+                          ):
+        if self.skip:
+            return False  # Don't simulate downtime
+        if timestep > self.start and not self.down:
+            self.simulate_down(engine=engine)
+            # Draw the actual outage length with a 30-minute standard
+            # deviation around the nominal downtime length.
+            this_downtime_length = np.random.normal(self.length, 30 * 60)
+            self.end = timestep + this_downtime_length
+            self.start = self.start + self.interval  # Next start
+            return True  # System went down
+        if timestep > self.end and self.down:
+            self.simulate_up(engine=engine)
+            return True  # System went up
+        return False  # No change
+
+    def simulate_down(self,*,
+                      engine:Engine
+                      ):
+        if args.debug:
+            print("Simulated downtime: before downtime start")
+            print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")
+
+        # engine.resource_manager.down_nodes.update(engine.resource_manager.nodes)  # down_nodes are a set
+        # engine.resource_manager.available_nodes[:] = []
+
+        for job in engine.running:
+            job._state = JobState.CANCELLED
+            engine.power_manager.set_idle(job.scheduled_nodes)
+            engine.resource_manager.free_nodes_from_job(job)
+
+        # Add all available nodes to the down set.
+        engine.resource_manager.down_nodes.update(
+            engine.resource_manager.available_nodes)
+        # Clear available nodes.
+        engine.resource_manager.available_nodes[:] = []
+
+        engine.queue += engine.running
+        engine.running = []
+        if args.debug:
+            print("Simulated downtime: after downtime start")
+            print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")
+        self.down = True
+
+    def simulate_up(self,*,
+                    engine:Engine
+                    ):
+        self.down = False
+        engine.resource_manager.available_nodes[:] = [n['id'] for n in engine.resource_manager.nodes if not n['is_down']]
+        # Careful! engine.down_nodes holds the down nodes handed to the engine,
+        # not those managed by the resource manager, so it is left untouched here.
+        engine.resource_manager.down_nodes.clear()
+        engine.resource_manager.down_nodes.update(engine.config["DOWN_NODES"])  # Restore the originally configured down nodes.
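The trigger cycle above is easier to see with the engine factored out. A minimal sketch of the same state machine follows; `StubDowntime` and the driving loop are illustrative only (the real class also drains and restores nodes through the resource manager), with the 8h/7d/4h values taken from the example YAML:

```
# Minimal sketch of Downtime.check_and_trigger() with the engine factored out.
# StubDowntime is illustrative only; it is not part of the patch.
import numpy as np

class StubDowntime:
    def __init__(self, first, interval, length):
        self.start, self.interval, self.length = first, interval, length
        self.end, self.down = 0, False

    def check_and_trigger(self, timestep):
        if timestep > self.start and not self.down:
            self.down = True
            # Outage length is drawn with a ~30 min standard deviation.
            self.end = timestep + np.random.normal(self.length, 30 * 60)
            self.start += self.interval   # schedule the next outage
            return True                   # system went down
        if timestep > self.end and self.down:
            self.down = False
            return True                   # system came back up
        return False                      # no change

dt = StubDowntime(first=8 * 3600, interval=7 * 86400, length=4 * 3600)
events = [t for t in range(0, 14 * 86400, 3600) if dt.check_and_trigger(t)]
# Expect alternating down/up transitions: down shortly after 8h, up about
# 4h later, then down again roughly one week after the first start.
```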
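For reference, the new `--downtime-*` flags accept the same `123 / 27m / 3h / 7d` shorthand as `--time` and `--fastforward`. A toy version of that parsing is sketched below; the real `raps.utils.convert_to_seconds` additionally returns the downscale factor used in `post_process_args`, which this sketch omits:

```
# Toy sketch of the time shorthand accepted by --downtime-first/-interval/-length.
# Assumption: a bare number already means seconds.
UNITS = {"s": 1, "m": 60, "h": 3600, "d": 86400}

def to_seconds(spec: str) -> int:
    spec = spec.strip()
    if spec[-1].isdigit():
        return int(spec)                  # bare number: already seconds
    return int(float(spec[:-1]) * UNITS[spec[-1]])

assert to_seconds("8h") == 28800          # downtime-first in the example YAML
assert to_seconds("7d") == 604800         # downtime-interval
assert to_seconds("4h") == 14400          # downtime-length
```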
diff --git a/raps/engine.py b/raps/engine.py index 6885e50..aee9080 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -17,6 +17,8 @@ from raps.network import ( apply_job_slowdown, compute_system_network_stats ) +from raps.workload import continuous_job_generation +from raps.downtime import Downtime @dataclasses.dataclass @@ -26,15 +28,17 @@ class TickData: completed: list[Job] running: list[Job] queue: list[Job] - down_nodes: list[int] power_df: Optional[pd.DataFrame] p_flops: Optional[float] g_flops_w: Optional[float] system_util: float fmu_inputs: Optional[dict] fmu_outputs: Optional[dict] + # This should not be here v: num_active_nodes: int num_free_nodes: int + down_nodes: list[int] + # This should not be here ^: avg_net_tx: float avg_net_rx: float avg_net_util: float @@ -51,6 +55,7 @@ class Engine: config, jobs=None, total_initial_jobs=0, + continuous_workload=None, # Workload class to generate from for continuous generation **kwargs): self.config = config self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) @@ -72,6 +77,7 @@ class Engine: self.power_manager = power_manager self.flops_manager = flops_manager self.debug = kwargs.get('debug') + self.continuous_workload = continuous_workload self.output = kwargs.get('output') self.replay = kwargs.get('replay') self.downscale = kwargs.get('downscale',1) # Factor to downscale the 1s timesteps (power of 10) @@ -85,6 +91,9 @@ class Engine: self.avg_slowdown_history = [] self.max_slowdown_history = [] self.node_occupancy_history = [] + self.downtime = Downtime(first_downtime=kwargs.get('downtime_first'), + downtime_interval=kwargs.get('downtime_interval'), + downtime_length=kwargs.get('downtime_length')) # Set scheduler type - either based on config or command-line args - defaults to 'default' if self.config['multitenant']: @@ -165,12 +174,14 @@ class Engine: else: return False - def prepare_timestep(self, replay:bool = True): + def prepare_timestep(self,*, replay:bool = True, jobs): # 1 identify completed jobs - # 2 Simulate node failure # Defunct feature! - # 3 Update active and free nodes + # 2 Check continuous job generation + # 3 Simulate node failure # Defunct feature! + # 4 Simulate downtime + # 5 Update active and free nodes - # Identify Completed Jobs + # 1 Identify Completed Jobs completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time] # Update Completed Jobs, their account and and Free resources. for job in completed_jobs: @@ -186,15 +197,22 @@ class Engine: # Free the nodes via the resource manager. 
self.resource_manager.free_nodes_from_job(job) + # 2 Check continuous job generation + if self.continuous_workload is not None: # Experimental + continuous_job_generation(engine=self,timestep=self.current_time,jobs=jobs) + + # 3 Simulate node failure if not replay: - # Simulate node failure newly_downed_nodes = self.resource_manager.node_failure(self.config['MTBF']) for node in newly_downed_nodes: self.power_manager.set_idle(node) else: newly_downed_nodes = [] - # Update active/free nodes based on core/GPU utilization + # 4 Simulate downtime + need_reschedule = self.downtime.check_and_trigger(timestep=self.current_time,engine=self) + + # 5 Update active/free nodes based on core/GPU utilization if self.config['multitenant']: total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes) total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes) @@ -213,7 +231,7 @@ class Engine: - len(self.resource_manager.available_nodes) \ - len(self.resource_manager.down_nodes) - return completed_jobs, newly_downed_nodes + return completed_jobs, newly_downed_nodes, need_reschedule def complete_timestep(self, autoshutdown, all_jobs:List, jobs:List): # 1 update running time of all running jobs @@ -467,13 +485,13 @@ class Engine: all_jobs[:] = [job for job in all_jobs if job.submit_time > timestep + batch_window] # 1. Prepare Timestep: - completed_jobs, newly_downed_nodes = self.prepare_timestep(replay) + completed_jobs, newly_downed_nodes, need_reschedule = self.prepare_timestep(replay=replay,jobs=jobs) # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) # 3. Schedule jobs that are now in the queue. - if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions: + if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions or need_reschedule: self.scheduler.schedule(self.queue, self.running, self.current_time, accounts=self.accounts, @@ -487,6 +505,7 @@ class Engine: ((time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or (time_delta != 1 or self.downscale != 1)): tick_data = self.tick(time_delta=time_delta) tick_data.completed = completed_jobs + tick_data.down_nodes = self.resource_manager.down_nodes else: tick_data = None diff --git a/raps/network.py b/raps/network.py index fc103be..13308f4 100644 --- a/raps/network.py +++ b/raps/network.py @@ -73,7 +73,7 @@ class NetworkModel: net_cong = worst_link_util(loads, max_throughput) else: # capacity model: simple α+β or normalized overload - net_cong = network_congestion(ntx_util, nrx_util, max_throughput) + net_cong = network_congestion(net_tx, net_rx, max_throughput) return net_util, net_cong, net_tx, net_rx, max_throughput diff --git a/raps/resmgr/default.py b/raps/resmgr/default.py index 8a5d9fd..ad71ec9 100644 --- a/raps/resmgr/default.py +++ b/raps/resmgr/default.py @@ -61,6 +61,8 @@ class ExclusiveNodeResourceManager: for n in job.scheduled_nodes: if n not in self.available_nodes: self.available_nodes.append(n) + else: + raise KeyError(f"node was free but already in available nodes: {n.id}") self.available_nodes = sorted(self.available_nodes) def update_system_utilization(self, current_time, running_jobs): diff --git a/raps/ui.py b/raps/ui.py index 1849e61..7c6b598 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -194,14 +194,6 @@ class LayoutManager: nodes_display, running_time_str ] - row.append(nodes_display) - - #if show_nodes: - # # Insert NODELIST immediately after col_slow (whether 
NODELIST or SLOWDOWN) - # row.append(col_nodelist) - - # Finally, append the running‐time column - row.append(convert_seconds_to_hhmm(job.running_time)) # If the job has been flagged as “dilated”, show its row in yellow if getattr(job, "dilated", False): diff --git a/raps/workload.py b/raps/workload.py index 1c1e7c0..128c286 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -49,10 +49,17 @@ from raps.utils import truncated_normalvariate_int, truncated_normalvariate_floa class Workload: - def __init__(self, *configs): + def __init__(self,args,*configs): """ Initialize Workload with multiple configurations. """ self.partitions = [config['system_name'] for config in configs] self.config_map = {config['system_name']: config for config in configs} + self.args = args + + def generate_jobs(self): + # This function calls the job generation function as specified by the workload keyword. + # The respective funciton of this class is called. + jobs = getattr(self,self.args.workload)(args=self.args) + return jobs def compute_traces(self, cpu_util: float, gpu_util: float, wall_time: int, trace_quanta: int) -> tuple[np.ndarray, np.ndarray]: """ Compute CPU and GPU traces based on mean CPU & GPU utilizations and wall time. """ @@ -149,14 +156,14 @@ class Workload: def wall_time_distribution_draw_weibull(self,args,config): return truncated_weibull(args.walltime_weibull_scale, args.walltime_weibull_shape, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) - def generate_jobs(self, *, - job_arrival_distribution_to_draw_from, - job_size_distribution_to_draw_from, - cpu_util_distribution_to_draw_from, - gpu_util_distribution_to_draw_from, - wall_time_distribution_to_draw_from, - args - ) -> list[list[any]]: + def generate_jobs_from_distribution(self, *, + job_arrival_distribution_to_draw_from, + job_size_distribution_to_draw_from, + cpu_util_distribution_to_draw_from, + gpu_util_distribution_to_draw_from, + wall_time_distribution_to_draw_from, + args + ) -> list[list[any]]: jobs = [] partition = random.choice(self.partitions) config = self.config_map[partition] @@ -205,6 +212,25 @@ class Workload: jobs.append(job) return jobs + # Test for random 'reasonable' AI jobs + def randomAI(self, **kwargs): + args = kwargs.get('args',None) + jobs = [] + for i in range(args.numjobs): + draw = random.randint(0,10) + if draw == 0: + et = random.randint(7200,28800) + nr = random.choice([128,256,512,1024,1280,1792,2048]) + new_job = Job(job_dict(nodes_required=nr,name="LLM",account="llmUser",end_state="Success", + id=random.randint(1,99999),cpu_trace=0.1,gpu_trace=(random.uniform(0.55,0.8) * self.config_map[self.args.system]['GPUS_PER_NODE']),ntx_trace=None, + nrx_trace=None,submit_time=0,time_limit=random.randint(43200,43200),start_time=0,end_time=et,wall_time=et)) + else: + new_job = Job(job_dict(nodes_required=1,name="LLM",account="llmUser",end_state="Success", + id=random.randint(1,99999),cpu_trace=1,gpu_trace=(0.2 * self.config_map[self.args.system]['GPUS_PER_NODE']),ntx_trace=None, + nrx_trace=None,submit_time=0,time_limit=43200,start_time=0,end_time=7200,wall_time=random.randint(60,7200))) + jobs.append(new_job) + return jobs + def synthetic(self, **kwargs): args = kwargs.get('args',None) print(args) @@ -269,7 +295,7 @@ class Workload: case _: raise NotImplementedError(args.gpuutil_distribution) - new_jobs = self.generate_jobs( + new_jobs = self.generate_jobs_from_distribution( job_arrival_distribution_to_draw_from=job_arrival_distribution_to_draw_from, 
job_size_distribution_to_draw_from=job_size_distribution_to_draw_from, cpu_util_distribution_to_draw_from=cpu_util_distribution_to_draw_from, @@ -944,5 +970,14 @@ def run_workload(): return jobs +def continuous_job_generation(*,engine,timestep,jobs): + #print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") + #print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") + if len(engine.queue) <= engine.continuous_workload.args.maxqueue: + new_jobs = engine.continuous_workload.generate_jobs() + jobs.extend(new_jobs) + pass + + if __name__ == "__main__": run_workload() -- GitLab From c8e5807a0b73e9aa953a8f9024354c244f85e8a6 Mon Sep 17 00:00:00 2001 From: "Maiterth, Matthias" Date: Fri, 15 Aug 2025 18:28:08 +0000 Subject: [PATCH 227/388] Flake8 --- .flake8 | 3 +++ .pre-commit-config.yaml | 11 +++++++++++ README.md | 41 ++++++++++++++++++++++++----------------- pyproject.toml | 3 ++- 4 files changed, 40 insertions(+), 18 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..bd48511 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +exclude = .git, __pycache__, venv*, simulation_results, third_party +max-line-length = 120 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..8ea69f7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/pycqa/flake8 + rev: '7.3.0' # pick a git hash / tag to point to + hooks: + - id: flake8 diff --git a/README.md b/README.md index 304f6f6..8c92a11 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # ExaDigiT/RAPS -ExaDigiT's Resource Allocator and Power Simulator (RAPS) schedules workloads and -estimates dynamic system power at specified time intervals. RAPS either schedules +ExaDigiT's Resource Allocator and Power Simulator (RAPS) schedules workloads and +estimates dynamic system power at specified time intervals. RAPS either schedules synthetic workloads or replays system telemetry workloads, provides system monitoring during simulation, and an outputs a report of scheduling -and power statistics at the end of the simulation. RAPS also can interface with +and power statistics at the end of the simulation. RAPS also can interface with the FMU cooling model by providing CDU-level power inputs to the cooling model, and reporting the statistics back to the user. RAPS also has built-in plotting capabilities to generate plots of power and cooling at the end of simulation runs. @@ -27,7 +27,7 @@ Note: Requires python3.11 or greater. ## Run simulator with telemetry replay - # Frontier + # Frontier DATEDIR="date=2024-01-18" DPATH=~/data/frontier-sample-2024-01-18 python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR @@ -37,12 +37,12 @@ Note: Requires python3.11 or greater. 
For Marconi supercomputer, download `job_table.parquet` from https://zenodo.org/records/10127767 # Marconi100 - python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet + python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from https://zenodo.org/records/14007065 # Adastra MI250 - python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet + python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet For Google cluster trace v2 @@ -80,7 +80,7 @@ For Lumi ## Perform Network Simulation -Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to +Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. To run a network simulation, use the following command: python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -ff 365d -t 12h --arrival poisson -net @@ -127,7 +127,7 @@ To run this in parallel use: *Note: first install `mpi4py` via pip or conda.* -This will simulate synthetic workloads on two partitions as defined in `config/setonix-cpu` and `config/setonix-gpu`. To replay telemetry workloads from another system, e.g., Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data, e.g., +This will simulate synthetic workloads on two partitions as defined in `config/setonix-cpu` and `config/setonix-gpu`. To replay telemetry workloads from another system, e.g., Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data, e.g., python main.py --system marconi100 -f /path/to/marconi100/job_table.parquet @@ -175,17 +175,25 @@ See instructions in [server/README.md](https://code.ornl.gov/exadigit/simulation See instructions in [dashboard/README.md](https://code.ornl.gov/exadigit/simulation-dashboard) +### Contributing Code + +Install pre-commit hooks as set by the project: +``` +pip install pre-commit +pre-commit install +''' + ## Authors -Many thanks to the contributors of ExaDigiT/RAPS. -The full list of contributors and organizations involved are found in CONTRIBUTORS.txt. +Many thanks to the contributors of ExaDigiT/RAPS. +The full list of contributors and organizations involved are found in CONTRIBUTORS.txt. ## Citation If you use ExaDigiT or RAPS in your research, please cite our work: @inproceedings{inproceedings, - title={A Digital Twin Framework for Liquid-cooled Supercomputers as Demonstrated at Exascale}, + title={A Digital Twin Framework for Liquid-cooled Supercomputers as Demonstrated at Exascale}, author={Brewer, Wesley and Maiterth, Matthias and Kumar, Vineet and Wojda, Rafal and Bouknight, Sedrick and Hines, Jesse and Shin, Woong and Greenwood, Scott and Grant, David and Williams, Wesley and Wang, Feiyi}, booktitle={SC24: International Conference for High Performance Computing, Networking, Storage and Analysis}, pages={1--18}, @@ -207,17 +215,16 @@ Thank you for your support! ## License -ExaDigiT/RAPS is distributed under the terms of both the MIT license and the Apache License (Version 2.0). -Users may choose either license, at their option. +ExaDigiT/RAPS is distributed under the terms of both the MIT license and the Apache License (Version 2.0). +Users may choose either license, at their option. -All new contributions must be made under both the MIT and Apache-2.0 licenses. 
-See LICENSE-MIT, LICENSE-APACHE, COPYRIGHT, NOTICE, and CONTRIBUTORS.txt for details. +All new contributions must be made under both the MIT and Apache-2.0 licenses. +See LICENSE-MIT, LICENSE-APACHE, COPYRIGHT, NOTICE, and CONTRIBUTORS.txt for details. -SPDX-License-Identifier: (Apache-2.0 OR MIT) +SPDX-License-Identifier: (Apache-2.0 OR MIT) ## Attributions Map data used in this project is provided by [OpenStreetMap](https://www.openstreetmap.org/copyright) and is available under the Open Database License (ODbL). © OpenStreetMap contributors. Weather data used in this project is provided by the [Open-Meteo API](https://open-meteo.com/en/docs). Open-Meteo offers free weather forecast data for various applications, and their API provides easy access to weather information without requiring user authentication. - diff --git a/pyproject.toml b/pyproject.toml index 2844ca8..33b73ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,5 +27,6 @@ dependencies = [ "pytest", "pytest-order", "pytest-xdist", - "pyyaml>=6.0.2" + "pyyaml>=6.0.2", + "pre-commit" ] -- GitLab From e83cec633c2e0ab95f00f34c2a6de90b22a81607 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 15 Aug 2025 14:35:54 -0400 Subject: [PATCH 228/388] Cleaned main.py for pep8 --- main.py | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/main.py b/main.py index d16c190..ba8b57f 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,6 @@ conversion losses), and optional coupling to a thermo-fluids cooling model. Produces performance, utilization, and energy metrics, with optional plots and output files for analysis and validation. """ - import json import numpy as np import random @@ -14,10 +13,9 @@ import pandas as pd import os import time import math - +# from raps.helpers import check_python_version -check_python_version() - +# from raps.config import ConfigManager from raps.constants import OUTPUT_PATH, SEED from raps.cooling import ThermoFluidsModel @@ -34,7 +32,6 @@ from raps.power import ( compute_node_power_validate_uncertainties ) from raps.engine import Engine -from raps.job import Job from raps.telemetry import Telemetry from raps.workload import Workload from raps.account import Accounts @@ -50,6 +47,8 @@ from raps.stats import ( from raps.args import args, args_dict +check_python_version() + def main(): if args.verbose or args.debug: @@ -87,26 +86,31 @@ def main(): if args.replay: td = Telemetry(**args_dict) - jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files(files=args.replay, args=args) + jobs, timestep_start, timestep_end, args_from_file = \ + td.load_jobs_times_args_from_files(files=args.replay, args=args) # TODO: Merge args and args_from_files? 
see telemetry.py:97 else: # Synthetic jobs - wl = Workload(args,config) + wl = Workload(args, config) jobs = wl.generate_jobs() if args.verbose: for job in jobs: - print('jobid:', job.id, '\tlen(gpu_trace):', len(job.gpu_trace) if isinstance(job.gpu_trace,list) else job.gpu_trace, '\twall_time(s):', job.wall_time) + print('jobid:', job.id, '\tlen(gpu_trace):', + len(job.gpu_trace) if isinstance(job.gpu_trace, list) + else job.gpu_trace, '\twall_time(s):', + job.wall_time) time.sleep(2) timestep_start = 0 - if hasattr(jobs[0],'end_time'): + if hasattr(jobs[0], 'end_time'): timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) else: timestep_end = 88200 # 24 hours td = Telemetry(**args_dict) - td.save_snapshot(jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args, filename=td.dirname) + td.save_snapshot(jobs=jobs, timestep_start=timestep_start, + timestep_end=timestep_end, args=args, filename=td.dirname) if args.fastforward is not None: timestep_start = args.fastforward @@ -160,12 +164,15 @@ def main(): downscale = args.downscale downscale_str = ""if downscale == 1 else f"/{downscale}" - print(f'Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str} seconds from {timestep_start} to {timestep_end}.') - print(f'Simulation time delta: {time_delta}{downscale_str} s, Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.') - layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, total_timesteps=total_timesteps, args_dict=args_dict, **config) + print(f"Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str}" + f" seconds from {timestep_start} to {timestep_end}.") + print(f"Simulation time delta: {time_delta}{downscale_str} s," + f"Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.") + layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug, + total_timesteps=total_timesteps, + args_dict=args_dict, **config) layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) - engine_stats = get_engine_stats(sc) job_stats = get_job_stats(sc) scheduler_stats = get_scheduler_stats(sc) @@ -185,27 +192,27 @@ def main(): if args.plot: if 'power' in args.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History', \ - OPATH / f'power.{args.imtype}', \ + pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History', + OPATH / f'power.{args.imtype}', uncertainties=args.uncertainties) x, y = zip(*power_manager.history) pl.plot_history(x, y) if 'util' in args.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)', \ + pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)', 'System Utilization History', OPATH / f'util.{args.imtype}') x, y = zip(*sc.sys_util_history) pl.plot_history(x, y) if 'loss' in args.plot: - pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History', \ - OPATH / f'loss.{args.imtype}', \ + pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History', + OPATH / f'loss.{args.imtype}', uncertainties=args.uncertainties) x, y = zip(*power_manager.loss_history) pl.plot_history(x, y) - pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History', \ - OPATH / f'loss_pct.{args.imtype}', \ + pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History', + OPATH / f'loss_pct.{args.imtype}', uncertainties=args.uncertainties) x, y = zip(*power_manager.loss_history_percentage) pl.plot_history(x, y) @@ -214,7 
+221,7 @@ def main(): if cooling_model: ylabel = 'pue' title = 'FMU ' + ylabel + 'History' - pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / f'pue.{args.imtype}', \ + pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / f'pue.{args.imtype}', uncertainties=args.uncertainties) df = pd.DataFrame(cooling_model.fmu_history) df.to_parquet('cooling_model.parquet', engine='pyarrow') -- GitLab From a175f284269225c0dd18eb8dc2a94169a4bee771 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 15 Aug 2025 14:50:42 -0400 Subject: [PATCH 229/388] engine pep8 style --- raps/engine.py | 89 +++++++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index aee9080..16eff17 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -49,14 +49,15 @@ class TickData: class Engine: """Job scheduling simulation engine.""" - def __init__(self, *, power_manager, - flops_manager, - cooling_model=None, - config, - jobs=None, - total_initial_jobs=0, - continuous_workload=None, # Workload class to generate from for continuous generation - **kwargs): + def __init__(self, *, + power_manager, + flops_manager, + cooling_model=None, + config, + jobs=None, + total_initial_jobs=0, + continuous_workload=None, # Workload class to generate from for continuous generation + **kwargs): self.config = config self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) self.resource_manager = ResourceManager( @@ -80,7 +81,8 @@ class Engine: self.continuous_workload = continuous_workload self.output = kwargs.get('output') self.replay = kwargs.get('replay') - self.downscale = kwargs.get('downscale',1) # Factor to downscale the 1s timesteps (power of 10) + # Factor to downscale the 1s timesteps (power of 10): + self.downscale = kwargs.get('downscale', 1) self.simulate_network = kwargs.get('simulate_network') self.sys_util_history = [] self.scheduler_queue_history = [] @@ -111,13 +113,14 @@ class Engine: resource_manager=self.resource_manager, jobs=jobs ) - print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}"\ - f", with policy {self.scheduler.policy} "\ + print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}" + f", with policy {self.scheduler.policy} " f"and backfill {self.scheduler.bfpolicy}") if self.simulate_network: available_nodes = self.resource_manager.available_nodes - self.network_model = NetworkModel(available_nodes=available_nodes,config=config,kwargs=kwargs) + self.network_model = NetworkModel(available_nodes=available_nodes, + config=config, kwargs=kwargs) else: self.network_model = None @@ -174,7 +177,7 @@ class Engine: else: return False - def prepare_timestep(self,*, replay:bool = True, jobs): + def prepare_timestep(self, *, replay: bool = True, jobs): # 1 identify completed jobs # 2 Check continuous job generation # 3 Simulate node failure # Defunct feature! 
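Step 2 above defers to `workload.continuous_job_generation`, which only tops the queue up once it has drained below `--maxqueue`. A stripped-down sketch of that policy, with `engine` and `workload` standing in for the real objects:

```
# Sketch of the queue-capped top-up performed each timestep (step 2 above).
# Only the policy is shown; the real code lives in raps/workload.py.
def maybe_generate(engine, workload, pending_jobs, maxqueue=50):
    # Generate more synthetic jobs only when the scheduler queue has drained
    # below the cap, so the queue length stays bounded over long runs.
    if len(engine.queue) <= maxqueue:
        pending_jobs.extend(workload.generate_jobs())
```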
@@ -199,7 +202,7 @@ class Engine: # 2 Check continuous job generation if self.continuous_workload is not None: # Experimental - continuous_job_generation(engine=self,timestep=self.current_time,jobs=jobs) + continuous_job_generation(engine=self, timestep=self.current_time, jobs=jobs) # 3 Simulate node failure if not replay: @@ -210,17 +213,23 @@ class Engine: newly_downed_nodes = [] # 4 Simulate downtime - need_reschedule = self.downtime.check_and_trigger(timestep=self.current_time,engine=self) + need_reschedule = self.downtime.check_and_trigger(timestep=self.current_time, engine=self) # 5 Update active/free nodes based on core/GPU utilization if self.config['multitenant']: - total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes) - total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes) - available_cpu_cores = sum(node['available_cpu_cores'] for node in self.resource_manager.nodes) - available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes) - - self.num_free_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and node['available_cpu_cores'] == node['total_cpu_cores'] and node['available_gpu_units'] == node['total_gpu_units']]) - self.num_active_nodes = len([node for node in self.resource_manager.nodes if not node['is_down'] and (node['available_cpu_cores'] < node['total_cpu_cores'] or node['available_gpu_units'] < node['total_gpu_units'])]) + # #total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes) + # #total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes) + # #available_cpu_cores = sum(node['available_cpu_cores'] for node in self.resource_manager.nodes) + # #available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes) + + self.num_free_nodes = len([node for node in self.resource_manager.nodes if + not node['is_down'] and + node['available_cpu_cores'] == node['total_cpu_cores'] and + node['available_gpu_units'] == node['total_gpu_units']]) + self.num_active_nodes = len([node for node in self.resource_manager.nodes if + not node['is_down'] and + (node['available_cpu_cores'] < node['total_cpu_cores'] + or node['available_gpu_units'] < node['total_gpu_units'])]) # Update system utilization history self.resource_manager.update_system_utilization(self.current_time, self.running) @@ -228,17 +237,17 @@ class Engine: # Whole-node allocator self.num_free_nodes = len(self.resource_manager.available_nodes) self.num_active_nodes = self.config['TOTAL_NODES'] \ - - len(self.resource_manager.available_nodes) \ - - len(self.resource_manager.down_nodes) + - len(self.resource_manager.available_nodes) \ + - len(self.resource_manager.down_nodes) return completed_jobs, newly_downed_nodes, need_reschedule - def complete_timestep(self, autoshutdown, all_jobs:List, jobs:List): + def complete_timestep(self, autoshutdown, all_jobs: List, jobs: List): # 1 update running time of all running jobs # 2 update the current_time of the engine (this serves as reference for most computations) # 3 Check if simulation should shutdown - #update Running time + # update Running time for job in self.running: if job.state == JobState.RUNNING: job.running_time = self.current_time - job.start_time @@ -319,7 +328,8 @@ class Engine: # Simulate network utilization if self.simulate_network: - net_util, net_cong, net_tx, net_rx, max_throughput = 
self.network_model.simulate_network_utilization(job=job,debug=self.debug) + net_util, net_cong, net_tx, net_rx, max_throughput = \ + self.network_model.simulate_network_utilization(job=job, debug=self.debug) net_utils.append(net_util) net_congs.append(net_cong) @@ -327,14 +337,14 @@ class Engine: net_rx_list.append(net_rx) else: - net_util, net_cong, net_tx, net_rx = 0.0,0.0,0.0,0.0 + net_util, net_cong, net_tx, net_rx = 0.0, 0.0, 0.0, 0.0 max_throughput = 0 net_utils.append(net_util) net_congs.append(net_cong) net_tx_list.append(net_tx) net_rx_list.append(net_rx) - #Apply slowdowns + # Apply slowdowns slowdown_factor = apply_job_slowdown(job=job, max_throughput=max_throughput, net_util=net_util, @@ -393,7 +403,7 @@ class Engine: avg_rx=avg_rx, avg_net=avg_net) else: - avg_tx, avg_rx, avg_net = None,None,None + avg_tx, avg_rx, avg_net = None, None, None # Continue with System Simulation @@ -428,7 +438,7 @@ class Engine: ) return tick_data - def prepare_system_state(self, all_jobs:List, timestep_start, timestep_end, replay:bool): + def prepare_system_state(self, all_jobs: List, timestep_start, timestep_end, replay: bool): # Modifies Jobs object self.current_time = timestep_start @@ -449,7 +459,8 @@ class Engine: self.scheduler.schedule([job], self.running, job.start_time, accounts=self.accounts, sorted=True) self.queue.remove(job) if replay and len(self.queue) != 0: - raise ValueError(f"Something went wrong! Not all jobs could be placed!\nPotential confligt in queue:\n{self.queue}") + raise ValueError("Something went wrong! Not all jobs could be placed!" + "\nPotential confligt in queue:\n{self.queue}") # Restore the target policy and backfill for the remainder of the simulation. self.scheduler.policy = target_policy self.scheduler.bfpolicy = target_bfpolicy @@ -466,7 +477,8 @@ class Engine: if self.debug: print(f"[DEBUG] run_simulation: Initial jobs count: {len(jobs)}") if jobs: - print(f"[DEBUG] run_simulation: First job submit_time: {jobs[0].submit_time}, start_time: {jobs[0].start_time}") + print("[DEBUG] run_simulation: First job submit_time: " + "{jobs[0].submit_time}, start_time: {jobs[0].start_time}") # Place jobs that are currently running, onto the system. self.prepare_system_state(jobs, timestep_start, timestep_end, replay) @@ -485,7 +497,7 @@ class Engine: all_jobs[:] = [job for job in all_jobs if job.submit_time > timestep + batch_window] # 1. Prepare Timestep: - completed_jobs, newly_downed_nodes, need_reschedule = self.prepare_timestep(replay=replay,jobs=jobs) + completed_jobs, newly_downed_nodes, need_reschedule = self.prepare_timestep(replay=replay, jobs=jobs) # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) @@ -502,7 +514,10 @@ class Engine: # 4. 
Run tick only at specified time_delta if 0 == (timestep % time_delta) and \ - ((time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or (time_delta != 1 or self.downscale != 1)): + ((time_delta == 1 and + self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or + (time_delta != 1 or self.downscale != 1) + ): tick_data = self.tick(time_delta=time_delta) tick_data.completed = completed_jobs tick_data.down_nodes = self.resource_manager.down_nodes @@ -524,7 +539,7 @@ class Engine: def get_scheduler_running_history(self): return self.scheduler_running_history - def record_util_stats(self,*, system_util): + def record_util_stats(self, *, system_util): self.sys_util_history.append((self.current_time, system_util)) self.scheduler_queue_history.append(len(self.running)) self.scheduler_running_history.append(len(self.queue)) @@ -545,5 +560,5 @@ class Engine: # power manager self.power_manager.history.append((self.current_time, total_power_kw)) self.power_manager.loss_history.append((self.current_time, total_loss_kw)) - #engine + # engine self.sys_power = total_power_kw -- GitLab From ac8382767da2b7e9fd81dfbda2c27e87708583b9 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 15 Aug 2025 17:00:34 -0400 Subject: [PATCH 230/388] Get the api client working locally on Linux workstation --- api_client/README.md | 18 +++- api_client/api_client.py | 173 +++++++++++++++++++++++++++++---------- 2 files changed, 148 insertions(+), 43 deletions(-) diff --git a/api_client/README.md b/api_client/README.md index 87f7a79..ac0f028 100644 --- a/api_client/README.md +++ b/api_client/README.md @@ -1,6 +1,20 @@ -API documentation availalbe at: https://exadigit.github.io/SimulationServer/ +API documentation available at: https://exadigit.github.io/SimulationServer/ +# Launch the simulation server + +sudo ./scripts/launch_local.sh + +# Population some initial simulations, e.g., + +python api_client.py run --system frontier --policy default --start 2024-01-01T00:00:00Z --end 2024-01-01T05:00:00Z --scheduler --scheduler-num-jobs 1000 --scheduler-seed 100 --scheduler-jobs-mode random +{'id': '2gr3nqgbmfapvlwhwzizgzbxr4', 'user': 'unknown', 'system': 'frontier', 'state': 'running', 'error_messages': None, 'start': '2024-01-01T00:00:00Z', 'end': '2024-01-01T00:10:00Z', 'execution_start': '2025-08-15T20:22:13.778590Z', 'execution_end': None, 'progress_date': '2024-01-01T00:00:00Z', 'progress': 0.0, 'config': {'start': '2024-01-01T00:00:00Z', 'end': '2024-01-01T00:10:00Z', 'system': 'frontier', 'scheduler': {'enabled': True, 'down_nodes': [], 'jobs_mode': 'random', 'schedule_policy': 'fcfs', 'reschedule': False, 'jobs': None, 'seed': 100, 'num_jobs': 1000}, 'cooling': {'enabled': False}}} + +# List the simulations + +python api_client.py list + +# export BASE_URL="https://myurl.com" -python get_api_token.py +#python get_api_token.py python api_client.py list python api_client.py details --id 5rkkb222xnge7c4ba4oxshqeha diff --git a/api_client/api_client.py b/api_client/api_client.py index c4f513a..f5af1b6 100644 --- a/api_client/api_client.py +++ b/api_client/api_client.py @@ -1,126 +1,217 @@ +#!/usr/bin/env python3 import os import argparse +import json import requests import pandas as pd from dotenv import load_dotenv -# Load environment variables +# ---------------------------------- +# Environment / configuration +# ---------------------------------- load_dotenv() -URL = os.getenv("BASE_URL") -RAPS_URL = os.path.join(URL, "exadigit/api") +# BASE_URL from env if you want, else localhost default 
+URL = os.getenv("BASE_URL", "http://localhost:8080") +# ---------------------------------- +# Auth / HTTP helpers +# ---------------------------------- def read_token(): - with open('.api-token', 'r') as token_file: - return token_file.read().strip() + token_path = ".api-token" + if os.path.exists(token_path): + try: + with open(token_path, "r") as token_file: + token = token_file.read().strip() + if token: + return token + except OSError as e: + print(f"Warning: Could not read token file: {e}") + # Fallback for localhost or dev use + return "xyz123" + def call_api(endpoint, method="GET", params=None, data=None): - TOKEN = read_token() - url = f"{RAPS_URL}{endpoint}" - headers = {"Authorization": f"Bearer {TOKEN}"} - - response = requests.request(method, url, headers=headers, params=params, json=data) - - if response.status_code == 200: - return response.json() + token = read_token() + url = f"{URL}{endpoint}" + headers = {"Authorization": f"Bearer {token}"} + + try: + resp = requests.request(method, url, headers=headers, params=params, json=data) + except requests.RequestException as e: + print(f"Request error: {e}") + return None + + if resp.status_code == 200: + # handle empty 200 + if not resp.content: + return None + try: + return resp.json() + except ValueError: + print("Error: Response was 200 but not JSON") + return None else: - print(f"Error: {response.status_code} - {response.text}") + print(f"Error: {resp.status_code} - {resp.text}") return None +# ---------------------------------- +# Command handlers +# ---------------------------------- def handle_run(args): - data = {"system": args.system, "policy": args.policy, "parameters": args.parameters} - response = call_api('/simulation/run', method="POST", data=data) + # Build nested payload while omitting keys the user didn’t set + data = { + "start": args.start, + "end": args.end, + "system": args.system, + "policy": args.policy, + "parameters": args.parameters or {}, + } + + scheduler = { + "enabled": args.scheduler_enabled, + "num_jobs": args.scheduler_num_jobs, + "seed": args.scheduler_seed, + "jobs_mode": args.scheduler_jobs_mode, + } + scheduler = {k: v for k, v in scheduler.items() if v is not None} + if scheduler: + data["scheduler"] = scheduler + + cooling = { + "enabled": args.cooling_enabled, + } + cooling = {k: v for k, v in cooling.items() if v is not None} + if cooling: + data["cooling"] = cooling + + response = call_api("/simulation/run", method="POST", data=data) print(response) def handle_list(args): - response = call_api('/simulation/list') + response = call_api("/simulation/list") if response: - results = response.get('results', []) + results = response.get("results", []) + if not results: + print("No simulations found.") + return df = pd.DataFrame(results) - #pd.set_option('display.max_columns', None) - #pd.set_option('display.max_colwidth', None) - #pd.set_option('display.width', None) + # Feel free to uncomment for wider console displays: + # pd.set_option('display.max_columns', None) + # pd.set_option('display.max_colwidth', None) + # pd.set_option('display.width', None) print(df) def handle_simulation_details(args): - response = call_api(f'/simulation/{args.id}') + response = call_api(f"/simulation/{args.id}") print(response) def handle_cooling_cdu(args): - response = call_api(f'/simulation/{args.id}/cooling/cdu') + response = call_api(f"/simulation/{args.id}/cooling/cdu") print(response) def handle_cooling_cep(args): - response = call_api(f'/simulation/{args.id}/cooling/cep') + response = 
call_api(f"/simulation/{args.id}/cooling/cep") print(response) def handle_scheduler_jobs(args): - response = call_api(f'/simulation/{args.id}/scheduler/jobs') + response = call_api(f"/simulation/{args.id}/scheduler/jobs") print(response) def handle_power_history(args): - response = call_api(f'/simulation/{args.id}/scheduler/jobs/{args.job_id}/power-history') + response = call_api(f"/simulation/{args.id}/scheduler/jobs/{args.job_id}/power-history") print(response) def handle_scheduler_system(args): - response = call_api(f'/simulation/{args.id}/scheduler/system') + response = call_api(f"/simulation/{args.id}/scheduler/system") print(response) def handle_system_info(args): - response = call_api(f'/system-info/{args.system}') + response = call_api(f"/system-info/{args.system}") print(response) -def main(): +# ---------------------------------- +# CLI +# ---------------------------------- +def build_parser(): parser = argparse.ArgumentParser(description="Interact with the SimulationServer REST API.") subparsers = parser.add_subparsers(title="commands", dest="command") - + # Run simulation run_parser = subparsers.add_parser("run", help="Run a simulation.") + + # Top-level options run_parser.add_argument("--system", required=True, help="System to run the simulation on.") run_parser.add_argument("--policy", required=True, help="Policy to use.") - run_parser.add_argument("--parameters", type=dict, default={}, help="Simulation parameters.") + run_parser.add_argument( + "--parameters", + type=json.loads, + default={}, + help='Simulation parameters as JSON, e.g. \'{"alpha":0.1,"beta":"x"}\'', + ) + run_parser.add_argument("--start", required=True, help="ISO time, e.g. 2024-01-01T00:00:00Z") + run_parser.add_argument("--end", required=True, help="ISO time, e.g. 
2024-01-01T00:10:00Z") + + # Scheduler group + sched_grp = run_parser.add_argument_group("scheduler options") + sched_grp.add_argument("--scheduler", dest="scheduler_enabled", action="store_true", help="Enable scheduler.") + sched_grp.add_argument("--no-scheduler", dest="scheduler_enabled", action="store_false", help="Disable scheduler.") + sched_grp.set_defaults(scheduler_enabled=None) # omit if unspecified + sched_grp.add_argument("--scheduler-num-jobs", type=int, help="Number of jobs.") + sched_grp.add_argument("--scheduler-seed", type=int, help="Random seed.") + sched_grp.add_argument("--scheduler-jobs-mode", choices=["random", "sequential"], help="Jobs mode.") + + # Cooling group + cool_grp = run_parser.add_argument_group("cooling options") + cool_grp.add_argument("--cooling", dest="cooling_enabled", action="store_true", help="Enable cooling.") + cool_grp.add_argument("--no-cooling", dest="cooling_enabled", action="store_false", help="Disable cooling.") + cool_grp.set_defaults(cooling_enabled=None) # omit if unspecified + run_parser.set_defaults(func=handle_run) - + # List simulations list_parser = subparsers.add_parser("list", help="List all simulations.") list_parser.set_defaults(func=handle_list) - + # Get simulation details details_parser = subparsers.add_parser("details", help="Get details of a simulation.") details_parser.add_argument("--id", required=True, help="Simulation ID.") details_parser.set_defaults(func=handle_simulation_details) - + # Cooling CDU cdu_parser = subparsers.add_parser("cooling-cdu", help="Get cooling CDU data for a simulation.") cdu_parser.add_argument("--id", required=True, help="Simulation ID.") cdu_parser.set_defaults(func=handle_cooling_cdu) - + # Cooling CEP cep_parser = subparsers.add_parser("cooling-cep", help="Get cooling CEP data for a simulation.") cep_parser.add_argument("--id", required=True, help="Simulation ID.") cep_parser.set_defaults(func=handle_cooling_cep) - + # Scheduler jobs jobs_parser = subparsers.add_parser("scheduler-jobs", help="Get scheduler jobs for a simulation.") jobs_parser.add_argument("--id", required=True, help="Simulation ID.") jobs_parser.set_defaults(func=handle_scheduler_jobs) - + # Power history power_parser = subparsers.add_parser("power-history", help="Get power history for a specific job in a simulation.") power_parser.add_argument("--id", required=True, help="Simulation ID.") power_parser.add_argument("--job-id", required=True, help="Job ID.") power_parser.set_defaults(func=handle_power_history) - + # Scheduler system scheduler_parser = subparsers.add_parser("scheduler-system", help="Get scheduler system data for a simulation.") scheduler_parser.add_argument("--id", required=True, help="Simulation ID.") scheduler_parser.set_defaults(func=handle_scheduler_system) - + # System info system_info_parser = subparsers.add_parser("system-info", help="Get system information.") system_info_parser.add_argument("--system", required=True, help="System name.") system_info_parser.set_defaults(func=handle_system_info) - - # Parse and execute + + return parser + +def main(): + parser = build_parser() args = parser.parse_args() if args.command: args.func(args) -- GitLab From bc0d2adbb84ae2d2e9cd74bf51557ce657fe6716 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 16 Aug 2025 12:49:12 -0400 Subject: [PATCH 231/388] Implement space bar pause feature --- raps/engine.py | 68 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 
6885e50..2bccf80 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -1,6 +1,11 @@ from typing import Optional, List import dataclasses import pandas as pd +import threading +import sys +import tty +import termios +import time from raps.job import Job, JobState from raps.policy import PolicyType @@ -42,6 +47,37 @@ class TickData: node_occupancy: dict[int, int] +class SimulationState: + def __init__(self): + self.paused = False + self.lock = threading.Lock() + + def toggle_pause(self): + with self.lock: + self.paused = not self.paused + + def is_paused(self): + with self.lock: + return self.paused + +def keyboard_listener(state): + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + try: + tty.setcbreak(sys.stdin.fileno()) + while True: + char = sys.stdin.read(1) + if char == ' ': + state.toggle_pause() + if state.is_paused(): + print("\n[PAUSED] Press space to resume.", file=sys.stderr) + else: + print("\n[RESUMED]", file=sys.stderr) + + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + + class Engine: """Job scheduling simulation engine.""" @@ -102,8 +138,8 @@ class Engine: resource_manager=self.resource_manager, jobs=jobs ) - print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}"\ - f", with policy {self.scheduler.policy} "\ + print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}" + f", with policy {self.scheduler.policy} " f"and backfill {self.scheduler.bfpolicy}") if self.simulate_network: @@ -282,9 +318,9 @@ class Engine: else: # if job.state == JobState.RUNNING: # Error checks if job.running_time > job.wall_time: - raise Exception(f"Job should have ended already!\n\ - {job.running_time} > {job.wall_time}\ - ") + raise Exception(f"""Job should have ended already! + {job.running_time} > {job.wall_time} + """) # Aggregate scheduled nodes scheduled_nodes.append(job.scheduled_nodes) @@ -293,7 +329,7 @@ class Engine: cpu_utils.append(cpu_util) # Percentage Utilization! - # Get GPU utilizaiton + # Get GPU utilization gpu_util = get_current_utilization(job.gpu_trace, job) gpu_utils.append(gpu_util) # Percentage Utilization! @@ -301,7 +337,8 @@ class Engine: # Simulate network utilization if self.simulate_network: - net_util, net_cong, net_tx, net_rx, max_throughput = self.network_model.simulate_network_utilization(job=job,debug=self.debug) + net_util, net_cong, net_tx, net_rx, max_throughput = \ + self.network_model.simulate_network_utilization(job=job, debug=self.debug) net_utils.append(net_util) net_congs.append(net_cong) @@ -309,7 +346,7 @@ class Engine: net_rx_list.append(net_rx) else: - net_util, net_cong, net_tx, net_rx = 0.0,0.0,0.0,0.0 + net_util, net_cong, net_tx, net_rx = 0.0, 0.0, 0.0, 0.0 max_throughput = 0 net_utils.append(net_util) net_congs.append(net_cong) @@ -375,7 +412,7 @@ class Engine: avg_rx=avg_rx, avg_net=avg_net) else: - avg_tx, avg_rx, avg_net = None,None,None + avg_tx, avg_rx, avg_net = None, None, None # Continue with System Simulation @@ -459,7 +496,16 @@ class Engine: # Batch Jobs into 6h windows based on submit_time or twice the time_delta if larger batch_window = max(60 * 60 * 6, 2 * time_delta) # at least 6h - for timestep in range(timestep_start, timestep_end): # Runs every seconds! + sim_state = SimulationState() + listener_thread = threading.Thread(target=keyboard_listener, args=(sim_state,), daemon=True) + listener_thread.start() + + timestep = timestep_start + while timestep < timestep_end: # Runs every seconds! 
+ + if sim_state.is_paused(): + time.sleep(0.1) + continue if (timestep % batch_window == 0) or (timestep == timestep_start): # Add jobs that are within the batching window and remove them from all jobs @@ -495,6 +541,8 @@ class Engine: if simulation_done: break yield tick_data + + timestep += 1 def get_job_history_dict(self): return self.job_history_dict -- GitLab From 361a90699ded73ea2303ee4e82d8e75314c04304 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 16 Aug 2025 13:10:14 -0400 Subject: [PATCH 232/388] Add support for shift + to double time_delta and shift - to half it --- raps/engine.py | 40 ++++++++++++++++++++++++++++++++-------- raps/ui.py | 15 +++++++++------ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 2bccf80..df3ec32 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -45,11 +45,13 @@ class TickData: avg_net_util: float slowdown_per_job: float node_occupancy: dict[int, int] + time_delta: int class SimulationState: - def __init__(self): + def __init__(self, time_delta): self.paused = False + self.time_delta = time_delta self.lock = threading.Lock() def toggle_pause(self): @@ -60,6 +62,21 @@ class SimulationState: with self.lock: return self.paused + def speed_up(self): + with self.lock: + self.time_delta *= 2 + print(f"\n[INFO] time_delta increased to {self.time_delta}", file=sys.stderr) + + def slow_down(self): + with self.lock: + if self.time_delta > 1: + self.time_delta //= 2 + print(f"\n[INFO] time_delta decreased to {self.time_delta}", file=sys.stderr) + + def get_time_delta(self): + with self.lock: + return self.time_delta + def keyboard_listener(state): fd = sys.stdin.fileno() old_settings = termios.tcgetattr(fd) @@ -67,12 +84,16 @@ def keyboard_listener(state): tty.setcbreak(sys.stdin.fileno()) while True: char = sys.stdin.read(1) - if char == ' ': + if char == ' ' or char == 'k': state.toggle_pause() if state.is_paused(): - print("\n[PAUSED] Press space to resume.", file=sys.stderr) + print("\n[PAUSED] Press space or k to resume.", file=sys.stderr) else: print("\n[RESUMED]", file=sys.stderr) + elif char == '+': + state.speed_up() + elif char == '-' or char == '_': + state.slow_down() finally: termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) @@ -443,7 +464,8 @@ class Engine: avg_net_rx=avg_rx, avg_net_util=avg_net, slowdown_per_job=0, - node_occupancy=node_occupancy + node_occupancy=node_occupancy, + time_delta=time_delta ) return tick_data @@ -496,7 +518,7 @@ class Engine: # Batch Jobs into 6h windows based on submit_time or twice the time_delta if larger batch_window = max(60 * 60 * 6, 2 * time_delta) # at least 6h - sim_state = SimulationState() + sim_state = SimulationState(time_delta) listener_thread = threading.Thread(target=keyboard_listener, args=(sim_state,), daemon=True) listener_thread.start() @@ -507,6 +529,8 @@ class Engine: time.sleep(0.1) continue + current_time_delta = sim_state.get_time_delta() + if (timestep % batch_window == 0) or (timestep == timestep_start): # Add jobs that are within the batching window and remove them from all jobs jobs += [job for job in all_jobs if job.submit_time <= timestep + batch_window] @@ -529,9 +553,9 @@ class Engine: print(".", end="", flush=True) # 4. 
Run tick only at specified time_delta - if 0 == (timestep % time_delta) and \ - ((time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or (time_delta != 1 or self.downscale != 1)): - tick_data = self.tick(time_delta=time_delta) + if 0 == (timestep % current_time_delta) and \ + ((current_time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or (current_time_delta != 1 or self.downscale != 1)): + tick_data = self.tick(time_delta=current_time_delta) tick_data.completed = completed_jobs else: tick_data = None diff --git a/raps/ui.py b/raps/ui.py index 1849e61..7d1744d 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -212,7 +212,7 @@ class LayoutManager: # Update the layout self.layout["scheduled"].update(Panel(Align(table, align="center"))) - def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes, avg_net_util, slowdown): + def update_status(self, time, nrun, nqueue, active_nodes, free_nodes, down_nodes, avg_net_util, slowdown, time_delta): """ Updates the status information table with the provided system status data. @@ -234,7 +234,7 @@ class LayoutManager: # Define columns with header styles columns = [ "Time", "Jobs Running", "Jobs Queued", - "Active Nodes", "Free Nodes", "Down Nodes"] + "Active Nodes", "Free Nodes", "Down Nodes", "Speed"] if self.simulate_network: columns.extend(("Net Util (%)", "Slowdown per job")) table = Table(header_style="bold magenta", expand=True) @@ -248,7 +248,8 @@ class LayoutManager: str(nqueue), str(active_nodes), str(free_nodes), - str(len(down_nodes)) + str(len(down_nodes)), + f"{time_delta}x" ] if self.simulate_network: row.append(f"{avg_net_util * 100:.0f}%") @@ -496,7 +497,8 @@ class LayoutManager: self.update_scheduled_jobs(data.running + data.queue) self.update_status( data.current_time, len(data.running), len(data.queue), data.num_active_nodes, - data.num_free_nodes, data.down_nodes, data.avg_net_util, data.slowdown_per_job + data.num_free_nodes, data.down_nodes, data.avg_net_util, data.slowdown_per_job, + data.time_delta ) self.update_scheduled_jobs(data.running + data.queue) @@ -509,7 +511,8 @@ class LayoutManager: data.num_free_nodes, data.down_nodes, data.avg_net_util, - data.slowdown_per_job + data.slowdown_per_job, + data.time_delta ) self.update_power_array( @@ -527,7 +530,7 @@ class LayoutManager: #last_i = 0 for i,data in enumerate(self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta, autoshutdown=True)): if data and (not self.debug and not self.noui): - self.update_full_layout(data,time_delta) + self.update_full_layout(data, time_delta) #self.update_progress_bar(i-last_i) #last_i=i if not self.debug and not self.noui: -- GitLab From b39aae20f6dca009678981c3e57e6690531fc1db Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 18 Aug 2025 10:14:04 -0400 Subject: [PATCH 233/388] flake8 conform with autopep8 --- hetero-setonix.py | 25 +- main.py | 2 +- multi-part-sim-mpi.py | 46 ++- multi-part-sim.py | 64 ++-- raps/account.py | 34 +- raps/args.py | 8 +- raps/cooling.py | 25 +- raps/dataloaders/adastraMI250.py | 30 +- raps/dataloaders/frontier.py | 45 ++- raps/dataloaders/fugaku.py | 19 +- raps/dataloaders/gcloudv2.py | 78 ++-- raps/dataloaders/lassen.py | 35 +- raps/dataloaders/marconi100.py | 30 +- raps/dataloaders/mit_supercloud/cli.py | 10 +- raps/dataloaders/mit_supercloud/download.py | 33 +- raps/dataloaders/mit_supercloud/loader.py | 127 ++++--- raps/dataloaders/mit_supercloud/utils.py | 8 +- raps/downtime.py | 38 +- raps/flops.py | 13 +- 
From b39aae20f6dca009678981c3e57e6690531fc1db Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Mon, 18 Aug 2025 10:14:04 -0400
Subject: [PATCH 233/388] flake8 conform with autopep8

---
 hetero-setonix.py                           |  25 +-
 main.py                                     |   2 +-
 multi-part-sim-mpi.py                       |  46 ++-
 multi-part-sim.py                           |  64 ++--
 raps/account.py                             |  34 +-
 raps/args.py                                |   8 +-
 raps/cooling.py                             |  25 +-
 raps/dataloaders/adastraMI250.py            |  30 +-
 raps/dataloaders/frontier.py                |  45 ++-
 raps/dataloaders/fugaku.py                  |  19 +-
 raps/dataloaders/gcloudv2.py                |  78 ++--
 raps/dataloaders/lassen.py                  |  35 +-
 raps/dataloaders/marconi100.py              |  30 +-
 raps/dataloaders/mit_supercloud/cli.py      |  10 +-
 raps/dataloaders/mit_supercloud/download.py |  33 +-
 raps/dataloaders/mit_supercloud/loader.py   | 127 ++++---
 raps/dataloaders/mit_supercloud/utils.py    |   8 +-
 raps/downtime.py                            |  38 +-
 raps/flops.py                               |  13 +-
 raps/helpers.py                             |   1 +
 raps/job.py                                 |  38 +-
 raps/network.py                             |  38 +-
 raps/plotting.py                            |  55 +--
 raps/power.py                               |  90 ++---
 raps/resmgr/__init__.py                     |   7 +-
 raps/resmgr/default.py                      |  63 ++--
 raps/resmgr/multitenant.py                  |  49 +--
 raps/schedulers/__init__.py                 |   3 +-
 raps/schedulers/default.py                  |  15 +-
 raps/schedulers/experimental.py             |  46 +--
 raps/schedulers/multitenant.py              |  34 +-
 raps/schedulers/replay.py                   |   9 +-
 raps/schedulers/scheduleflow.py             |  41 +-
 raps/stats.py                               |  55 +--
 raps/telemetry.py                           | 120 +++---
 raps/utils.py                               |  37 +-
 raps/validators.py                          |   2 +-
 raps/weather.py                             |  24 +-
 raps/workload.py                            | 393 ++++++++++++--------
 39 files changed, 968 insertions(+), 822 deletions(-)

diff --git a/hetero-setonix.py b/hetero-setonix.py
index b42dcdb..4d1ee64 100644
--- a/hetero-setonix.py
+++ b/hetero-setonix.py
@@ -1,20 +1,20 @@
+from raps.utils import convert_to_seconds
+from raps.workload import Workload
+from raps.scheduler import Scheduler
+from raps.power import PowerManager, compute_node_power
+from raps.flops import FLOPSManager
+from raps.ui import LayoutManager
+from raps.config import ConfigManager
+import copy
+from args import args
 from raps.helpers import check_python_version
 check_python_version()
 
-from args import args
-import copy
 
 args_dict1 = copy.deepcopy(vars(args))
 args_dict2 = copy.deepcopy(vars(args))
 print(args_dict1)
 print(args_dict2)
 
-from raps.config import ConfigManager
-from raps.ui import LayoutManager
-from raps.flops import FLOPSManager
-from raps.power import PowerManager, compute_node_power
-from raps.scheduler import Scheduler
-from raps.workload import Workload
-from raps.utils import convert_to_seconds
 
 config1 = ConfigManager(system_name='setonix-cpu').get_config()
 config2 = ConfigManager(system_name='setonix-gpu').get_config()
@@ -53,16 +53,17 @@ print(f"Jobs for setonix-gpu: {len(jobs2)}")
 if args.time:
     timesteps = convert_to_seconds(args.time)
 else:
-    timesteps = 88200 # 24 hours
+    timesteps = 88200  # 24 hours
 
-if args.verbose: print(jobs)
+if args.verbose:
+    print(jobs)
 
 # Create generator objects for both partitions
 gen1 = layout_manager1.run_stepwise(jobs1, timesteps=timesteps)
 gen2 = layout_manager2.run_stepwise(jobs2, timesteps=timesteps)
 
 # Step through both generators in lockstep
-#for _ in range(timesteps):
+# for _ in range(timesteps):
 #    next(gen1)  # Advance first scheduler
 #    next(gen2)  # Advance second scheduler
 
diff --git a/main.py b/main.py
index ba8b57f..af1e08b 100644
--- a/main.py
+++ b/main.py
@@ -87,7 +87,7 @@ def main():
         td = Telemetry(**args_dict)
 
         jobs, timestep_start, timestep_end, args_from_file = \
-                td.load_jobs_times_args_from_files(files=args.replay, args=args)
+            td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config)
         # TODO: Merge args and args_from_files? see telemetry.py:97
     else:  # Synthetic jobs
 
diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py
index e183a7e..bb24a08 100644
--- a/multi-part-sim-mpi.py
+++ b/multi-part-sim-mpi.py
@@ -6,27 +6,23 @@ power, FLOPS, and scheduling models.
 Outputs debug and summary stats for heterogeneous systems (e.g., LUMI, Setonix, Adastra).
""" +from tqdm import tqdm +from mpi4py import MPI +from raps.utils import convert_to_seconds, next_arrival +from raps.workload import Workload +from raps.telemetry import Telemetry +from raps.power import PowerManager, compute_node_power +from raps.flops import FLOPSManager +from raps.engine import Engine +from raps.ui import LayoutManager +from raps.config import ConfigManager, CONFIG_PATH +from args import args +import random +import os +import glob from raps.helpers import check_python_version check_python_version() -import glob -import os -import random -import sys - -from args import args -from raps.config import ConfigManager, CONFIG_PATH -from raps.schedulers.default import PolicyType -from raps.ui import LayoutManager -from raps.engine import Engine -from raps.flops import FLOPSManager -from raps.power import PowerManager, compute_node_power -from raps.telemetry import Telemetry -from raps.workload import Workload -from raps.utils import convert_to_seconds, next_arrival - -from mpi4py import MPI -from tqdm import tqdm def main(): comm = MPI.COMM_WORLD @@ -51,9 +47,9 @@ def main(): # 4) Each rank decides which partition‐indices it owns (round-robin): local_partition_indices = [i for i in range(len(partition_names)) if (i % size) == rank] - local_partition_names = [partition_names[i] for i in local_partition_indices] - local_configs = [configs[i] for i in local_partition_indices] - local_args_dicts = [args_dicts[i] for i in local_partition_indices] + local_partition_names = [partition_names[i] for i in local_partition_indices] + # local_configs = [configs[i] for i in local_partition_indices] # Unused + # local_args_dicts = [args_dicts[i] for i in local_partition_indices] # Unused # 5) Rank 0 builds (or loads) the entire job list, assigns partitions, groups by partition, # then scatters exactly those jobs to each rank. Other ranks just sit in the scatter: @@ -73,7 +69,7 @@ def main(): if args.arrival == 'poisson': for job in tqdm(jobs_full, desc="[rank 0] Rescheduling arrivals…"): p_name = job['partition'] - p_cfg = configs[partition_names.index(p_name)] + p_cfg = configs[partition_names.index(p_name)] job['requested_nodes'] = None job['submit_time'] = next_arrival(1 / p_cfg['JOB_ARRIVAL_TIME']) @@ -86,7 +82,7 @@ def main(): job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0] # --- c) Group “jobs_full” by partition name: - jobs_by_partition = { p: [] for p in partition_names } + jobs_by_partition = {p: [] for p in partition_names} for job in jobs_full: jobs_by_partition[job['partition']].append(job) @@ -103,7 +99,7 @@ def main(): local_jobs = comm.scatter(jobs_for_rank, root=0) # 7) Re‐group each rank’s “local_jobs” into a dict keyed by its local_partition_names: - local_jobs_by_partition = { p: [] for p in local_partition_names } + local_jobs_by_partition = {p: [] for p in local_partition_names} for job in local_jobs: local_jobs_by_partition[job['partition']].append(job) @@ -136,7 +132,7 @@ def main(): timesteps = 88200 # default 24 hours timestep_start = fastforward - timestep_end = timestep_start + timesteps + timestep_end = timestep_start + timesteps # 10) Build a generator for each partition that this rank owns: local_generators = {} diff --git a/multi-part-sim.py b/multi-part-sim.py index 3bc2dc1..024112e 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -7,42 +7,39 @@ lockstep. Outputs per-partition performance, utilization, and energy statistics for systems such as MIT Supercloud, Setonix, Adastra, and LUMI. 
""" +from tqdm import tqdm +from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats +from raps.utils import convert_to_seconds, next_arrival +from raps.workload import Workload +from raps.telemetry import Telemetry +from raps.power import PowerManager, compute_node_power +from raps.flops import FLOPSManager +from raps.engine import Engine +from raps.ui import LayoutManager +from raps.config import ConfigManager, CONFIG_PATH +from raps.args import args +import random +import os +import glob from raps.helpers import check_python_version check_python_version() -import glob -import os -import random -import sys - -from raps.args import args -from raps.config import ConfigManager, CONFIG_PATH -from raps.schedulers.default import PolicyType -from raps.ui import LayoutManager -from raps.engine import Engine -from raps.flops import FLOPSManager -from raps.power import PowerManager, compute_node_power -from raps.telemetry import Telemetry -from raps.workload import Workload -from raps.utils import create_casename, convert_to_seconds, next_arrival -from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats -from tqdm import tqdm # Load configurations for each partition partition_names = args.partitions print(args.partitions) if '*' in args.partitions[0]: - paths = glob.glob(os.path.join(CONFIG_PATH, args.partitions[0].replace("'",""))) + paths = glob.glob(os.path.join(CONFIG_PATH, args.partitions[0].replace("'", ""))) partition_names = [os.path.join(*p.split(os.sep)[-2:]) for p in paths] args.system = partition_names[0].split(os.sep)[0] configs = [ConfigManager(system_name=partition).get_config() for partition in partition_names] args_dicts = [ - {**vars(args), 'config': config, 'partition': partition_names[i]} - for i, config in enumerate(configs) - ] + {**vars(args), 'config': config, 'partition': partition_names[i]} + for i, config in enumerate(configs) +] # Initialize Workload if args.replay: @@ -51,10 +48,9 @@ if args.replay: t0_by_partition = {} t1_by_partition = {} - if args.replay[0].endswith('.npz'): # snapshot mode: pick the right .npz for each partition - snap_map = { os.path.basename(p): p for p in args.replay } + snap_map = {os.path.basename(p): p for p in args.replay} for ad in args_dicts: part = ad['partition'] # e.g. 'mit_supercloud/part-cpu' short = part.split('/')[-1] # 'part-cpu' @@ -73,9 +69,10 @@ if args.replay: print(f"\n[{part}] loading traces from {args.replay[0]} …") jobs_part, t0, t1 = td.load_data(args.replay) jobs_by_partition[part] = jobs_part - #td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1]) + # td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1]) # Check if args need to be extracted or merged! Not implemented yet! 
- td.save_snapshot(jobs=jobs_part, timestep_start=t0, timestep_end=t1, filename=part.split('/')[-1],args=args) + td.save_snapshot(jobs=jobs_part, timestep_start=t0, timestep_end=t1, + filename=part.split('/')[-1], args=args) # --- report how many jobs per partition --- for part, jl in jobs_by_partition.items(): @@ -102,7 +99,7 @@ if args.replay: job.submit_time = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME']) else: # Synthetic workload - wl = Workload(args,*configs) + wl = Workload(args, *configs) total_initial_jobs = args.numjobs @@ -116,11 +113,13 @@ for job in jobs: # Initialize layout managers for each partition layout_managers = {} -for i, (config,ad) in enumerate(zip(configs,args_dicts)): +for i, (config, ad) in enumerate(zip(configs, args_dicts)): pm = PowerManager(compute_node_power, **configs[i]) fm = FLOPSManager(**args_dicts[i]) - sc = Engine(power_manager=pm, flops_manager=fm, cooling_model=None, jobs=jobs_by_partition[config['system_name']], total_initial_jobs=total_initial_jobs, **args_dicts[i]) - layout_managers[config['system_name']] = LayoutManager(args.layout, engine=sc, debug=args.debug, args_dict=ad, **config) + sc = Engine(power_manager=pm, flops_manager=fm, cooling_model=None, + jobs=jobs_by_partition[config['system_name']], total_initial_jobs=total_initial_jobs, **args_dicts[i]) + layout_managers[config['system_name']] = LayoutManager( + args.layout, engine=sc, debug=args.debug, args_dict=ad, **config) # Set simulation timesteps if args.fastforward: @@ -141,7 +140,10 @@ else: time_delta = config['TRACE_QUANTA'] # Create generators for each layout manager -generators = {name: lm.run_stepwise(jobs_by_partition[name], timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta) +generators = {name: lm.run_stepwise(jobs_by_partition[name], + timestep_start=timestep_start, + timestep_end=timestep_end, + time_delta=time_delta) for name, lm in layout_managers.items()} # Step through all generators in lockstep @@ -154,7 +156,7 @@ for timestep in range(timesteps): sys_power = 0 for name, lm in layout_managers.items(): sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else (0, 0.0) - if hasattr(lm.engine.resource_manager,'allocated_cpu_cores'): + if hasattr(lm.engine.resource_manager, 'allocated_cpu_cores'): allocated_cores = lm.engine.resource_manager.allocated_cpu_cores print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} -", f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ", diff --git a/raps/account.py b/raps/account.py index 15863b6..1bdf10f 100644 --- a/raps/account.py +++ b/raps/account.py @@ -94,7 +94,7 @@ class Account: return acct @classmethod - def merge(cls,account1:'Account', account2:'Account') -> 'Account': + def merge(cls, account1: 'Account', account2: 'Account') -> 'Account': """ Destructive merge @@ -140,7 +140,7 @@ class Accounts: self.average_user.energy_allocated = self.all_users.energy_allocated / total_accounts self.average_user.avg_power = self.all_users.avg_power / total_accounts if self.average_user.jobs_completed != 0.0: - self.average_user.update_fugaku_points(self.average_user.energy_allocated,self.average_user.avg_power) + self.average_user.update_fugaku_points(self.average_user.energy_allocated, self.average_user.avg_power) return self def __init__(self, jobs=None): @@ -148,10 +148,10 @@ class Accounts: self.all_users = Account(-2, "All_Users") self.average_user = Account(-1, "Avg_User") if jobs: - if not isinstance(jobs,list): + if not 
isinstance(jobs, list):
                 raise TypeError
             for job_dict in jobs:
-                if not isinstance(job_dict,dict):
+                if not isinstance(job_dict, dict):
                     raise TypeError
                 if job_dict["account"] not in self.account_dict:
                     self.account_dict[job_dict["account"]] = Account(job_dict["account"], jobs_enqueued=0)
@@ -160,7 +160,7 @@ class Accounts:
             self.update_average_user()
         pass
 
-    def updates_all_users_by_account(self,account:Account):
+    def updates_all_users_by_account(self, account: Account):
         self.all_users.jobs_enqueued += account.jobs_enqueued
         self.all_users.jobs_completed += account.jobs_completed
         self.all_users.time_allocated += account.time_allocated
@@ -168,16 +168,13 @@ class Accounts:
         self.all_users.avg_power = self.energy_allocated / self.time_allocated
         self.update_average_user()  # Only necessary if average_user was not updated before calling update all users.
         # Therefore, as this is needed for Fugaku points, this should always be called.
-        self.all_users.update_fugaku_points(self.average_user.energy_allocated,self.average_user.avg_power)
+        self.all_users.update_fugaku_points(self.average_user.energy_allocated, self.average_user.avg_power)
 
-
-
-    def add_account(self, account:Account):
+    def add_account(self, account: Account):
         self.account_dict[account.name] = account
         self.add_user_stats_to_all_users(account)
         # update_average_user() is already called
 
-
     @classmethod
     def from_dict(cls, dictionary):
         accounts = cls()
@@ -219,12 +216,12 @@ class Accounts:
             account.update_statistics(jobstats, self.average_user)
             self.account_dict[jobstats.account] = account
         # Update the summary account (all_users) and the average_user account
-        self.all_users.update_statistics(jobstats,self.average_user)
+        self.all_users.update_statistics(jobstats, self.average_user)
         self.update_average_user()
 
     def to_dict(self):
         acct_dict = {}
-        for account_name,account in self.account_dict.items():
+        for account_name, account in self.account_dict.items():
             acct_dict[account_name] = account.to_dict()
         ret_dict = {}
         ret_dict['account_dict'] = acct_dict
@@ -233,7 +230,7 @@ class Accounts:
         return ret_dict
 
     @classmethod
-    def merge(cls, accounts1:'Accounts', accounts2:'Accounts') -> 'Accounts':
+    def merge(cls, accounts1: 'Accounts', accounts2: 'Accounts') -> 'Accounts':
         """
         Destructive merge of accounts
         """
@@ -242,7 +239,8 @@ class Accounts:
 
         for ac2_k, ac2_v in accounts2.account_dict.items():
             if ac2_k in accounts1.account_dict:
-                merged_accounts.account_dict[ac2_k] = Account.merge(accounts1.account_dict[ac2_k], accounts2.account_dict[ac2_k])
+                merged_accounts.account_dict[ac2_k] = Account.merge(
+                    accounts1.account_dict[ac2_k], accounts2.account_dict[ac2_k])
             else:
                 merged_accounts.account_dict[ac2_k] = ac2_v
         for ac1_k, ac1_v in accounts1.account_dict.items():
@@ -253,15 +251,17 @@ class Accounts:
                 pass
 
         # Update all users -> then update average user -> then fugaku points for all users (order is important!)
-        merged_accounts.all_users = Account.merge(accounts1.all_users,accounts2.all_users)
+        merged_accounts.all_users = Account.merge(accounts1.all_users, accounts2.all_users)
         merged_accounts.update_average_user()
         # Update to average user is needed before fugaku points can be calculated.
if merged_accounts.all_users.jobs_completed != 0: - merged_accounts.all_users.update_fugaku_points(merged_accounts.average_user.energy_allocated, merged_accounts.average_user.avg_power) + merged_accounts.all_users.update_fugaku_points( + merged_accounts.average_user.energy_allocated, merged_accounts.average_user.avg_power) for ac_k, ac_v in merged_accounts.account_dict.items(): if merged_accounts.account_dict[ac_k].jobs_completed != 0: - merged_accounts.account_dict[ac_k].update_fugaku_points(merged_accounts.average_user.energy_allocated, merged_accounts.average_user.avg_power) + merged_accounts.account_dict[ac_k].update_fugaku_points( + merged_accounts.average_user.energy_allocated, merged_accounts.average_user.avg_power) accounts1 = None accounts2 = None diff --git a/raps/args.py b/raps/args.py index 044881c..95e979f 100644 --- a/raps/args.py +++ b/raps/args.py @@ -1,4 +1,7 @@ -import argparse, os, sys, yaml +import argparse +import os +import sys +import yaml from raps.schedulers.default import PolicyType, BackfillType from raps.workload import add_workload_to_parser, check_workload_args @@ -30,7 +33,7 @@ def apply_config_to_args(cfg, args): else: # Enter the commandline argument, but _underscores as the -dashes # are replaced when reading from the commandline, but not in the yaml. - merged[k.replace('-','_')] = v + merged[k.replace('-', '_')] = v # Apply to argparse namespace for k, v in merged.items(): @@ -200,7 +203,6 @@ parser.add_argument("--maxqueue", type=int, default=50, help="Specify the max queue length for continuous job generation.") - def post_process_args(args): if args.time_delta: tdelta_raw, tdelta_down = convert_to_seconds(args.time_delta) diff --git a/raps/cooling.py b/raps/cooling.py index d5c9248..4bdfd87 100644 --- a/raps/cooling.py +++ b/raps/cooling.py @@ -18,6 +18,7 @@ from datetime import timedelta from raps.policy import PolicyType + def get_matching_variables(variables, pattern): # Regex pattern to match strings containing .summary pattern = re.compile(pattern) @@ -58,8 +59,8 @@ class ThermoFluidsModel: Methods ------- initialize(): - Initializes the FMU by extracting the file, reading the model description, setting up input and output variables, - and preparing the model for simulation. + Initializes the FMU by extracting the file, reading the model description, + setting up input and output variables, and preparing the model for simulation. generate_runtime_values(cdu_power, sc) -> dict: Generates runtime values dynamically for the FMU inputs based on CDU power and other configuration parameters. generate_fmu_inputs(runtime_values: dict, uncertainties: bool = False) -> list: @@ -75,6 +76,7 @@ class ThermoFluidsModel: cleanup(): Cleans up the extracted FMU directory, ensuring no temporary files are left behind. """ + def __init__(self, **config): """ Constructs all the necessary attributes for the ThermoFluidsModel object. 
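For reference, the PUE computed by calculate_pue() in the cooling hunks below reduces to a single ratio over quantities visible in the code (total_input_power, floored at 1e-3 to avoid division by zero, plus the summed W_CDUP, W_HTWP, W_CTWP and W_CT pump and cooling-tower work terms read from the FMU output):

    PUE = (P_IT + W_CDUP + W_HTWP + W_CTWP + W_CT) / P_IT

so a PUE of exactly 1.0 would mean zero cooling overhead, and every watt of pump or tower work pushes the ratio above 1.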
@@ -141,15 +143,19 @@ class ThermoFluidsModel: """ # Dynamically generate the power inputs runtime_values = { - f"simulator_1_datacenter_1_computeBlock_{i+1}_cabinet_1_sources_Q_flow_total": cdu_power[i] * self.config['COOLING_EFFICIENCY'] / self.config['RACKS_PER_CDU'] - for i in range(self.config['NUM_CDUS']) + f"simulator_1_datacenter_1_computeBlock_{i + 1}" + f"_cabinet_1_sources_Q_flow_total": cdu_power[i] * + self.config['COOLING_EFFICIENCY'] / self.config['RACKS_PER_CDU'] + for i in range(self.config['NUM_CDUS']) } # Default temperature is from the config temperature = self.config['WET_BULB_TEMP'] # If replay mode is on and weather data is available - if engine.scheduler.policy== PolicyType.REPLAY and self.weather and self.weather.start is not None and self.weather.has_coords: + if engine.scheduler.policy == PolicyType.REPLAY and \ + self.weather and self.weather.start is not None and \ + self.weather.has_coords: # Convert total seconds to timedelta object delta = timedelta(seconds=engine.current_time) target_datetime = self.weather.start + delta @@ -204,7 +210,6 @@ class ThermoFluidsModel: return fmu_inputs - def calculate_pue(self, cooling_input, cooling_output): """ Calculate the Power Usage Effectiveness (PUE) of the data center. @@ -235,7 +240,8 @@ class ThermoFluidsModel: # Get the sum of the work done by all CDU pumps W_CDUPs = sum( - convert_to_watts(cooling_output.get(f'simulator[1].datacenter[1].computeBlock[{idx+1}].cdu[1].summary.W_flow_CDUP_kW')) + convert_to_watts(cooling_output.get( + f'simulator[1].datacenter[1].computeBlock[{idx + 1}].cdu[1].summary.W_flow_CDUP_kW')) for idx in range(self.config['NUM_CDUS']) ) @@ -246,7 +252,8 @@ class ThermoFluidsModel: total_input_power = np.maximum(total_cooling_input_power, 1e-3) # Calculate PUE - pue = (total_input_power + np.sum(W_CDUPs) + np.sum(W_HTWPs) + np.sum(W_CTWPs) + np.sum(W_CTs)) / total_input_power + pue = (total_input_power + np.sum(W_CDUPs) + np.sum(W_HTWPs) + + np.sum(W_CTWPs) + np.sum(W_CTs)) / total_input_power return pue @@ -318,7 +325,7 @@ class ThermoFluidsModel: # Cleanup - at the end of the simulation shutil.rmtree(self.unzipdir, ignore_errors=True) - def simulate_cooling(self,*, rack_power, engine): + def simulate_cooling(self, *, rack_power, engine): cdu_power = rack_power.T[-1] * 1000 runtime_values = self.generate_runtime_values(cdu_power, engine) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 58df564..df186b6 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -1,7 +1,8 @@ """ # get the data - Download `AdastaJobsMI250_15days.parquet` from https://zenodo.org/records/14007065/files/AdastaJobsMI250_15days.parquet + Download `AdastaJobsMI250_15days.parquet` from + https://zenodo.org/records/14007065/files/AdastaJobsMI250_15days.parquet # to simulate the dataset @@ -23,7 +24,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs +from ..utils import next_arrival_byconfkwargs def load_data(jobs_path, **kwargs): @@ -94,10 +95,10 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): wall_time = int(jobs_df.loc[jidx, 'run_time']) if wall_time <= 0: - print("error wall_time",wall_time) + print("error wall_time", wall_time) continue if nodes_required <= 0: - print("error nodes_required",nodes_required) + print("error nodes_required", nodes_required) continue if validate: @@ -112,11 +113,11 @@ def load_data_from_df(jobs_df: pd.DataFrame, 
**kwargs): cpu_power = jobs_df.loc[jidx, 'cpu_power_consumption'] cpu_power_array = cpu_power.tolist() cpu_watts = sum(cpu_power_array) / (wall_time * nodes_required) - cpu_min_power = config['POWER_CPU_IDLE'] * config['CPUS_PER_NODE'] - cpu_max_power = config['POWER_CPU_MAX'] * config['CPUS_PER_NODE'] + # cpu_min_power = config['POWER_CPU_IDLE'] * config['CPUS_PER_NODE'] # Unused + # cpu_max_power = config['POWER_CPU_MAX'] * config['CPUS_PER_NODE'] # Unused cpu_util = (cpu_watts / float(config['POWER_CPU_IDLE']) - config['CPUS_PER_NODE']) \ - / ((float(config['POWER_CPU_MAX']) / float(config['POWER_CPU_IDLE'])) - 1.0) + / ((float(config['POWER_CPU_MAX']) / float(config['POWER_CPU_IDLE'])) - 1.0) # power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) # print("cpu_watts",cpu_watts,"cpu_util",cpu_util) @@ -131,14 +132,14 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): cpu_power = cpu_power[:min_length] mem_power = mem_power[:min_length] - gpu_power = (node_power - cpu_power - mem_power \ + gpu_power = (node_power - cpu_power - mem_power - ([config['NICS_PER_NODE'] * config['POWER_NIC']])) gpu_power_array = gpu_power.tolist() gpu_watts = sum(gpu_power_array) / (wall_time * nodes_required) - gpu_min_power = config['POWER_GPU_IDLE'] * config['GPUS_PER_NODE'] - gpu_max_power = config['POWER_GPU_MAX'] * config['GPUS_PER_NODE'] + # gpu_min_power = config['POWER_GPU_IDLE'] * config['GPUS_PER_NODE'] # Unused + # gpu_max_power = config['POWER_GPU_MAX'] * config['GPUS_PER_NODE'] # Unused gpu_util = (gpu_watts / float(config['POWER_GPU_IDLE']) - config['GPUS_PER_NODE']) \ - / ((float(config['POWER_GPU_MAX']) / float(config['POWER_GPU_IDLE'])) - 1.0) + / ((float(config['POWER_GPU_MAX']) / float(config['POWER_GPU_IDLE'])) - 1.0) # power_to_utilization(gpu_power_array, gpu_min_power, gpu_max_power) # print("gpu_watts",gpu_watts,"gpu_util",gpu_util) gpu_trace = np.maximum(0, gpu_util) @@ -163,7 +164,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): diff = start_timestamp - telemetry_start_timestamp start_time = int(diff.total_seconds()) - end_timestamp = jobs_df.loc[jidx,'end_time'] + end_timestamp = jobs_df.loc[jidx, 'end_time'] diff = end_timestamp - telemetry_start_timestamp end_time = int(diff.total_seconds()) @@ -206,6 +207,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): print("jobs not added: ", count_jobs_notOK) return jobs, telemetry_start_time, telemetry_end_time + def xname_to_index(xname: str, config: dict): """ Converts an xname string to an index value based on system configuration. 
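The docstring above only states what xname_to_index() returns; the hunk below shows the arithmetic. As a standalone sketch, here is the same computation with the already-parsed xname fields passed in directly (the parsing itself is elided, and the three keyword defaults are assumed placeholder values rather than the real adastraMI250 config entries):

    def fields_to_index(row, col, chassis, slot, node,
                        blades_per_chassis=8, nodes_per_blade=2, nodes_per_rack=64):
        # Same arithmetic as xname_to_index() below; keyword defaults are assumptions.
        if row == 6:
            col -= 9  # mirrors the row-6 column offset in the source
        rack_index = row * 12 + col  # 12 rack positions per row
        node_index = (chassis * blades_per_chassis * nodes_per_blade
                      + slot * nodes_per_blade + node)
        return rack_index * nodes_per_rack + node_index

    # Worked example: row=2, col=5, chassis=3, slot=5, node=1
    # rack_index = 2*12 + 5 = 29; node_index = 3*16 + 10 + 1 = 59
    # flat index = 29*64 + 59 = 1915
    assert fields_to_index(2, 5, 3, 5, 1) == 1915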
@@ -225,7 +227,8 @@ def xname_to_index(xname: str, config: dict):
     if row == 6:
         col -= 9
     rack_index = row * 12 + col
-    node_index = chassis * config['BLADES_PER_CHASSIS'] * config['NODES_PER_BLADE'] + slot * config['NODES_PER_BLADE'] + node
+    node_index = chassis * config['BLADES_PER_CHASSIS'] * \
+        config['NODES_PER_BLADE'] + slot * config['NODES_PER_BLADE'] + node
 
     return rack_index * config['SC_SHAPE'][2] + node_index
 
@@ -266,6 +269,7 @@ CDU_NAMES = [
     'x2609c1',
 ]
 
+
 def cdu_index_to_name(index: int, config: dict):
     return CDU_NAMES[index - 1]
 
diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py
index 495de2e..ed7798a 100644
--- a/raps/dataloaders/frontier.py
+++ b/raps/dataloaders/frontier.py
@@ -150,9 +150,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
     jobprofile_df = jobprofile_df.sort_values(by='timestamp')
     jobprofile_df = jobprofile_df.reset_index(drop=True)
 
-    #telemetry_start_timestamp = jobs_df['time_snapshot'].min() # Earliets time snapshot within the day!
+    # telemetry_start_timestamp = jobs_df['time_snapshot'].min()  # Earliest time snapshot within the day!
     telemetry_start_timestamp = jobprofile_df['timestamp'].min()  # Earliest time snapshot within the day!
-    #telemetry_end_timestamp = jobs_df['time_snapshot'].max() # This time has nothing to do with the jobs!
+    # telemetry_end_timestamp = jobs_df['time_snapshot'].max()  # This time has nothing to do with the jobs!
     telemetry_end_timestamp = jobprofile_df['timestamp'].max()  # Latest time snapshot within the day!
     # Time that can be simulated
     # Take earliest time as baseline reference
 
     first_start_timestamp = jobs_df['time_start'].min()
     diff = first_start_timestamp - telemetry_start_timestamp
-    first_start = int(diff.total_seconds()) # negative seconds or 0
+    # first_start = int(diff.total_seconds())  # negative seconds or 0  # Unused
 
     num_jobs = len(jobs_df)
     if debug:
         print("num_jobs:", num_jobs)
         print("telemetry_start:", telemetry_start, "simulation_fin", telemetry_end)
-        print("telemetry_start_timestamp:", telemetry_start_timestamp, "telemetry_end_timestamp", telemetry_end_timestamp)
-        print("first_start_timestamp:",first_start_timestamp, "last start timestamp:", jobs_df['time_start'].max())
+        print("telemetry_start_timestamp:", telemetry_start_timestamp,
+              "telemetry_end_timestamp", telemetry_end_timestamp)
+        print("first_start_timestamp:", first_start_timestamp, "last start timestamp:", jobs_df['time_start'].max())
 
     jobs = []
     # Map dataframe to job state. Add results to jobs list
@@ -186,21 +187,22 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
             name = encrypt(name)
 
         if validate:
-            cpu_power = jobprofile_df[jobprofile_df['allocation_id'] \
+            cpu_power = jobprofile_df[jobprofile_df['allocation_id']
                                       == allocation_id]['mean_node_power']
             cpu_trace = cpu_power.values
             gpu_trace = cpu_trace
 
         else:
-            cpu_power = jobprofile_df[jobprofile_df['allocation_id'] \
+            cpu_power = jobprofile_df[jobprofile_df['allocation_id']
                                       == allocation_id]['sum_cpu0_power']
             cpu_power_array = cpu_power.values
             cpu_min_power = nodes_required * config['POWER_CPU_IDLE'] * config['CPUS_PER_NODE']
             cpu_max_power = nodes_required * config['POWER_CPU_MAX'] * config['CPUS_PER_NODE']
-            cpu_util = power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) # Will be negative! as cpu_power_array[i] can be smaller than cpu_min_power
+            # Will be negative! as cpu_power_array[i] can be smaller than cpu_min_power
+            cpu_util = power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power)
             cpu_trace = cpu_util * config['CPUS_PER_NODE']
 
-            gpu_power = jobprofile_df[jobprofile_df['allocation_id'] \
+            gpu_power = jobprofile_df[jobprofile_df['allocation_id']
                                       == allocation_id]['sum_gpu_power']
             gpu_power_array = gpu_power.values
 
@@ -229,7 +231,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
             end_time = diff.total_seconds()
             if not start_time <= end_time or np.isnan(end_time):
                 continue  # Start_time is not smaller than end_time or is not valid
-                #Skip entry.
+                # Skip entry.
 
         wall_time = end_time - start_time
         if np.isnan(wall_time):
@@ -250,7 +252,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
                 trace_start_time = 0
                 trace_end_time = trace_time
             else:
-                print(f"Job: {job_id} {end_state} {start_time} - {end_time},Trace: {trace_start_time} - {trace_end_time} Missing: {missing_trace_time}!")
+                print(f"Job: {job_id} {end_state} {start_time} - {end_time}, "
+                      f"Trace: {trace_start_time} - {trace_end_time}, "
+                      f"Missing: {missing_trace_time}!")
         else:
             trace_missing_values = False
 
@@ -261,7 +265,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
         if arrival == 'poisson':  # Modify the arrival times of the jobs according to Poisson distribution
             scheduled_nodes = None
-            submit_time = next_arrival_byconfkwargs(config,kwargs)
+            submit_time = next_arrival_byconfkwargs(config, kwargs)
             start_time = None  # ?
             end_time = None  # ?
             priority = aging_boost(nodes_required)
@@ -279,12 +283,18 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
             print("ignoring job b/c zero trace:", jidx, submit_time, start_time, nodes_required)
             continue  # SKIP!
         if end_time < telemetry_start:
-            # raise ValueError("Job ends before frist recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
-            print("Job ends before frist recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
+            # raise ValueError("Job ends before first recorded telemetry entry:",
+            #                  job_id, "start:", start_time,"end:",end_time,
+            #                  " Telemetry: ", len(gpu_trace), "entries.")
+            print("Job ends before first recorded telemetry entry:", job_id, "start:",
+                  start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.")
             continue  # SKIP!
         if start_time > telemetry_end:
-            # raise ValueError("Job starts after last recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
-            print("Job starts after last recorded telemetry entry:",job_id, "start:", start_time,"end:",end_time, " Telemetry: ", len(gpu_trace), "entries.")
+            # raise ValueError("Job starts after last recorded telemetry entry:",
+            #                  job_id, "start:", start_time,"end:",end_time,
+            #                  " Telemetry: ", len(gpu_trace), "entries.")
+            print("Job starts after last recorded telemetry entry:", job_id, "start:",
+                  start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.")
             continue  # SKIP!
if gpu_trace.size > 0 and (jid == job_id or jid == '*'):  # and time_submit >= 0:
 
@@ -330,7 +340,8 @@ def xname_to_index(xname: str, config: dict):
     if row == 6:
         col -= 9
     rack_index = row * 12 + col
-    node_index = chassis * config['BLADES_PER_CHASSIS'] * config['NODES_PER_BLADE'] + slot * config['NODES_PER_BLADE'] + node
+    node_index = chassis * config['BLADES_PER_CHASSIS'] * \
+        config['NODES_PER_BLADE'] + slot * config['NODES_PER_BLADE'] + node
 
     return rack_index * config['SC_SHAPE'][2] + node_index
 
diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py
index 4e3a1ad..f28ac6b 100644
--- a/raps/dataloaders/fugaku.py
+++ b/raps/dataloaders/fugaku.py
@@ -17,7 +17,6 @@ import pandas as pd
 from tqdm import tqdm
 
 from ..job import job_dict, Job
-from ..utils import next_arrival
 
 
 def load_data(path, **kwargs):
@@ -51,10 +50,10 @@ def load_data_from_df(df, **kwargs):
         int: Telemetry Start (in seconds 0)
         int: Telemetry End (in seconds)
     """
-    encrypt_bool = kwargs.get('encrypt')
-    arrival = kwargs.get('arrival')
+    # encrypt_bool = kwargs.get('encrypt')  # Unused
+    # arrival = kwargs.get('arrival')  # Unused
     validate = kwargs.get('validate')
-    jid = kwargs.get('jid', '*')
+    # jid = kwargs.get('jid', '*')  # Unused
     config = kwargs.get('config')
 
     job_list = []
@@ -86,7 +85,8 @@ def load_data_from_df(df, **kwargs):
 
         else:
             # cpu_trace = row['perf1'] if 'perf1' in df.columns else 0  # Assuming some performance metric as cpu_trace
-            cpu_trace = row['perf1'] / (row['perf1'] + row['perf6']) if 'perf1' in df.columns else 0 # Total Opts / Total Ops + Idle Ops
+            # Total Ops / (Total Ops + Idle Ops)
+            cpu_trace = row['perf1'] / (row['perf1'] + row['perf6']) if 'perf1' in df.columns else 0
             gpu_trace = 0  # Set to 0 as GPU trace is not explicitly provided
 
             # No network trace
@@ -98,7 +98,8 @@ def load_data_from_df(df, **kwargs):
 
         priority = row['pri'] if 'pri' in df.columns else 0
 
-        submit_timestamp = pd.to_datetime(row['adt']) if 'adt' in df.columns else -1 # Else job was submitted in the past
+        submit_timestamp = pd.to_datetime(row['adt']) if 'adt' in df.columns else - \
+            1  # Else job was submitted in the past
 
         diff = submit_timestamp - telemetry_start_timestamp
         submit_time = int(diff.total_seconds())
@@ -113,8 +114,8 @@ def load_data_from_df(df, **kwargs):
         end_time = int(diff.total_seconds())
 
         wall_time = end_time - start_time
-        #duration = int(row['duration']) if 'duration' in df.columns else 0 # in seconds Recorded duration and wall_time do not match!
- #if (wall_time != duration): + # duration = int(row['duration']) if 'duration' in df.columns else 0 + # if (wall_time != duration): # if abs(wall_time - duration) <= 1: # offset is often 1 # wall_time = min(wall_time,duration) # else: @@ -174,4 +175,4 @@ def cdu_index_to_name(index: int, config: dict): def cdu_pos(index: int, config: dict) -> tuple[int, int]: """ Return (row, col) tuple for a cdu index """ - return (0, index) # TODO + return (0, index) # TODO diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 61f544e..54dfc1d 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -41,7 +41,7 @@ Downloading Google Cluster Traces v2: gsutil cp gs://clusterdata_2019_a/collection_events-000000000000.json.gz ./google_cluster_data_sample/ gsutil cp gs://clusterdata_2019_a/collection_events-000000000000.parquet.gz ./google_cluster_data_sample/ - # Download the first JSON and Parquet file for instance_events + # Download the first JSON and Parquet file for instance_events gsutil cp gs://clusterdata_2019_a/instance_events-000000000000.json.gz ./google_cluster_data_sample/ gsutil cp gs://clusterdata_2019_a/instance_events-000000000000.parquet.gz ./google_cluster_data_sample/ @@ -61,32 +61,32 @@ Following explanation from Gemini-CLI on how the job nodes required is being com 1. Machine Capacity Determination: * The machine_events data is loaded to get information about the cluster's machines. - * The CPU_capacity and memory_capacity of a typical machine are determined by taking - the mode() (most frequent value) of these columns from the machine_df. This gives + * The CPU_capacity and memory_capacity of a typical machine are determined by taking + the mode() (most frequent value) of these columns from the machine_df. This gives us the standard CPU and memory capacity of a single node in the cluster. 2. Task Resource Request Aggregation: - * The task_events data is loaded, which contains CPU_request and memory_request for + * The task_events data is loaded, which contains CPU_request and memory_request for individual tasks. - * These task requests are then grouped by job_ID, and the CPU_request and memory_request - are summed up for all tasks belonging to the same job. This gives us the total CPU and + * These task requests are then grouped by job_ID, and the CPU_request and memory_request + are summed up for all tasks belonging to the same job. This gives us the total CPU and memory requested by each job. 3. Nodes Required Calculation (CPU and Memory): - * For each job, the total CPU_request is divided by the cpu_capacity of a single machine. - The np.ceil() function is used to round up to the nearest whole number, ensuring that + * For each job, the total CPU_request is divided by the cpu_capacity of a single machine. + The np.ceil() function is used to round up to the nearest whole number, ensuring that enough nodes are allocated to satisfy the CPU demand. This result is stored as nodes_required_cpu. - * Similarly, the total memory_request is divided by the mem_capacity of a single machine, + * Similarly, the total memory_request is divided by the mem_capacity of a single machine, and np.ceil() is applied. This result is stored as nodes_required_mem. 4. Final `nodes_required`: - * The final nodes_required for a job is determined by taking the np.maximum() of nodes_required_cpu - and nodes_required_mem. 
This ensures that the job is allocated enough nodes to satisfy both its CPU + * The final nodes_required for a job is determined by taking the np.maximum() of nodes_required_cpu + and nodes_required_mem. This ensures that the job is allocated enough nodes to satisfy both its CPU and memory requirements. The result is then cast to an integer (.astype(int)). 5. Filtering: - * Finally, any jobs for which the calculated nodes_required is 0 (meaning they requested no CPU or memory) + * Finally, any jobs for which the calculated nodes_required is 0 (meaning they requested no CPU or memory) are filtered out, as these jobs would not require any nodes in the simulation. """ @@ -150,12 +150,14 @@ V2_COLUMN_NAMES = { } SUPPORTED_EVENT_TYPES = list(V2_COLUMN_NAMES.keys()) + class GoogleClusterV2DataLoader: """ Loader for Google Cluster V2 CSV.GZ files. """ - def __init__(self, base_path: str, event_type: str="job_events", - file_indices: Optional[List[int]]=None, concatenate: bool=True): + + def __init__(self, base_path: str, event_type: str = "job_events", + file_indices: Optional[List[int]] = None, concatenate: bool = True): self.base_path = os.path.expanduser(base_path) if event_type not in SUPPORTED_EVENT_TYPES: raise ValueError(f"Unsupported event type: '{event_type}'") @@ -202,8 +204,8 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any config = kwargs.get('config') # Unpack list if isinstance(data_path, list): - if len(data_path)==1: - data_path=data_path[0] + if len(data_path) == 1: + data_path = data_path[0] else: raise ValueError(f"Expected single path, got {data_path}") base_path = os.path.expanduser(data_path) @@ -233,7 +235,8 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any job_resources['nodes_required_cpu'] = np.ceil(job_resources['CPU_request'] / cpu_capacity) job_resources['nodes_required_mem'] = np.ceil(job_resources['memory_request'] / mem_capacity) # The final nodes_required is the maximum of CPU-driven and memory-driven node requirements - job_resources['nodes_required'] = np.maximum(job_resources['nodes_required_cpu'], job_resources['nodes_required_mem']).astype(int) + job_resources['nodes_required'] = np.maximum( + job_resources['nodes_required_cpu'], job_resources['nodes_required_mem']).astype(int) # Create a dictionary for quick lookup of nodes_required by job_ID nodes_required_map = job_resources.set_index('job_ID')['nodes_required'].to_dict() @@ -250,13 +253,13 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any # Load submit events loader = GoogleClusterV2DataLoader(base_path, event_type="job_events", concatenate=True) df = next(iter(loader)) - for col in ("timestamp","job_ID","event_type"): + for col in ("timestamp", "job_ID", "event_type"): if col not in df.columns: raise ValueError(f"Missing column {col}") - df = df[df["event_type"]==0] - df["timestamp"] = df["timestamp"].astype(float) / 1e6 # convert from microseconds → seconds + df = df[df["event_type"] == 0] + df["timestamp"] = df["timestamp"].astype(float) / 1e6 # convert from microseconds → seconds t0 = df["timestamp"].min() - t1 = df["timestamp"] - t0 + # t1 = df["timestamp"] - t0 # Unused # Get trace quanta trace_quanta = config['TRACE_QUANTA'] @@ -267,37 +270,38 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any # Convert microseconds → seconds for task usage usage_df["start_time"] = usage_df["start_time"].astype(float) / 1e6 - usage_df["end_time"] = usage_df["end_time" 
].astype(float) / 1e6
+    usage_df["end_time"] = usage_df["end_time"].astype(float) / 1e6
 
     # Build per-job start and end times (seconds since trace-start)
     usage_map_start = usage_df.groupby("job_ID")["start_time"].min().to_dict()
-    usage_map_end = usage_df.groupby("job_ID")["end_time" ].max().to_dict()
+    usage_map_end = usage_df.groupby("job_ID")["end_time"].max().to_dict()
 
     # rename to avg
     if "CPU_usage_rate" in usage_df.columns:
-        usage_df.rename(columns={"CPU_usage_rate":"CPU_usage_avg"}, inplace=True)
-    usage_df["job_ID"] = usage_df["job_ID"].astype(int)
+        usage_df.rename(columns={"CPU_usage_rate": "CPU_usage_avg"}, inplace=True)
+    usage_df["job_ID"] = usage_df["job_ID"].astype(int)
     usage_df["CPU_usage_avg"] = usage_df["CPU_usage_avg"].astype(float)
     usage_map = usage_df.groupby("job_ID")["CPU_usage_avg"].apply(lambda s: s.to_numpy()).to_dict()
-    #print(usage_map)
+    # print(usage_map)
 
     # Filter to jobs with usage data AND valid resource requests
     df = df[df["job_ID"].isin(usage_map) & df["job_ID"].isin(job_resources['job_ID'])]
 
     jobs: List[Any] = []
-    jid_f = kwargs.get('jid','*')
+    jid_f = kwargs.get('jid', '*')
 
     for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Loading jobs"):
         jid = int(row["job_ID"])
 
         start = usage_map_start[jid] - t0
-        end = usage_map_end [jid] - t0
-        wall = end - start
+        end = usage_map_end[jid] - t0
+        wall = end - start
 
-        #nodes_required = int(nodes_required_map.get(jid, 1)) # Default to 1 if not found
-        nodes_required = int(nodes_required_map.get(jid))
+        # nodes_required = int(nodes_required_map.get(jid, 1))  # Default to 1 if not found
+        # nodes_required = int(nodes_required_map.get(jid))  # Unused
 
-        if jid_f!='*' and str(jid)!=str(jid_f): continue
+        if jid_f != '*' and str(jid) != str(jid_f):
+            continue
         trace = usage_map[jid]
         # ensure gpu_trace is same length as cpu_trace
         gpu_trace = np.zeros_like(trace, dtype=float)
@@ -310,23 +314,23 @@
         job_d = job_dict(
             nodes_required=nr,
             name=f"job_{jid}",
-            account=f"user_{row.get('user_name','unknown')}",
+            account=f"user_{row.get('user_name', 'unknown')}",
             cpu_trace=trace, gpu_trace=gpu_trace,
             nrx_trace=[], ntx_trace=[],
             end_state="UNKNOWN", scheduled_nodes=[],
-            id=jid, priority=int(row.get('scheduling_class',0)),
-            #submit_time=row["timestamp"], time_limit=0,
+            id=jid, priority=int(row.get('scheduling_class', 0)),
+            # submit_time=row["timestamp"], time_limit=0,
             submit_time=start, time_limit=0,
             start_time=start, end_time=end,
             wall_time=wall, trace_time=row["timestamp"],
             trace_start_time=start, trace_end_time=end,
             trace_quanta=trace_quanta
         )
         # Wrap dict in a real Job so telemetry.save_snapshot() can use __dict__
-        #if nodes_required > 0:
+        # if nodes_required > 0:
         jobs.append(Job(job_d))
 
     # Compute simulation span: start at t=0, end at the latest job finish
     simulation_start = 0
-    simulation_end = int(max(usage_map_end.values()) - t0)
+    simulation_end = int(max(usage_map_end.values()) - t0)
 
     return jobs, simulation_start, simulation_end
 
diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py
index 0a1993d..bff9098 100644
--- a/raps/dataloaders/lassen.py
+++ b/raps/dataloaders/lassen.py
@@ -26,7 +26,7 @@ Usage Instructions:
     python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 365d -t 1d
 
     # For the network replay this command gives suitable snapshots:
-    python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson
+    python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson  # noqa
 """
 
 import math
@@ -46,7 +46,8 @@ def load_data(path, **kwargs):
     Loads data from the given file paths and returns job info.
     """
     nrows = None
-    alloc_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_history_hashed.csv'), nrows=nrows, low_memory=False)
+    alloc_df = pd.read_csv(os.path.join(
+        path[0], 'final_csm_allocation_history_hashed.csv'), nrows=nrows, low_memory=False)
     node_df = pd.read_csv(os.path.join(path[0], 'final_csm_allocation_node_history.csv'), nrows=nrows, low_memory=False)
     step_df = pd.read_csv(os.path.join(path[0], 'final_csm_step_history.csv'), nrows=nrows, low_memory=False)
     return load_data_from_df(alloc_df, node_df, step_df, **kwargs)
@@ -63,7 +64,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
     verbose = kwargs.get('verbose')
     fastforward = kwargs.get('fastforward')  # int in seconds
 
-    allocation_df['job_submit_timestamp'] = pd.to_datetime(allocation_df['job_submit_time'], format='mixed', errors='coerce')
+    allocation_df['job_submit_timestamp'] = pd.to_datetime(
+        allocation_df['job_submit_time'], format='mixed', errors='coerce')
     allocation_df['begin_timestamp'] = pd.to_datetime(allocation_df['begin_time'], format='mixed', errors='coerce')
     allocation_df['end_timestamp'] = pd.to_datetime(allocation_df['end_time'], format='mixed', errors='coerce')
 
@@ -90,8 +92,10 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
     simulation_end_timestamp = simulation_start_timestamp + time_to_simulate_timedelta
 
     # As these are >1.4M jobs, filtered to the simulated timestamps before creating the job structs.
-    allocation_df = allocation_df[allocation_df['end_timestamp'] >= simulation_start_timestamp] # Job should not have ended before the simulation time
-    allocation_df = allocation_df[allocation_df['job_submit_timestamp'] < simulation_end_timestamp] # Job has to have been submited before or during the simulaion time
+    # Job should not have ended before the simulation time
+    allocation_df = allocation_df[allocation_df['end_timestamp'] >= simulation_start_timestamp]
+    # Job has to have been submitted before or during the simulation time
+    allocation_df = allocation_df[allocation_df['job_submit_timestamp'] < simulation_end_timestamp]
 
     job_list = []
 
@@ -99,7 +103,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
 
         account = row['hashed_user_id']
         job_id = int(row['primary_job_id'])
-        allocation_id = row['allocation_id']
+        # allocation_id = row['allocation_id']  # Unused
         nodes_required = row['num_nodes']
         end_state = row['exit_status']
         name = str(uuid.uuid4())[:6]  # This generates a random 6 char identifier....
@@ -140,14 +144,16 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
         else:
             gpu_power = gpu_node_idle_power
         if gpu_power < gpu_node_idle_power:
-            # print(gpu_power, gpu_node_idle_power) # Issue: RAPS assumes power is between idle and max, but C-states are not considered!
+            # print(gpu_power, gpu_node_idle_power)
+            # Issue: RAPS assumes power is between idle and max, but C-states are not considered!
gpu_power = gpu_node_idle_power # Setting to idle as other parts of the sim make this assumption - assert gpu_power >= gpu_node_idle_power, f"{gpu_power} >= {gpu_node_idle_power}" + f" gpu_power = ({gpu_node_energy.sum()} / {nodes_required}) / {wall_time}" + assert gpu_power >= gpu_node_idle_power, f"{gpu_power} >= {gpu_node_idle_power}" + \ + f" gpu_power = ({gpu_node_energy.sum()} / {nodes_required}) / {wall_time}" gpu_min_power = gpu_node_idle_power gpu_max_power = config['POWER_GPU_MAX'] * config['GPUS_PER_NODE'] # power_to_utilization has issues! As it is unclear if gpu_power is for a single gpu or all gpus of a node. # The multiplication by GPUS_PER_NODE fixes this but is patch-work! TODO Refactor and fix - gpu_util = power_to_utilization(gpu_power,gpu_min_power,gpu_max_power) + gpu_util = power_to_utilization(gpu_power, gpu_min_power, gpu_max_power) # gpu_util should to be between 0 an 4 (4 GPUs), where 4 is all GPUs full utilization. gpu_util_scalar = gpu_util * config['GPUS_PER_NODE'] @@ -162,7 +168,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): cpu_util = cpu_node_usage.sum() / 10e9 / nodes_required / wall_time / threads_per_core else: cpu_util = 0.0 - assert cpu_util >= 0, f"{cpu_util} = {cpu_node_usage.sum()} / 10e9 / {nodes_required} / {wall_time} / {threads_per_core}" + assert cpu_util >= 0, f"{cpu_util} = {cpu_node_usage.sum()} / 10e9 " \ + f"/ {nodes_required} / {wall_time} / {threads_per_core}" # cpu_util should be between 0 an 2 (2 CPUs) @@ -193,7 +200,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): if arrival == 'poisson': # Modify the submit times according to Poisson process scheduled_nodes = None - submit_time = fastforward + next_arrival_byconfkwargs(config,kwargs) + submit_time = fastforward + next_arrival_byconfkwargs(config, kwargs) start_time = submit_time # Pretend Job could start immediately # Alternative: None end_time = submit_time + wall_time # Alternative: None else: # Prescribed replay @@ -275,10 +282,10 @@ def compute_time_offset(begin_time, reference_time): def adjust_bursts(burst_intervals, total, intervals): bursts = burst_intervals / np.sum(burst_intervals) * total bursts = np.round(bursts).astype(int) - adjustment = total - np.sum(bursts) + # adjustment = total - np.sum(bursts) # Unused - ## Distribute adjustment across non-zero elements to avoid negative values - #if adjustment != 0: + # Distribute adjustment across non-zero elements to avoid negative values + # if adjustment != 0: # for i in range(len(bursts)): # if bursts[i] > 0: # bursts[i] += adjustment % (2^64-1) # This can overflow! diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index d4e7b0f..bb345a3 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -23,7 +23,6 @@ """ import uuid -import random import numpy as np import pandas as pd from tqdm import tqdm @@ -60,14 +59,14 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): The list of parsed jobs. 
""" config = kwargs.get('config') - min_time = kwargs.get('min_time', None) + # min_time = kwargs.get('min_time', None) # Unused arrival = kwargs.get('arrival') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') debug = kwargs.get('debug') - #fastforward = kwargs.get('fastforward') - #if fastforward: + # fastforward = kwargs.get('fastforward') + # if fastforward: # print(f"fast-forwarding {fastforward} seconds") # Sort jobs dataframe based on values in time_start column, adjust indices after sorting @@ -91,8 +90,9 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): if debug: print("num_jobs:", num_jobs) print("telemetry_start:", telemetry_start, "simulation_fin", telemetry_end) - print("telemetry_start_timestamp:", telemetry_start_timestamp, "telemetry_end_timestamp", telemetry_end_timestamp) - print("first_start_timestamp:",first_start_timestamp, "last start timestamp:", jobs_df['time_start'].max()) + print("telemetry_start_timestamp:", telemetry_start_timestamp, + "telemetry_end_timestamp", telemetry_end_timestamp) + print("first_start_timestamp:", first_start_timestamp, "last start timestamp:", jobs_df['time_start'].max()) jobs = [] @@ -137,8 +137,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): mem_power = mem_power[:min_length] gpu_power = (node_power - cpu_power - mem_power - - ([nodes_required * config['NICS_PER_NODE'] * config['POWER_NIC']] * len(node_power)) - - ([nodes_required * config['POWER_NVME']] * len(node_power))) + - ([nodes_required * config['NICS_PER_NODE'] * config['POWER_NIC']] * len(node_power)) + - ([nodes_required * config['POWER_NVME']] * len(node_power))) gpu_power_array = gpu_power.tolist() gpu_min_power = nodes_required * config['POWER_GPU_IDLE'] * config['GPUS_PER_NODE'] gpu_max_power = nodes_required * config['POWER_GPU_MAX'] * config['GPUS_PER_NODE'] @@ -167,7 +167,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): if arrival == 'poisson': # Modify the arrival times according to Poisson distribution scheduled_nodes = None - submit_time = next_arrival_byconfkwargs(config,kwargs) + submit_time = next_arrival_byconfkwargs(config, kwargs) start_time = None end_time = None else: # Prescribed replay @@ -195,26 +195,24 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): trace_missing_values = True # What does this do? 
- #if jid == '*': + # if jid == '*': # # submit_time = max(submit_time.total_seconds(), 0) # submit_timestamp = jobs_df.loc[jidx, 'submit_time'] # diff = submit_timestamp - telemetry_start_timestamp # submit_time = diff.total_seconds() - - #else: + # else: # # When extracting out a single job, run one iteration past the end of the job # submit_time = config['UI_UPDATE_FREQ'] - if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: job_info = job_dict(nodes_required=nodes_required, name=name, account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, nrx_trace=[],ntx_trace=[], + gpu_trace=gpu_trace, nrx_trace=[], ntx_trace=[], end_state=end_state, scheduled_nodes=scheduled_nodes, - id=job_id, priority=priority,partition=partition, + id=job_id, priority=priority, partition=partition, submit_time=submit_time, time_limit=time_limit, start_time=start_time, end_time=end_time, wall_time=wall_time, trace_time=trace_time, @@ -239,4 +237,4 @@ def cdu_index_to_name(index: int, config: dict): def cdu_pos(index: int, config: dict) -> tuple[int, int]: """ Return (row, col) tuple for a cdu index """ - return (0, index) # TODO + return (0, index) # TODO diff --git a/raps/dataloaders/mit_supercloud/cli.py b/raps/dataloaders/mit_supercloud/cli.py index 9daccf7..0596e5c 100644 --- a/raps/dataloaders/mit_supercloud/cli.py +++ b/raps/dataloaders/mit_supercloud/cli.py @@ -3,6 +3,7 @@ from .download import download from .loader import load_data from .utils import DEFAULT_START, DEFAULT_END + def main(): p = argparse.ArgumentParser(prog="mit_supercloud") subs = p.add_subparsers(dest="cmd", required=True) @@ -18,7 +19,7 @@ def main(): default=DEFAULT_END, help="End datetime, in ISO format (e.g. '2021-05-21T16:45')." ) - common.add_argument("--partition", choices=["all","part-cpu","part-gpu"], default="all") + common.add_argument("--partition", choices=["all", "part-cpu", "part-gpu"], default="all") common.add_argument("--outdir", default="source_data") common.add_argument("--bucket", default="mit-supercloud-dataset") common.add_argument("--prefix", default="datacenter-challenge/202201/") @@ -31,12 +32,13 @@ def main(): pl = subs.add_parser("load", parents=[common], help="Load local data into RAPS") pl.add_argument("path", help="Local data root") pl.set_defaults(func=lambda args: load_data(args.path, - start_date=args.start, - end_date=args.end, - partition=args.partition)) + start_date=args.start, + end_date=args.end, + partition=args.partition)) args = p.parse_args() return args.func(args) + if __name__ == "__main__": main() diff --git a/raps/dataloaders/mit_supercloud/download.py b/raps/dataloaders/mit_supercloud/download.py index 5130849..f46c573 100644 --- a/raps/dataloaders/mit_supercloud/download.py +++ b/raps/dataloaders/mit_supercloud/download.py @@ -20,28 +20,26 @@ Flags: --dry-run # List a sample of files without downloading """ # Suppress urllib3 InsecureRequestWarning -import urllib3 -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -import os -import re -from datetime import datetime - -import pandas as pd -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -from tqdm import tqdm - from .utils import ( load_slurm_log, build_or_load_manifest, - filter_keys_by_jobs + # filter_keys_by_jobs # Defined below! not in utils... 
) +from tqdm import tqdm +from botocore.client import Config +from botocore import UNSIGNED +import boto3 +import pandas as pd +from datetime import datetime +import re +import os +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + # Default date window DEFAULT_START = "21052021" -DEFAULT_END = "22052021" +DEFAULT_END = "22052021" def ensure_slurm_log(s3, bucket, key, dest): @@ -109,7 +107,7 @@ def build_manifest(s3, bucket, prefix, manifest_path): print(f"Manifest written to {manifest_path}.") -#def load_manifest(manifest_path): +# def load_manifest(manifest_path): # with open(manifest_path) as f: # return [line.strip() for line in f] @@ -140,7 +138,8 @@ def filter_keys_by_jobs(keys, job_ids): def download_traces(s3, bucket, prefix, outdir, keys, dry_run): if dry_run: print("Dry-run: sample of matching keys:") - for key in keys[:10]: print(" ", key) + for key in keys[:10]: + print(" ", key) return for key in tqdm(keys, desc="Downloading traces"): rel = key[len(prefix):] diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 3e84efd..cc0c1dc 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -110,12 +110,10 @@ Note: To locate the pruning logic, search for the keyword "prune" in the code. import ast import os import math -import numpy as np import pandas as pd import re from tqdm import tqdm -from types import SimpleNamespace from typing import Dict, Union, Optional from collections import Counter @@ -123,7 +121,6 @@ from raps.job import job_dict, Job from raps.utils import summarize_ranges from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END -from .utils import validate_job_traces TRES_ID_MAP = { 1: "cpu", @@ -134,8 +131,8 @@ TRES_ID_MAP = { } GREEN = "\033[32m" YELLOW = "\033[33m" -RED = "\033[31m" -RESET = "\033[0m" +RED = "\033[31m" +RESET = "\033[0m" def parse_tres_alloc(tres_str: Union[str, None], @@ -238,7 +235,7 @@ def load_data(local_dataset_path, **kwargs): # date window start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) - end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) + end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) mask = (sl.time_submit >= start_ts) & (sl.time_submit < end_ts) sl = sl[mask] @@ -254,11 +251,11 @@ def load_data(local_dataset_path, **kwargs): # load list of underutilized nodes to ignore pruned = set() with open(os.path.join(NL_PATH, "prune_list.txt")) as pf: - pruned = {l.strip() for l in pf if l.strip()} + pruned = {l_.strip() for l_ in pf if l_.strip()} before_prune = len(sl) # only keep jobs requesting <= 480 nodes - sl = sl[ sl.nodes_alloc <= 480 ] + sl = sl[sl.nodes_alloc <= 480] after_alloc_filter = len(sl) skip_counts['nodes_alloc > 480'] += (before_prune - after_alloc_filter) @@ -299,26 +296,26 @@ def load_data(local_dataset_path, **kwargs): ]) # partition mode - part = kwargs.get("partition","").split("/")[-1].lower() - cpu_only = (part=="part-cpu") - mixed = (part=="part-gpu") + part = kwargs.get("partition", "").split("/")[-1].lower() + cpu_only = (part == "part-cpu") + mixed = (part == "part-gpu") # create nodelist mapping if cpu_only: with open(os.path.join(NL_PATH, "cpu_nodes.txt")) as f: - cpu_nodes = [l.strip() for l in f if l.strip()] + cpu_nodes = [l_.strip() for l_ in f if l_.strip()] cpu_node_to_idx = {h: i for i, h in enumerate(cpu_nodes)} - else: # cpu + gpu + else: # cpu + gpu with open(os.path.join(NL_PATH, "gpu_nodes.txt")) as f: - 
gpu_nodes = [l.strip() for l in f if l.strip()] + gpu_nodes = [l_.strip() for l_ in f if l_.strip()] gpu_node_to_idx = {h: i for i, h in enumerate(gpu_nodes)} if cpu_only: job_ids = set(sl.id_job) - gpu_jobs - #skip_counts['gpu_job_in_cpu_mode'] += len(set(sl.id_job) & gpu_jobs) + # skip_counts['gpu_job_in_cpu_mode'] += len(set(sl.id_job) & gpu_jobs) elif mixed: job_ids = gpu_jobs & set(sl.id_job) - #skip_counts['cpu_job_in_gpu_mode'] += len(set(sl.id_job) - gpu_jobs) + # skip_counts['cpu_job_in_gpu_mode'] += len(set(sl.id_job) - gpu_jobs) else: job_ids = set(sl.id_job) @@ -328,33 +325,33 @@ def load_data(local_dataset_path, **kwargs): cpu_files = [] cpu_root = os.path.join(data_root, "cpu") if os.path.exists(cpu_root): - for R,_,fs in os.walk(cpu_root): + for R, _, fs in os.walk(cpu_root): for f in fs: if not f.endswith("-timeseries.csv"): continue try: - jid = int(f.split("-",1)[0]) + jid = int(f.split("-", 1)[0]) if jid in job_ids: - cpu_files.append(os.path.join(R,f)) + cpu_files.append(os.path.join(R, f)) except (ValueError, IndexError): continue gpu_files = [] gpu_root = os.path.join(data_root, "gpu") if os.path.exists(gpu_root): - for R,_,fs in os.walk(gpu_root): + for R, _, fs in os.walk(gpu_root): for f in fs: if not f.endswith(".csv"): continue try: - jid = int(f.split("-",1)[0]) + jid = int(f.split("-", 1)[0]) if jid in job_ids: - gpu_files.append(os.path.join(R,f)) + gpu_files.append(os.path.join(R, f)) except (ValueError, IndexError): continue - cpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in cpu_files} - gpu_ids = {int(os.path.basename(p).split('-',1)[0]) for p in gpu_files} + cpu_ids = {int(os.path.basename(p).split('-', 1)[0]) for p in cpu_files} + gpu_ids = {int(os.path.basename(p).split('-', 1)[0]) for p in gpu_files} all_trace_ids = cpu_ids | gpu_ids print(f"→ {len(cpu_files)} CPU files, {len(gpu_files)} GPU files → {len(all_trace_ids)} jobs with traces") @@ -378,14 +375,12 @@ def load_data(local_dataset_path, **kwargs): print(f" * {overlap_count} jobs have BOTH CPU and GPU traces.") print("----------------------------------------------------\n") - data = {} traced_jobs = all_trace_ids untraced_jobs = job_ids - traced_jobs skip_counts['no_trace_file'] += len(untraced_jobs) - # CPU first for fp in tqdm(cpu_files, desc="Loading CPU traces"): df = pd.read_csv(fp, dtype={0: str}) @@ -407,7 +402,7 @@ def load_data(local_dataset_path, **kwargs): tres_alloc = job_row.get('tres_alloc', 'N/A') tres_alloc_dict = parse_tres_alloc(tres_alloc) rec["tres_alloc_dict"] = tres_alloc_dict - gres_used = job_row.get('gres_used', 'N/A') + # gres_used = job_row.get('gres_used', 'N/A') # Unused tqdm.write(f"Reading CPU {os.path.basename(fp)} for Job ID: {jid}") tqdm.write(f" Start Time: {start_time}, Wall Time: {wall_time}s") @@ -433,7 +428,7 @@ def load_data(local_dataset_path, **kwargs): rec["nodes_alloc"] = int(job_row["nodes_alloc"]) rec["cpu"] = proc_cpu_series(df) - #print(f'{RED}{rec["cpu"]}{RESET}') + # print(f'{RED}{rec["cpu"]}{RESET}') if debug: print(f"GPU candidate files ({len(gpu_files)}):") @@ -442,35 +437,39 @@ def load_data(local_dataset_path, **kwargs): # data from the cpu processes are all stored under the `data` dictionary # according to their respective jid key - #print("******", data.keys()) + # print("******", data.keys()) for fp in tqdm(gpu_files, desc="Loading GPU traces"): if not os.path.exists(fp): - if debug: print(f"{YELLOW}[WARNING] gpu path {fp!r} doesn't exist skipping{RESET}") + if debug: + print(f"{YELLOW}[WARNING] gpu path {fp!r} doesn't exist 
skipping{RESET}") skip_counts['gpu_path_does_not_exist'] += 1 continue - if debug: tqdm.write(f"Reading GPU {os.path.basename(fp)}") + if debug: + tqdm.write(f"Reading GPU {os.path.basename(fp)}") dfi = pd.read_csv(fp, dtype={0: str}) if "gpu_index" not in dfi.columns: - if debug: tqdm.write("[WARNING] → no gpu_index column! SKIPPING") + if debug: + tqdm.write("[WARNING] → no gpu_index column! SKIPPING") skip_counts['no_gpu_index_column'] += 1 continue jid = int(os.path.basename(fp).split("-", 1)[0]) rec = data.setdefault(jid, {}) cpu_df = rec.get("cpu") - #print(f"{YELLOW}jid={jid} {cpu_df}{RESET}") + # print(f"{YELLOW}jid={jid} {cpu_df}{RESET}") if cpu_df is None: - if debug: tqdm.write(f"{YELLOW}[WARNING] → no cpu trace for gpu! (jid={jid}) SKIPPING{RESET}") + if debug: + tqdm.write(f"{YELLOW}[WARNING] → no cpu trace for gpu! (jid={jid}) SKIPPING{RESET}") skip_counts['no_cpu_trace_for_gpu_job'] += 1 continue gpu_cnt = rec.get("gpu_cnt", 0) gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) - gpu_cnt = data[jid].get("gpu_cnt", 0) + gpu_cnt = data[jid].get("gpu_cnt", 0) prev_gpu = data[jid].get("gpu") gpu_ser, gpu_cnt = proc_gpu_series(cpu_df, dfi, gpu_cnt) if prev_gpu is None: @@ -520,7 +519,7 @@ def load_data(local_dataset_path, **kwargs): config = kwargs.get('config', {}) cpus_per_node = config.get('CPUS_PER_NODE') cores_per_cpu = config.get('CORES_PER_CPU') - gpus_per_node = config.get('GPUS_PER_NODE') + # gpus_per_node = config.get('GPUS_PER_NODE') # Unused quanta = config.get('TRACE_QUANTA') @@ -542,7 +541,7 @@ def load_data(local_dataset_path, **kwargs): skip_counts['final_cpu_none_cpu_only'] += 1 continue cpu_tr = cpu.cpu_utilisation.tolist() - gpu_tr = [0] # Ensure gpu_tr is a list for max() operation + gpu_tr = [0] # Ensure gpu_tr is a list for max() operation t0, t1 = cpu.utime.min(), cpu.utime.max() elif mixed: if cpu is None: @@ -554,7 +553,7 @@ def load_data(local_dataset_path, **kwargs): cpu_tr = cpu.cpu_utilisation.tolist() gpu_tr = gpu t0, t1 = cpu.utime.min(), cpu.utime.max() - else: # not cpu_only or mixed + else: # not cpu_only or mixed skip_counts['final_unhandled_partition'] += 1 continue @@ -577,28 +576,28 @@ def load_data(local_dataset_path, **kwargs): submit_time = rec.get("time_submit", t0) - start_ts current_job_dict = job_dict( - nodes_required = nr, - cpu_cores_required = cpu_cores_req, - gpu_units_required = gpu_units_req, - name = rec.get("name_job", "unknown"), - account = rec.get("id_user", "unknown"), - cpu_trace = cpu_tr, - gpu_trace = gpu_tr, - ntx_trace = [], - nrx_trace = [], - end_state = rec.get("state_end", "unknown"), - id = jid, - scheduled_nodes = rec.get("scheduled_nodes"), - priority = rec.get("priority", 0), - submit_time = submit_time, - time_limit = rec.get("time_limit", 0), - start_time = t0 - start_ts, - end_time = t1 - start_ts, - wall_time = max(0, t1-t0), - trace_time = len(cpu_tr)*quanta, - trace_start_time = 0, - trace_end_time = len(cpu_tr)*quanta, - trace_quanta = quanta + nodes_required=nr, + cpu_cores_required=cpu_cores_req, + gpu_units_required=gpu_units_req, + name=rec.get("name_job", "unknown"), + account=rec.get("id_user", "unknown"), + cpu_trace=cpu_tr, + gpu_trace=gpu_tr, + ntx_trace=[], + nrx_trace=[], + end_state=rec.get("state_end", "unknown"), + id=jid, + scheduled_nodes=rec.get("scheduled_nodes"), + priority=rec.get("priority", 0), + submit_time=submit_time, + time_limit=rec.get("time_limit", 0), + start_time=t0 - start_ts, + end_time=t1 - start_ts, + wall_time=max(0, t1-t0), + trace_time=len(cpu_tr)*quanta, + 
trace_start_time=0,
+            trace_end_time=len(cpu_tr)*quanta,
+            trace_quanta=quanta
         )
         job = Job(current_job_dict)
         jobs_list.append(job)
@@ -606,14 +605,14 @@ def load_data(local_dataset_path, **kwargs):
     # Calculate min_overall_utime and max_overall_utime
     telemetry_start = int(sl.time_start.min())
     telemetry_end = int(sl.time_end.max())
-    #min_overall_utime = int(sl.time_submit.min())
-    #max_overall_utime = int(sl.time_submit.max())
+    # min_overall_utime = int(sl.time_submit.min())
+    # max_overall_utime = int(sl.time_submit.max())

-    #args_namespace = SimpleNamespace(
+    # args_namespace = SimpleNamespace(
     #    fastforward=min_overall_utime,
     #    system='mit_supercloud',
     #    time=max_overall_utime
-    #)
+    # )

     print("\nSkipped jobs summary:")
     for reason, count in skip_counts.items():
diff --git a/raps/dataloaders/mit_supercloud/utils.py b/raps/dataloaders/mit_supercloud/utils.py
index eec1de0..8d51c48 100644
--- a/raps/dataloaders/mit_supercloud/utils.py
+++ b/raps/dataloaders/mit_supercloud/utils.py
@@ -8,7 +8,7 @@ from scipy.sparse import csr_matrix as csr
 from tqdm import tqdm

 DEFAULT_START = "2021-05-21T00:00"
-DEFAULT_END = "2021-05-22T00:00"
+DEFAULT_END = "2021-05-22T00:00"


 def to_epoch(s: str) -> int:
@@ -182,7 +182,7 @@ def proc_cpu_series(dfi):
 def proc_gpu_series(cpu_df, dfi, gpu_cnt):
     # 1) Build CPU time range
     t_cpu_start = int(cpu_df.utime.min())
-    t_cpu_end = int(cpu_df.utime.max())
+    t_cpu_end = int(cpu_df.utime.max())
     t_cpu = np.array([t_cpu_start, t_cpu_end, t_cpu_end - t_cpu_start])

     # 2) Safely convert the GPU timestamps to integer seconds
@@ -203,7 +203,7 @@ def proc_gpu_series(cpu_df, dfi, gpu_cnt):
     dfi["t_fixed"] = ts_int - ts_int.min() + t_cpu_start

     # 5) Prepare output DataFrame with a utime column
-    #ugpus = dfi.gpu_index.unique()
+    # ugpus = dfi.gpu_index.unique()
     gpu_df = pd.DataFrame({"utime": cpu_df["utime"].values})

     # 6) Interpolate each GPU field onto the CPU utime grid
@@ -228,7 +228,7 @@ def proc_gpu_series(cpu_df, dfi, gpu_cnt):
     ren = {
         "gpu_index": f"gpu_index_{gpu_cnt}",
         "utilization_gpu_pct": f"gpu_util_{gpu_cnt}",
-        "utilization_memory_pct":f"gpu_mempct_{gpu_cnt}",
+        "utilization_memory_pct": f"gpu_mempct_{gpu_cnt}",
         "memory_free_MiB": f"gpu_memfree_{gpu_cnt}",
         "memory_used_MiB": f"gpu_memused_{gpu_cnt}",
         "temperature_gpu": f"gpu_temp_{gpu_cnt}",
diff --git a/raps/downtime.py b/raps/downtime.py
index d80aba8..7c5bf1f 100644
--- a/raps/downtime.py
+++ b/raps/downtime.py
@@ -11,7 +11,7 @@ if TYPE_CHECKING:

 class Downtime:

-    def __init__(self,*,
+    def __init__(self, *,
                  first_downtime,
                  downtime_interval,
                  downtime_length,
@@ -20,21 +20,21 @@ class Downtime:
         if downtime_length == 0 or downtime_interval == 0 or \
            downtime_length is None or downtime_interval is None:
             self.skip = True
-        self.interval:int = downtime_interval
-        self.length:int = downtime_length
-        self.start:int = first_downtime
-        self.end:int = 0
-        self.down:bool = False
+        self.interval: int = downtime_interval
+        self.length: int = downtime_length
+        self.start: int = first_downtime
+        self.end: int = 0
+        self.down: bool = False

-    def check_and_trigger(self,*,
-                          timestep:int,
-                          engine:Engine
+    def check_and_trigger(self, *,
+                          timestep: int,
+                          engine: Engine
                           ):
         if self.skip:
             return False  # Don't simulate downtime
         if timestep > self.start and not self.down:
             self.simulate_down(engine=engine)
             this_downtime_length = np.random.normal(self.length, 30 * 60)  # 30 minute standard deviation around the downtime length
             self.end = timestep + this_downtime_length
             self.start = self.start + self.interval  # Next start
             return True  # System went down
@@ -43,15 +43,15 @@ class Downtime:
             return True  # System went up
         return False  # No change

-    def simulate_down(self,*,
-                      engine:Engine
+    def simulate_down(self, *,
+                      engine: Engine
                       ):
         if args.debug:
             print("Simulated downtime: before downtime start")
             print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")

-        #engine.resource_manager.down_nodes.update(engine.resource_manager.nodes)  # down_nodes are a set
-        #engine.resource_manager.available_nodes[:] = []
+        # engine.resource_manager.down_nodes.update(engine.resource_manager.nodes)  # down_nodes are a set
+        # engine.resource_manager.available_nodes[:] = []

         for job in engine.running:
             job._state = JobState.CANCELLED
@@ -71,11 +71,13 @@ class Downtime:
             print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")
         self.down = True

-    def simulate_up(self,*,
-                    engine:Engine
+    def simulate_up(self, *,
+                    engine: Engine
                     ):
         self.down = False
-        engine.resource_manager.available_nodes[:] = [n['id'] for n in engine.resource_manager.nodes if not n['is_down']]
-        engine.down_nodes  # Careful! these are the down nodes not managed by the resouce manager but given to the engine!
+        engine.resource_manager.available_nodes[:] = [n['id']
+                                                      for n in engine.resource_manager.nodes if not n['is_down']]
+        engine.down_nodes  # Careful!
+        # these are the down nodes not managed by the resource manager but given to the engine!
         engine.resource_manager.down_nodes.clear()
         engine.resource_manager.down_nodes.update(engine.config["DOWN_NODES"])  # Orig.
diff --git a/raps/flops.py b/raps/flops.py
index b64c462..1546c52 100644
--- a/raps/flops.py
+++ b/raps/flops.py
@@ -1,6 +1,7 @@
 import numpy as np
 from .utils import linear_to_3d_index

+
 class FLOPSManager():

     def __init__(self, **kwargs):
@@ -23,25 +24,25 @@ class FLOPSManager():
         if self.validate:
             # cpu_util is in fact node_Watts in this case
             total_peak = (
-                self.config['CPU_FP_RATIO'] * self.config['CPU_PEAK_FLOPS'] + \
+                self.config['CPU_FP_RATIO'] * self.config['CPU_PEAK_FLOPS'] +
                 self.config['GPU_FP_RATIO'] * self.config['GPU_PEAK_FLOPS']
             )
             denominator = (
-                self.config['POWER_CPU_MAX'] * self.config['CPUS_PER_NODE'] + \
-                self.config['POWER_GPU_MAX'] * self.config['GPUS_PER_NODE'] + \
-                self.config['POWER_NIC'] * self.config['NICS_PER_NODE'] + \
+                self.config['POWER_CPU_MAX'] * self.config['CPUS_PER_NODE'] +
+                self.config['POWER_GPU_MAX'] * self.config['GPUS_PER_NODE'] +
+                self.config['POWER_NIC'] * self.config['NICS_PER_NODE'] +
                 self.config['POWER_NVME']
             )
             self.flop_state[node_indices] = total_peak * (cpu_util_flat / denominator)
         else:
             self.flop_state[node_indices] = (
-                self.config['CPU_FP_RATIO'] * cpu_util_flat * self.config['CPU_PEAK_FLOPS'] + \
+                self.config['CPU_FP_RATIO'] * cpu_util_flat * self.config['CPU_PEAK_FLOPS'] +
                 self.config['GPU_FP_RATIO'] * gpu_util_flat * self.config['GPU_PEAK_FLOPS']
             )

     def get_rpeak(self):
         node_peak_flops = (
-            self.config['CPUS_PER_NODE'] * self.config['CPU_PEAK_FLOPS'] + \
+            self.config['CPUS_PER_NODE'] * self.config['CPU_PEAK_FLOPS'] +
             self.config['GPUS_PER_NODE'] * self.config['GPU_PEAK_FLOPS']
         )
         system_peak_flops = self.config['AVAILABLE_NODES'] * node_peak_flops
diff --git a/raps/helpers.py b/raps/helpers.py
index dc3e3ac..0e2e654 100644
--- a/raps/helpers.py
+++ b/raps/helpers.py
@@ -2,6 +2,7 @@ import sys
 import tomllib
 from pathlib import Path

+
 def check_python_version():
     # Load pyproject.toml
     pyproject_path = Path(__file__).parent.parent /
"pyproject.toml" diff --git a/raps/job.py b/raps/job.py index 845f928..262371f 100644 --- a/raps/job.py +++ b/raps/job.py @@ -21,7 +21,7 @@ def job_dict(*, nodes_required, name, account, allocated_cpu_cores=0, allocated_gpu_units=0, # Traces cpu_trace, gpu_trace, ntx_trace, nrx_trace, - #Times + # Times submit_time=0, time_limit=0, start_time=0, end_time=0, wall_time=0, trace_time=0, trace_start_time=0, trace_end_time=0, @@ -77,7 +77,7 @@ def dilate_trace(trace, factor): Returns: - list of float: the dilated trace. """ - if trace is None or (isinstance(trace,(list, np.ndarray)) and len(trace) == 0): + if trace is None or (isinstance(trace, (list, np.ndarray)) and len(trace) == 0): return trace # Traces can be list/np.array or single float values. # In case of a single float, we adjust the value directly as it is applied to each timestep @@ -153,16 +153,17 @@ class Job: else: raise ValueError(f"{self.nodes_required} {self.scheduled_nodes}") if self.scheduled_nodes == [] or self.scheduled_nodes is None or \ - (isinstance(self.scheduled_nodes,list) and isinstance(self.scheduled_nodes[0], int)) or \ - (isinstance(self.scheduled_nodes,np.ndarray) and isinstance(self.scheduled_nodes[0], int)): + (isinstance(self.scheduled_nodes, list) and isinstance(self.scheduled_nodes[0], int)) or \ + (isinstance(self.scheduled_nodes, np.ndarray) and isinstance(self.scheduled_nodes[0], int)): pass # Type is ok else: # Type is not as expected! - raise ValueError(f"type: self.scheduled_nodes:{type(self.scheduled_nodes)}, with {type(self.scheduled_nodes[0])}") - assert isinstance(self.submit_time,(int,float)) - assert isinstance(self.wall_time,(int,float,np.int64,np.double)) - assert isinstance(self.start_time,(int,float,np.int64,np.double,type(None))) - assert isinstance(self.end_time,(int,float,np.int64,np.double,type(None))) + raise ValueError( + f"type: self.scheduled_nodes:{type(self.scheduled_nodes)}, with {type(self.scheduled_nodes[0])}") + assert isinstance(self.submit_time, (int, float)) + assert isinstance(self.wall_time, (int, float, np.int64, np.double)) + assert isinstance(self.start_time, (int, float, np.int64, np.double, type(None))) + assert isinstance(self.end_time, (int, float, np.int64, np.double, type(None))) assert self.start_time <= self.end_time, f"{self.start_time} <= {self.end_time}" def __repr__(self): @@ -235,11 +236,10 @@ class Job: self.end_time = self.start_time + self.wall_time - class JobStatistics: """ Reduced class for handling statistics after the job has finished. 
""" - def __init__(self,job): + def __init__(self, job): self.id = job.id self.name = job.name self.account = job.account @@ -250,42 +250,42 @@ class JobStatistics: self.start_time = job.start_time self.end_time = job.end_time self.state = job._state - if isinstance(job.cpu_trace,list) or isinstance(job.cpu_trace,np.ndarray): + if isinstance(job.cpu_trace, list) or isinstance(job.cpu_trace, np.ndarray): if len(job.cpu_trace) == 0: self.avg_cpu_usage = 0 else: self.avg_cpu_usage = sum(job.cpu_trace) / len(job.cpu_trace) - elif isinstance(job.cpu_trace,int) or isinstance(job.cpu_trace,float): + elif isinstance(job.cpu_trace, int) or isinstance(job.cpu_trace, float): self.avg_cpu_usage = job.cpu_trace else: raise NotImplementedError() - if isinstance(job.gpu_trace,list) or isinstance(job.gpu_trace,np.ndarray): + if isinstance(job.gpu_trace, list) or isinstance(job.gpu_trace, np.ndarray): if len(job.gpu_trace) == 0: self.avg_gpu_usage = 0 else: self.avg_gpu_usage = sum(job.gpu_trace) / len(job.gpu_trace) - elif isinstance(job.gpu_trace,int) or isinstance(job.gpu_trace,float): + elif isinstance(job.gpu_trace, int) or isinstance(job.gpu_trace, float): self.avg_gpu_usage = job.gpu_trace else: raise NotImplementedError() - if isinstance(job.ntx_trace,list) or isinstance(job.ntx_trace,np.ndarray): + if isinstance(job.ntx_trace, list) or isinstance(job.ntx_trace, np.ndarray): if len(job.ntx_trace) == 0: self.avg_ntx_usage = 0 else: self.avg_ntx_usage = sum(job.ntx_trace) / len(job.ntx_trace) - elif isinstance(job.ntx_trace,int) or isinstance(job.ntx_trace,float): + elif isinstance(job.ntx_trace, int) or isinstance(job.ntx_trace, float): self.avg_ntx_usage = job.ntx_trace else: self.avg_ntx_usage = 0 - if isinstance(job.nrx_trace,list) or isinstance(job.nrx_trace,np.ndarray): + if isinstance(job.nrx_trace, list) or isinstance(job.nrx_trace, np.ndarray): if len(job.nrx_trace) == 0: self.avg_nrx_usage = 0 else: self.avg_nrx_usage = sum(job.nrx_trace) / len(job.nrx_trace) - elif isinstance(job.nrx_trace,int) or isinstance(job.nrx_trace,float): + elif isinstance(job.nrx_trace, int) or isinstance(job.nrx_trace, float): self.avg_nrx_usage = job.nrx_trace else: self.avg_nrx_usage = 0 diff --git a/raps/network.py b/raps/network.py index 13308f4..4e8ac49 100644 --- a/raps/network.py +++ b/raps/network.py @@ -34,7 +34,8 @@ class NetworkModel: net_cong = 0 net_tx = 0 net_rx = 0 - max_throughput = self.max_link_bw * job.trace_quanta # self.config.get('TRACE_QUANTA') # Why? What should this be? + # self.config.get('TRACE_QUANTA') # Why? What should this be? + max_throughput = self.max_link_bw * job.trace_quanta if job.nodes_required <= 1: # single node, no network utilization or congestion. @@ -66,9 +67,9 @@ class NetworkModel: host_list.append(dragonfly_node_id_to_host_name(fat_idx, D, A, P)) if debug: print(" dragonfly hosts:", host_list) - ##if len(host_list) <= 1: + # if len(host_list) <= 1: # net_cong = 0.0 - #else: + # else: loads = link_loads_for_job(self.net_graph, host_list, net_tx) # ? Only tx not rx or total net_util) net_cong = worst_link_util(loads, max_throughput) @@ -78,7 +79,7 @@ class NetworkModel: return net_util, net_cong, net_tx, net_rx, max_throughput -def apply_job_slowdown(*,job, max_throughput, net_util, net_cong, net_tx, net_rx, debug: bool = False): +def apply_job_slowdown(*, job, max_throughput, net_util, net_cong, net_tx, net_rx, debug: bool = False): # Get the maximum allowed bandwidth from the configuration. 
if net_cong > 1: if debug: @@ -105,17 +106,17 @@ def apply_job_slowdown(*,job, max_throughput, net_util, net_cong, net_tx, net_rx return slowdown_factor -def compute_system_network_stats(net_utils,net_tx_list,net_rx_list,slowdown_factors): +def compute_system_network_stats(net_utils, net_tx_list, net_rx_list, slowdown_factors): # Compute network averages n = len(net_utils) or 1 avg_tx = sum(net_tx_list) / n avg_rx = sum(net_rx_list) / n avg_net = sum(net_utils) / n - #avg_slowdown_per_job = sum(slowdown_factors) / n - #self.avg_slowdown_history.append(avg_slowdown_per_job) - #max_slowdown_per_job = max(slowdown_factors) - #self.max_slowdown_history.append(max_slowdown_per_job) + # avg_slowdown_per_job = sum(slowdown_factors) / n + # self.avg_slowdown_history.append(avg_slowdown_per_job) + # max_slowdown_per_job = max(slowdown_factors) + # self.max_slowdown_history.append(max_slowdown_per_job) return avg_tx, avg_rx, avg_net @@ -167,7 +168,7 @@ def build_fattree(k): """ G = nx.Graph() # core - num_core = (k//2)**2 + # num_core = (k//2)**2 # Unused! for i in range(k//2): for j in range(k//2): core = f"c_{i}_{j}" @@ -226,7 +227,8 @@ def link_loads_for_job(G, job_hosts, tx_volume_bytes): per_peer = 0 # find paths where src is the sender for (s, d, p) in paths: - if s != src: continue + if s != src: + continue # add per_peer to every link on p for u, v in zip(p, p[1:]): # ensure ordering matches loads keys @@ -255,10 +257,10 @@ def node_id_to_host_name(node_id: int, k: int) -> str: There are (k^3/4) total hosts, assigned in ascending order across pod → edge → h. """ hosts_per_pod = (k // 2) * (k // 2) # e.g. for k=8, hosts_per_pod = 16 - pod = node_id // hosts_per_pod - offset = node_id % hosts_per_pod - edge = offset // (k // 2) - idx = offset % (k // 2) + pod = node_id // hosts_per_pod + offset = node_id % hosts_per_pod + edge = offset // (k // 2) + idx = offset % (k // 2) return f"h_{pod}_{edge}_{idx}" @@ -329,7 +331,7 @@ def dragonfly_node_id_to_host_name(fat_idx: int, D: int, A: int, P: int) -> str: total_hosts = D * A * P assert 0 <= fat_idx < total_hosts, "fat_idx out of range" - host_offset = fat_idx % P - router_group = (fat_idx // P) % A - pod = fat_idx // (A * P) + host_offset = fat_idx % P + router_group = (fat_idx // P) % A + pod = fat_idx // (A * P) return f"h_{pod}_{router_group}_{host_offset}" diff --git a/raps/plotting.py b/raps/plotting.py index 80bc310..9fee188 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -16,7 +16,6 @@ Plotter import itertools import matplotlib.pyplot as plt -import matplotlib.dates as md import matplotlib.ticker as ticker from matplotlib.ticker import MaxNLocator import time @@ -24,6 +23,7 @@ import numpy as np from uncertainties import unumpy from rich.progress import track + class BasePlotter: """ A base class for setting up and saving plots. @@ -37,6 +37,7 @@ class BasePlotter: title : str The title of the plot. """ + def __init__(self, xlabel, ylabel, title, uncertainties=False): """ Constructs all the necessary attributes for the BasePlotter object. @@ -82,6 +83,7 @@ class BasePlotter: plt.savefig(save_path) plt.close() + class Plotter(BasePlotter): """ A class for creating and saving specific types of plots, such as histories, @@ -92,6 +94,7 @@ class Plotter(BasePlotter): save_path : str The path to save the plot. """ + def __init__(self, xlabel='', ylabel='', title='', save_path='out.svg', uncertainties=False): """ Constructs all the necessary attributes for the Plotter object. 
@@ -270,7 +273,7 @@ def plot_job_gantt(start_times, end_times, node_counts): alpha=0.8 ) - #for y, (s, e, n) in enumerate(zip(start_times, end_times, node_counts)): + # for y, (s, e, n) in enumerate(zip(start_times, end_times, node_counts)): # plt.barh(y, width=e - s, left=s, height=0.8, # color='yellow', edgecolor='black', alpha=0.8) # # Optionally place the node count label in the middle of the bar @@ -302,7 +305,7 @@ def plot_network_histogram(*, ax, data, bins=50, save_path='network_histogram.pn ax.yscale('log') # force scientific notation on x-axis - ax.ticklabel_format(style='scientific', axis='x', scilimits=(0,0)) + ax.ticklabel_format(style='scientific', axis='x', scilimits=(0, 0)) ax.xlabel('Network Traffic per Job (bytes)') ax.ylabel('Frequency') @@ -324,10 +327,10 @@ def spaced_colors(n, cmap_name='nipy_spectral'): return [cmap(v) for v in values] -def plot_jobs_gantt(*,ax=None,jobs, bars_are_node_sized): - jobs.sort(key=lambda x:x.submit_time) +def plot_jobs_gantt(*, ax=None, jobs, bars_are_node_sized): + jobs.sort(key=lambda x: x.submit_time) if ax is None: - ax = plt.figure(figsize=(10,4)) + ax = plt.figure(figsize=(10, 4)) # Submit_time and Wall_time submit_t = [x.submit_time for x in jobs] duration = [x.wall_time for x in jobs] @@ -337,34 +340,34 @@ def plot_jobs_gantt(*,ax=None,jobs, bars_are_node_sized): offset = 0 for i in track(range(len(jobs)), description="Collecting information to plot"): if bars_are_node_sized: - ax.barh(offset + nodes_required[i] / 2,duration[i], height=nodes_required[i], left=submit_t[i]) + ax.barh(offset + nodes_required[i] / 2, duration[i], height=nodes_required[i], left=submit_t[i]) offset += nodes_required[i] else: ax.barh(i, duration[i], height=1.0, left=submit_t[i], color=colors[i]) print("Plotting") ax.set_ylabel("Job ID") - ##ax_b labels: + # ax_b labels: ax.set_xlabel("time [hh:mm]") minx_s = min([x.submit_time for x in jobs]) maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for - (x1,x2) in [(n // 60,n % 60) for - n in x_label_mins[0::60]]] + (x1, x2) in [(n // 60, n % 60) for + n in x_label_mins[0::60]]] - ax.set_xticks(x_label_ticks,x_label_str) - #ax.yaxis.set_inverted(True) + ax.set_xticks(x_label_ticks, x_label_str) + # ax.yaxis.set_inverted(True) return ax -def plot_nodes_gantt(*,ax=None,jobs): +def plot_nodes_gantt(*, ax=None, jobs): if ax is None: - ax = plt.figure(figsize=(10,4)) + ax = plt.figure(figsize=(10, 4)) # Submit_time and Wall_time duration = [x.wall_time for x in jobs] - #nodes_required = [x['nodes_required'] for x in jobs] + # nodes_required = [x['nodes_required'] for x in jobs] start_t = [x.start_time for x in jobs] nodeIDs = [x.scheduled_nodes for x in jobs] @@ -375,28 +378,28 @@ def plot_nodes_gantt(*,ax=None,jobs): print("Plotting") ax.set_ylabel("Node ID") - ##ax_b labels: + # ax_b labels: ax.set_xlabel("time [hh:mm]") - minx_s = min([x.submit_time for x in jobs]) - maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) - #ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M:%S')) + # minx_s = min([x.submit_time for x in jobs]) # Unused + # maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) # Unused + # ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M:%S')) formatter = ticker.FuncFormatter(lambda s, x: time.strftime('%m-%d 
%H:%M:%S', time.gmtime(s)))
     ax.xaxis.set_major_formatter(formatter)
     ax.yaxis.set_major_locator(MaxNLocator(integer=True))

-    #x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)]
-    #x_label_ticks = [n * 60 for n in x_label_mins[0::60]]
-    #x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for
+    # x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)]
+    # x_label_ticks = [n * 60 for n in x_label_mins[0::60]]
+    # x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for
     #               (x1,x2) in [(n // 60,n % 60) for
     #               n in x_label_mins[0::60]]]

-    #ax.set_xticks(x_label_ticks,x_label_str)
-    ax.set_ylim(1,max(list(itertools.chain.from_iterable(nodeIDs))))
-    #ax.yaxis.set_inverted(True)
+    # ax.set_xticks(x_label_ticks,x_label_str)
+    ax.set_ylim(1, max(list(itertools.chain.from_iterable(nodeIDs))))
+    # ax.yaxis.set_inverted(True)
     return ax


 if __name__ == "__main__":
     plotter = Plotter()
-    #plotter.plot_history([1, 2, 3, 4])
+    # plotter.plot_history([1, 2, 3, 4])
diff --git a/raps/power.py b/raps/power.py
index 171ae93..188b150 100644
--- a/raps/power.py
+++ b/raps/power.py
@@ -8,14 +8,14 @@ Classes:
 Functions:
 - compute_loss: Linear loss model
 - compute_node_power: Calculate the total power consumption for given CPU and GPU utilization.
-- compute_node_power_validate: Calculate the total power consumption for a given mean and standard deviation of node power.
+- compute_node_power_validate: Calculate the total power consumption for
+  a given mean and standard deviation of node power.
 """
 import numpy as np
 import pandas as pd
 import uncertainties as uf

 from .utils import linear_to_3d_index
-from .validators import recompute_power


 def custom_str_uncertainties(self):
@@ -30,7 +30,7 @@ def custom_format_uncertainties(self, fmt_spec):
     return f"{self.nominal_value:{fmt_spec}} ±{self.std_dev:{fmt_spec}}"


-#In stats unicode is printed as unocde abbreviation! To be fixed!
+# In stats, unicode is printed as a unicode abbreviation! To be fixed!
 uf.Variable.__str__ = custom_str_uncertainties
 uf.Variable.__repr__ = custom_repr_uncertainties
 uf.Variable.__format__ = custom_format_uncertainties
@@ -50,26 +50,26 @@ def compute_node_power(cpu_util, gpu_util, net_util, config):
     :return: Total power consumption after accounting for power loss.
""" power_cpu = cpu_util * config['POWER_CPU_MAX'] + \ - (config['CPUS_PER_NODE'] - cpu_util) * config['POWER_CPU_IDLE'] + (config['CPUS_PER_NODE'] - cpu_util) * config['POWER_CPU_IDLE'] power_gpu = gpu_util * config['POWER_GPU_MAX'] + \ - (config['GPUS_PER_NODE'] - gpu_util) * config['POWER_GPU_IDLE'] + (config['GPUS_PER_NODE'] - gpu_util) * config['POWER_GPU_IDLE'] try: power_nic = config['POWER_NIC_IDLE'] + \ - (config['POWER_NIC_MAX'] - config['POWER_NIC_IDLE']) * net_util - except: + (config['POWER_NIC_MAX'] - config['POWER_NIC_IDLE']) * net_util + except KeyError: if isinstance(net_util, np.ndarray): power_nic = config['POWER_NIC'] * np.ones(net_util.shape) else: power_nic = config['POWER_NIC'] power_total = power_cpu + power_gpu + config['POWER_MEM'] + \ - config['NICS_PER_NODE'] * power_nic + config['POWER_NVME'] + config['NICS_PER_NODE'] * power_nic + config['POWER_NVME'] # Apply power loss due to Sivoc and Rectifier - power_with_sivoc_loss = compute_loss(power_total, config['SIVOC_LOSS_CONSTANT'], \ - config['SIVOC_EFFICIENCY']) + power_with_sivoc_loss = compute_loss(power_total, config['SIVOC_LOSS_CONSTANT'], + config['SIVOC_EFFICIENCY']) power_sivoc_loss_only = power_with_sivoc_loss - power_total return power_with_sivoc_loss, power_sivoc_loss_only @@ -85,18 +85,19 @@ def compute_node_power_uncertainties(cpu_util, gpu_util, net_util, config): :return: Total power consumption after accounting for power loss. """ power_cpu = cpu_util \ - * uf.ufloat(config['POWER_CPU_MAX'], config['POWER_CPU_MAX'] * config['POWER_CPU_UNCERTAINTY']) \ - + (config['CPUS_PER_NODE'] - cpu_util) \ - * uf.ufloat(config['POWER_CPU_IDLE'], config['POWER_CPU_IDLE'] * config['POWER_CPU_UNCERTAINTY']) + * uf.ufloat(config['POWER_CPU_MAX'], config['POWER_CPU_MAX'] * config['POWER_CPU_UNCERTAINTY']) \ + + (config['CPUS_PER_NODE'] - cpu_util) \ + * uf.ufloat(config['POWER_CPU_IDLE'], config['POWER_CPU_IDLE'] * config['POWER_CPU_UNCERTAINTY']) power_gpu = gpu_util \ - * uf.ufloat(config['POWER_GPU_MAX'], config['POWER_GPU_MAX'] * config['POWER_GPU_UNCERTAINTY']) \ - + (config['GPUS_PER_NODE'] - gpu_util) \ - * uf.ufloat(config['POWER_GPU_IDLE'], config['POWER_GPU_IDLE'] * config['POWER_GPU_UNCERTAINTY']) + * uf.ufloat(config['POWER_GPU_MAX'], config['POWER_GPU_MAX'] * config['POWER_GPU_UNCERTAINTY']) \ + + (config['GPUS_PER_NODE'] - gpu_util) \ + * uf.ufloat(config['POWER_GPU_IDLE'], config['POWER_GPU_IDLE'] * config['POWER_GPU_UNCERTAINTY']) power_total = power_cpu + power_gpu \ - + uf.ufloat(config['POWER_MEM'], config['POWER_MEM'] * config['POWER_MEM_UNCERTAINTY']) \ - + config['NICS_PER_NODE'] * uf.ufloat(config['POWER_NIC'], config['POWER_NIC'] * config['POWER_NIC_UNCERTAINTY']) \ - + uf.ufloat(config['POWER_NVME'], config['POWER_NVME'] * config['POWER_NVME_UNCERTAINTY']) + + uf.ufloat(config['POWER_MEM'], config['POWER_MEM'] * config['POWER_MEM_UNCERTAINTY']) \ + + config['NICS_PER_NODE'] \ + * uf.ufloat(config['POWER_NIC'], config['POWER_NIC'] * config['POWER_NIC_UNCERTAINTY']) \ + + uf.ufloat(config['POWER_NVME'], config['POWER_NVME'] * config['POWER_NVME_UNCERTAINTY']) # Apply power loss due to Sivoc and Rectifier power_with_sivoc_loss = compute_loss(power_total, config['SIVOC_LOSS_CONSTANT'], config['SIVOC_EFFICIENCY']) @@ -188,7 +189,7 @@ class PowerManager: self.history = [] self.loss_history = [] self.uncertainties = False - if power_func in [compute_node_power_uncertainties, \ + if power_func in [compute_node_power_uncertainties, compute_node_power_validate_uncertainties]: self.uncertainties = True 
if self.down_nodes: @@ -196,13 +197,14 @@ class PowerManager: def get_peak_power(self): """Estimate peak power of system for setting max value of gauges in dashboard""" - node_power = compute_node_power(self.config['CPUS_PER_NODE'], self.config['GPUS_PER_NODE'], net_util=0, config=self.config)[0] + node_power = compute_node_power(self.config['CPUS_PER_NODE'], + self.config['GPUS_PER_NODE'], net_util=0, config=self.config)[0] blades_per_rectifier = self.config['BLADES_PER_CHASSIS'] / self.config['RECTIFIERS_PER_CHASSIS'] rectifier_load = blades_per_rectifier * self.config['NODES_PER_BLADE'] * node_power - rectifier_power = compute_loss(rectifier_load, self.config['RECTIFIER_LOSS_CONSTANT'], \ + rectifier_power = compute_loss(rectifier_load, self.config['RECTIFIER_LOSS_CONSTANT'], self.config['RECTIFIER_EFFICIENCY']) # with AC-DC conversion losses chassis_power = self.config['BLADES_PER_CHASSIS'] * rectifier_power / blades_per_rectifier \ - + self.config['SWITCHES_PER_CHASSIS'] * self.config['POWER_SWITCH'] + + self.config['SWITCHES_PER_CHASSIS'] * self.config['POWER_SWITCH'] rack_power = chassis_power * self.config['CHASSIS_PER_RACK'] total_power = rack_power * self.config['NUM_RACKS'] + self.config['POWER_CDU'] * self.config['NUM_CDUS'] return total_power @@ -224,10 +226,10 @@ class PowerManager: # approximate by scaling up to number of rectifiers, applying loss # and then dividing by number of rectifiers. # For Frontier there are four nodes per rectifier. - power_with_loss = compute_loss(initial_power * self.config['NODES_PER_RECTIFIER'], \ - self.config['RECTIFIER_LOSS_CONSTANT'], \ + power_with_loss = compute_loss(initial_power * self.config['NODES_PER_RECTIFIER'], + self.config['RECTIFIER_LOSS_CONSTANT'], self.config['RECTIFIER_EFFICIENCY']) \ - / self.config['NODES_PER_RECTIFIER'] + / self.config['NODES_PER_RECTIFIER'] return np.full(self.sc_shape, power_with_loss) def apply_down_nodes(self): @@ -284,7 +286,6 @@ class PowerManager: self.sivoc_loss[node_indices] = sivoc_loss return power_value[np.cumsum(job_lengths) - 1] - def calculate_rectifiers_needed(self, power_state_summed): """ Calculate the number of rectifiers needed based on the total power consumption. @@ -338,9 +339,10 @@ class PowerManager: num_rectifiers = num_rectifiers_array[i, j, k] power_per_rectifier = chassis_power[i, j, k] / num_rectifiers rectifier_power[i, j, k, :num_rectifiers] = power_per_rectifier - power_with_losses[i, j, k, :num_rectifiers] = compute_loss(power_per_rectifier, \ - self.config['RECTIFIER_LOSS_CONSTANT'], \ - self.config['RECTIFIER_EFFICIENCY']) + power_with_losses[i, j, k, :num_rectifiers] = \ + compute_loss(power_per_rectifier, + self.config['RECTIFIER_LOSS_CONSTANT'], + self.config['RECTIFIER_EFFICIENCY']) rectifier_power = np.nan_to_num(rectifier_power) power_with_losses = np.nan_to_num(power_with_losses) @@ -348,8 +350,8 @@ class PowerManager: else: divisor = np.array([4, 4, 4, 4]).reshape(1, 1, 1, 4) rectifier_power = chassis_power[:, :, :, np.newaxis] / divisor - power_with_losses = compute_loss(rectifier_power, \ - self.config['RECTIFIER_LOSS_CONSTANT'], \ + power_with_losses = compute_loss(rectifier_power, + self.config['RECTIFIER_LOSS_CONSTANT'], self.config['RECTIFIER_EFFICIENCY']) # Compute just the losses @@ -380,7 +382,6 @@ class PowerManager: # Return rectifier losses summed at CDU level return power_with_rows, rect_loss_with_rows - def compute_sivoc_losses(self): """ Compute SIVOC losses for each CDU in the system. 
@@ -422,16 +423,16 @@ class PowerManager: return power_df def simulate_power(self, *, - running_jobs, - scheduled_nodes, - cpu_utils, - gpu_utils, - net_utils + running_jobs, + scheduled_nodes, + cpu_utils, + gpu_utils, + net_utils ): jobs_power = self.update_power_state(scheduled_nodes, cpu_utils, gpu_utils, net_utils) for i, job in enumerate(running_jobs): - #if job.running_time % self.config['TRACE_QUANTA'] == 0: + # if job.running_time % self.config['TRACE_QUANTA'] == 0: job.power_history.append(jobs_power[i] * len(job.scheduled_nodes)) # Update the power array UI component @@ -440,7 +441,8 @@ class PowerManager: rack_loss = rect_losses + sivoc_losses power_df = self.get_power_df(rack_power, rack_loss) - total_power_kw = sum(row[-1] for row in rack_power) + self.config['NUM_CDUS'] * self.config['POWER_CDU'] / 1000.0 + total_power_kw = sum(row[-1] for row in rack_power) + \ + self.config['NUM_CDUS'] * self.config['POWER_CDU'] / 1000.0 total_loss_kw = sum(row[-1] for row in rack_loss) # Primary return value: @@ -452,10 +454,10 @@ class PowerManager: # jobs_power # For statistics # === return power_df, \ - rack_power, \ - total_power_kw, \ - total_loss_kw, \ - jobs_power + rack_power, \ + total_power_kw, \ + total_loss_kw, \ + jobs_power def record_power_stats_foreach_job(*, running_jobs, jobs_power): diff --git a/raps/resmgr/__init__.py b/raps/resmgr/__init__.py index 609f104..70814a9 100644 --- a/raps/resmgr/__init__.py +++ b/raps/resmgr/__init__.py @@ -14,12 +14,13 @@ def make_resource_manager(total_nodes, down_nodes, config): return MultiTenantResourceManager(total_nodes, down_nodes, config) return ExclusiveNodeResourceManager(total_nodes, down_nodes, config) + # Alias for backward compatibility ResourceManager = make_resource_manager __all__ = [ - "make_resource_manager", - "ResourceManager", - "ExclusiveNodeResourceManager", + "make_resource_manager", + "ResourceManager", + "ExclusiveNodeResourceManager", "MultiTenantResourceManager" ] diff --git a/raps/resmgr/default.py b/raps/resmgr/default.py index ad71ec9..2bb1345 100644 --- a/raps/resmgr/default.py +++ b/raps/resmgr/default.py @@ -1,14 +1,16 @@ from raps.job import JobState from raps.policy import PolicyType + class ExclusiveNodeResourceManager: """ Legacy exclusive-node resource manager: allocates and frees full nodes. 
""" + def __init__(self, total_nodes, down_nodes, config=None): - self.total_nodes = total_nodes - self.down_nodes = set(down_nodes) - self.config = config or {} + self.total_nodes = total_nodes + self.down_nodes = set(down_nodes) + self.config = config or {} # Determine per-node capacities cfg = self.config @@ -32,7 +34,7 @@ class ExclusiveNodeResourceManager: }) # Available nodes list for allocation/frees - self.available_nodes = [n['id'] for n in self.nodes if not n['is_down']] + self.available_nodes = [n['id'] for n in self.nodes if not n['is_down']] # System utilization history (time, util%) self.sys_util_history = [] @@ -52,8 +54,8 @@ class ExclusiveNodeResourceManager: # Mark job running job.start_time = current_time - job.end_time = current_time + job.wall_time - job.state = JobState.RUNNING + job.end_time = current_time + job.wall_time + job.state = JobState.RUNNING def free_nodes_from_job(self, job): """Frees the full nodes previously allocated to a job.""" @@ -79,35 +81,36 @@ class ExclusiveNodeResourceManager: util = (num_active / total_operational) * 100 if total_operational else 0 self.sys_util_history.append((current_time, util)) return util - """ - Computes system utilization as percentage of non-down nodes that are active. - """ - total_operational = self.total_nodes - len(self.down_nodes) - util = (num_active_nodes / total_operational) * 100 if total_operational else 0 - self.sys_util_history.append((current_time, util)) - return util + # """ + # Computes system utilization as percentage of non-down nodes that are active. + # """ + # total_operational = self.total_nodes - len(self.down_nodes) + # util = (num_active_nodes / total_operational) * 100 if total_operational else 0 + # self.sys_util_history.append((current_time, util)) + # return util def node_failure(self, mtbf): return [] - """Simulate node failure using Weibull distribution.""" - shape_parameter = 1.5 - scale_parameter = mtbf * 3600 # Convert to seconds + # Node failure not working! 
+ # """Simulate node failure using Weibull distribution.""" + # shape_parameter = 1.5 + # scale_parameter = mtbf * 3600 # Convert to seconds - # Create a NumPy array of node indices, excluding down nodes - all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes))) + # # Create a NumPy array of node indices, excluding down nodes + # all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes))) - # Sample the Weibull distribution for all nodes at once - random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size) + # # Sample the Weibull distribution for all nodes at once + # random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size) - # Identify nodes that have failed - failure_threshold = 0.1 - failed_nodes_mask = random_values < failure_threshold - newly_downed_nodes = all_nodes[failed_nodes_mask] + # # Identify nodes that have failed + # failure_threshold = 0.1 + # failed_nodes_mask = random_values < failure_threshold + # newly_downed_nodes = all_nodes[failed_nodes_mask] - # Update available and down nodes - for node_index in newly_downed_nodes: - if node_index in self.available_nodes: - self.available_nodes.remove(node_index) - self.down_nodes.add(str(node_index)) + # # Update available and down nodes + # for node_index in newly_downed_nodes: + # if node_index in self.available_nodes: + # self.available_nodes.remove(node_index) + # self.down_nodes.add(str(node_index)) - return newly_downed_nodes.tolist() + # return newly_downed_nodes.tolist() diff --git a/raps/resmgr/multitenant.py b/raps/resmgr/multitenant.py index e7121be..5d5f27d 100644 --- a/raps/resmgr/multitenant.py +++ b/raps/resmgr/multitenant.py @@ -12,15 +12,16 @@ class MultiTenantResourceManager: """ Resource manager for per-node CPU/GPU multitenancy. 
""" + def __init__(self, total_nodes, down_nodes, config): - self.total_nodes = total_nodes - self.config = config - self.down_nodes = set(down_nodes) - self.nodes = [] + self.total_nodes = total_nodes + self.config = config + self.down_nodes = set(down_nodes) + self.nodes = [] # Track total allocations for reporting self.allocated_cpu_cores = 0 self.allocated_gpu_units = 0 - self.sys_util_history = [] + self.sys_util_history = [] # Determine per-node capacities total_cpu = self.config['CPUS_PER_NODE'] * self.config['CORES_PER_CPU'] @@ -49,7 +50,7 @@ class MultiTenantResourceManager: candidate = self.nodes[node_id] if (not candidate['is_down'] and candidate['available_cpu_cores'] >= job.cpu_cores_required and - candidate['available_gpu_units'] >= job.gpu_units_required): + candidate['available_gpu_units'] >= job.gpu_units_required): found = candidate # Fallback: first-fit @@ -57,7 +58,7 @@ class MultiTenantResourceManager: for candidate in self.nodes: if (not candidate['is_down'] and candidate['available_cpu_cores'] >= job.cpu_cores_required and - candidate['available_gpu_units'] >= job.gpu_units_required): + candidate['available_gpu_units'] >= job.gpu_units_required): found = candidate break @@ -67,23 +68,23 @@ class MultiTenantResourceManager: # Allocate resources found['available_cpu_cores'] -= job.cpu_cores_required found['available_gpu_units'] -= job.gpu_units_required - self.allocated_cpu_cores += job.cpu_cores_required - self.allocated_gpu_units += job.gpu_units_required + self.allocated_cpu_cores += job.cpu_cores_required + self.allocated_gpu_units += job.gpu_units_required # ---- Invariant checks (after mutating node/RM state) ---- assert_node_accounting_ok(found) # no negatives left assert self.allocated_cpu_cores >= 0 and self.allocated_gpu_units >= 0 # Optional: global sanity vs. totals assert self.allocated_cpu_cores <= sum(n['total_cpu_cores'] for n in self.nodes) - assert self.allocated_gpu_units <= sum(n['total_gpu_units'] for n in self.nodes) + assert self.allocated_gpu_units <= sum(n['total_gpu_units'] for n in self.nodes) # Record on job - job.scheduled_nodes = [found['id']] - job.allocated_cpu_cores = job.cpu_cores_required - job.allocated_gpu_units = job.gpu_units_required - job.start_time = current_time - job.end_time = current_time + job.wall_time - job.state = JobState.RUNNING + job.scheduled_nodes = [found['id']] + job.allocated_cpu_cores = job.cpu_cores_required + job.allocated_gpu_units = job.gpu_units_required + job.start_time = current_time + job.end_time = current_time + job.wall_time + job.state = JobState.RUNNING def free_nodes_from_job(self, job): """Releases cores/GPUs from a completed job.""" @@ -93,8 +94,8 @@ class MultiTenantResourceManager: node = self.nodes[nid] node['available_cpu_cores'] += getattr(job, 'allocated_cpu_cores', 0) node['available_gpu_units'] += getattr(job, 'allocated_gpu_units', 0) - self.allocated_cpu_cores -= getattr(job, 'allocated_cpu_cores', 0) - self.allocated_gpu_units -= getattr(job, 'allocated_gpu_units', 0) + self.allocated_cpu_cores -= getattr(job, 'allocated_cpu_cores', 0) + self.allocated_gpu_units -= getattr(job, 'allocated_gpu_units', 0) else: print(f"Warning: Job {job.id} had invalid node {nid} during free.") @@ -103,9 +104,9 @@ class MultiTenantResourceManager: Computes and records utilization based on allocated CPU/GPU across all nodes. 
""" total_cpu = sum(n['total_cpu_cores'] for n in self.nodes) - total_gpu = sum(n['total_gpu_units'] for n in self.nodes) - used_cpu = self.allocated_cpu_cores - used_gpu = self.allocated_gpu_units + total_gpu = sum(n['total_gpu_units'] for n in self.nodes) + used_cpu = self.allocated_cpu_cores + used_gpu = self.allocated_gpu_units cpu_util = (used_cpu / total_cpu) * 100 if total_cpu else 0 gpu_util = (used_gpu / total_gpu) * 100 if total_gpu else 0 @@ -121,15 +122,15 @@ class MultiTenantResourceManager: """ shape = 1.5 scale = mtbf * 3600 - ops = np.array([n['id'] for n in self.nodes if not n['is_down']]) + ops = np.array([n['id'] for n in self.nodes if not n['is_down']]) if ops.size == 0: return [] - vals = weibull_min.rvs(shape, scale=scale, size=ops.size) + vals = weibull_min.rvs(shape, scale=scale, size=ops.size) failed = ops[vals < 0.001] for nid in failed: node = self.nodes[nid] - node['is_down'] = True + node['is_down'] = True node['available_cpu_cores'] = 0 node['available_gpu_units'] = 0 self.down_nodes.add(nid) diff --git a/raps/schedulers/__init__.py b/raps/schedulers/__init__.py index ca3431e..2019635 100644 --- a/raps/schedulers/__init__.py +++ b/raps/schedulers/__init__.py @@ -1,6 +1,7 @@ from importlib import import_module + def load_scheduler(scheduler_type="default"): """Dynamically loads a scheduler by type.""" module = import_module(f".{scheduler_type}", package="raps.schedulers") - return getattr(module, f"Scheduler") + return getattr(module, "Scheduler") diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index e77036b..fad33a1 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -62,9 +62,10 @@ class Scheduler: PolicyType.LJF, PolicyType.SJF]: break # The job at the front of the queue doesnt fit stop processing the queue. else: - raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!") + raise NotImplementedError( + "Depending on the Policy this choice should be explicit. Add the implementation above!") - def prepare_system_state(self,jobs_to_submit:List, running, timestep_start): + def prepare_system_state(self, jobs_to_submit: List, running, timestep_start): # def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): """ In the case of replay and fast forward, previously placed jobs should be present. @@ -94,7 +95,7 @@ class Scheduler: else: return jobs_to_submit - def place_job_and_manage_queues(self, job, queue,running, current_time): + def place_job_and_manage_queues(self, job, queue, running, current_time): self.resource_manager.assign_nodes_to_job(job, current_time, self.policy) running.append(job) queue.remove(job) @@ -113,13 +114,13 @@ class Scheduler: nodes_available = True # Checked above if job.nodes_required == 0: raise ValueError(f"Job Requested zero nodes: {job}") - #clear scheduled nodes + # clear scheduled nodes job.scheduled_nodes = [] else: pass # not enough nodes available return nodes_available - def backfill(self,queue:List, running:List, current_time): + def backfill(self, queue: List, running: List, current_time): # Try to find a backfill candidate from the entire queue. 
while queue:
             backfill_job = self.find_backfill_job(queue, running, current_time)
@@ -166,10 +167,10 @@ class Scheduler:
             pass
         elif self.bfpolicy == BackfillType.EASY:
             queue[:] = sorted(queue, key=lambda job: job.submit_time)
-            return self.return_first_fit(queue,time_limit)
+            return self.return_first_fit(queue, time_limit)
         elif self.bfpolicy == BackfillType.FIRSTFIT:
             pass  # Stay with the prioritization!
-            return self.return_first_fit(queue,time_limit)
+            return self.return_first_fit(queue, time_limit)
         elif self.bfpolicy in [BackfillType.BESTFIT,
                                BackfillType.GREEDY,
                                BackfillType.CONSERVATIVE,
diff --git a/raps/schedulers/experimental.py b/raps/schedulers/experimental.py
index 0c49ffd..046873a 100644
--- a/raps/schedulers/experimental.py
+++ b/raps/schedulers/experimental.py
@@ -95,9 +95,10 @@ class Scheduler:
                              ]:
                 break  # The job at the front of the queue doesn't fit; stop processing the queue.
             else:
-                raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!")
+                raise NotImplementedError(
+                    "Depending on the Policy this choice should be explicit. Add the implementation above!")

-    def place_job_and_manage_queues(self, job, queue,running, current_time):
+    def place_job_and_manage_queues(self, job, queue, running, current_time):
         self.resource_manager.assign_nodes_to_job(job, current_time)
         running.append(job)
         queue.remove(job)
@@ -105,7 +106,7 @@ class Scheduler:
             scheduled_nodes = summarize_ranges(job.scheduled_nodes)
             print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}")

-    def check_available_nodes(self,job):
+    def check_available_nodes(self, job):
         nodes_available = False
         if job.nodes_required <= len(self.resource_manager.available_nodes):
             if self.policy == PolicyType.REPLAY and job.scheduled_nodes:  # Check if we need exact set
@@ -116,13 +117,13 @@ class Scheduler:
             nodes_available = True  # Checked above
             if job.nodes_required == 0:
                 raise ValueError(f"Job Requested zero nodes: {job}")
-            #clear scheduled nodes
+            # clear scheduled nodes
             job.scheduled_nodes = []
         else:
             pass  # not enough nodes available
         return nodes_available

-    def backfill(self,queue:List, running:List, current_time):
+    def backfill(self, queue: List, running: List, current_time):
         # Try to find a backfill candidate from the entire queue.
         while queue:
             backfill_job = self.find_backfill_job(queue, running, current_time)
@@ -169,10 +170,10 @@ class Scheduler:
             pass
         elif self.bfpolicy == BackfillType.EASY:
             queue[:] = sorted(queue, key=lambda job: job.submit_time)
-            return self.return_first_fit(queue,time_limit)
+            return self.return_first_fit(queue, time_limit)
         elif self.bfpolicy == BackfillType.FIRSTFIT:
             pass  # Stay with the prioritization!
- return self.return_first_fit(queue,time_limit) + return self.return_first_fit(queue, time_limit) elif self.bfpolicy in [BackfillType.BESTFIT, BackfillType.GREEDY, BackfillType.CONSERVATIVE, @@ -206,9 +207,9 @@ class Scheduler: fugaku_priority = 0 # Create a tuple of the job and the priority priority = job.priority - priority_triple_list.append((fugaku_priority,priority,job)) + priority_triple_list.append((fugaku_priority, priority, job)) # Sort everythin according to fugaku_points - priority_triple_list = sorted(priority_triple_list, key=lambda x:x[0], reverse=True) + priority_triple_list = sorted(priority_triple_list, key=lambda x: x[0], reverse=True) # Find the first element with negative fugaku_points for cutoff, triple in enumerate(priority_triple_list): fugaku_priority, _, _ = triple @@ -216,7 +217,7 @@ class Scheduler: break first_part = priority_triple_list[:cutoff] # Sort everything afterwards according to job priority - second_part = sorted(priority_triple_list[cutoff:], key=lambda x:x[1], reverse=True) + second_part = sorted(priority_triple_list[cutoff:], key=lambda x: x[1], reverse=True) queue_a = [] queue_b = [] if first_part != []: @@ -244,9 +245,9 @@ class Scheduler: raise KeyError("No nodes indicated") priority = 100 * nnodes * power - priority_tuple_list.append((priority,job)) + priority_tuple_list.append((priority, job)) # Sort everythin according to new priority - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x: x[0], reverse=True) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) @@ -263,9 +264,9 @@ class Scheduler: power = 0 priority = power - priority_tuple_list.append((priority,job)) + priority_tuple_list.append((priority, job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=True) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x: x[0], reverse=True) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) @@ -282,16 +283,15 @@ class Scheduler: power = 0 priority = power - priority_tuple_list.append((priority,job)) + priority_tuple_list.append((priority, job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x: x[0], reverse=False) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) queue = list(queue) return queue - def sort_AEDP(self, queue, accounts=None): if queue == []: return queue @@ -305,9 +305,9 @@ class Scheduler: time = 0 priority = energy * time - priority_tuple_list.append((priority,job)) + priority_tuple_list.append((priority, job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x: x[0], reverse=False) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) @@ -327,9 +327,9 @@ class Scheduler: time = 0 priority = energy * time * time - priority_tuple_list.append((priority,job)) + priority_tuple_list.append((priority, job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) + priority_tuple_list = 
sorted(priority_tuple_list, key=lambda x: x[0], reverse=False) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) @@ -349,9 +349,9 @@ class Scheduler: time = 0 priority = power * time - priority_tuple_list.append((priority,job)) + priority_tuple_list.append((priority, job)) # Sort everythin according to power_acct_priority Disregarding size - priority_tuple_list = sorted(priority_tuple_list, key=lambda x:x[0], reverse=False) + priority_tuple_list = sorted(priority_tuple_list, key=lambda x: x[0], reverse=False) queue = [] if priority_tuple_list != []: _, queue = zip(*priority_tuple_list) diff --git a/raps/schedulers/multitenant.py b/raps/schedulers/multitenant.py index 539605e..85c3024 100644 --- a/raps/schedulers/multitenant.py +++ b/raps/schedulers/multitenant.py @@ -40,7 +40,9 @@ class Scheduler: # Iterate over a copy of the queue since we might remove items for job in queue[:]: if self.debug: - print(f"[DEBUG] Scheduler: Considering job {job.id} (CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required})") + print( + f"[DEBUG] Scheduler: Considering job {job.id} " + f"(CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required})") if self.policy == PolicyType.REPLAY: if job.start_time > current_time: continue # Replay: Job didn't start yet. Next! @@ -61,15 +63,17 @@ class Scheduler: # After backfill dedice continue processing the queue or wait, continuing may result in fairness issues. if self.policy in [PolicyType.REPLAY]: - # print(f"Nodes available {nodes_available} - Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}") + # print(f"Nodes available {nodes_available} - " + # f"Req:{len(job.requested_nodes)} N-avail:{len(self.resource_manager.available_nodes)}") continue # Regardless if the job at the front of the queue doenst fit, try placing all of them. elif self.policy in [PolicyType.FCFS, PolicyType.PRIORITY, PolicyType.LJF, PolicyType.SJF]: break # The job at the front of the queue doesnt fit stop processing the queue. else: - raise NotImplementedError("Depending on the Policy this choice should be explicit. Add the implementation above!") + raise NotImplementedError( + "Depending on the Policy this choice should be explicit. Add the implementation above!") - def prepare_system_state(self,jobs_to_submit:List, running, timestep_start): + def prepare_system_state(self, jobs_to_submit: List, running, timestep_start): # def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): """ In the case of replay and fast forward, previously placed jobs should be present. @@ -99,7 +103,7 @@ class Scheduler: else: return jobs_to_submit - def place_job_and_manage_queues(self, job, queue,running, current_time, node_id): + def place_job_and_manage_queues(self, job, queue, running, current_time, node_id): self.resource_manager.assign_nodes_to_job(job, current_time, node_id) running.append(job) queue.remove(job) @@ -112,7 +116,11 @@ class Scheduler: # Iterate through all nodes managed by the ResourceManager for node in self.resource_manager.nodes: if self.debug: - print(f"[DEBUG] Checking node {node['id']}: Available CPU: {node['available_cpu_cores']}, Available GPU: {node['available_gpu_units']}. Job needs CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required}") + print( + f"[DEBUG] Checking node {node['id']}: " + f"Available CPU: {node['available_cpu_cores']}, " + f"Available GPU: {node['available_gpu_units']}. 
" + f"Job needs CPU: {job.cpu_cores_required}, GPU: {job.gpu_units_required}") # Skip if the node is down if node['is_down']: continue @@ -125,7 +133,7 @@ class Scheduler: # If no suitable node is found, return None return None - def backfill(self,queue:List, running:List, current_time): + def backfill(self, queue: List, running: List, current_time): # Try to find a backfill candidate from the entire queue. while queue: backfill_job, node_id = self.find_backfill_job(queue, running, current_time) @@ -146,13 +154,13 @@ class Scheduler: return None, None # Identify when the nex job in the queue could run as a time limit: - first_job = queue[0] + # first_job = queue[0] # Unused # For multitenancy, we need to check if the first job can fit on any node # based on its core/GPU requirements, not just nodes_required. # This is a simplification; a more complex backfill might consider # if the job can fit by combining resources from multiple nodes. # For now, we assume it needs to fit on a single node. - + # We need to know the total available resources if all running jobs finish by shadow_time_end # This is complex with multitenancy, so for now, we'll simplify the backfill logic # to just check if a job can fit on *any* node, not necessarily the one @@ -162,11 +170,11 @@ class Scheduler: # With multitenancy, this needs a more sophisticated resource projection. # For now, we will make `time_limit` effectively infinite for backfill candidates # if the job can fit on *any* node, and rely on `check_available_nodes`. - + # Revert to a simpler time_limit for now, or remove it if not applicable # For now, let's assume time_limit is not strictly tied to node availability # in the same way as before, and focus on resource availability. - time_limit = float('inf') # Effectively no time limit for backfill candidates + time_limit = float('inf') # Effectively no time limit for backfill candidates # We now have the time_limit after which no backfilled job should end # as the next job in line has the necessary resrouces after this time limit. @@ -176,10 +184,10 @@ class Scheduler: pass elif self.bfpolicy == BackfillType.EASY: queue[:] = sorted(queue, key=lambda job: job.submit_time) - return self.return_first_fit(queue,time_limit) + return self.return_first_fit(queue, time_limit) elif self.bfpolicy == BackfillType.FIRSTFIT: pass # Stay with the prioritization! 
- return self.return_first_fit(queue,time_limit) + return self.return_first_fit(queue, time_limit) elif self.bfpolicy in [BackfillType.BESTFIT, BackfillType.GREEDY, BackfillType.CONSERVATIVE, diff --git a/raps/schedulers/replay.py b/raps/schedulers/replay.py index 4b32809..7a0abcf 100644 --- a/raps/schedulers/replay.py +++ b/raps/schedulers/replay.py @@ -21,7 +21,7 @@ class Scheduler: """Sort jobs based on the selected scheduling policy.""" return sorted(queue, key=lambda job: job.start_time) - def prepare_system_state(self,queue,running): + def prepare_system_state(self, queue, running): return queue def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): @@ -44,7 +44,7 @@ class Scheduler: nodes_available = True # Checked above if job.nodes_required == 0: raise ValueError(f"Job Requested zero nodes: {job}") - #clear scheduled nodes + # clear scheduled nodes job.scheduled_nodes = [] else: pass # not enough nodes available @@ -55,4 +55,7 @@ class Scheduler: queue.remove(job) else: # This is a replay so this should not happen - raise ValueError(f"Nodes not available!\nRequested:{job.scheduled_nodes}\nAvailable:{self.resource_manager.available_nodes}\n{job.__dict__}; Policy: {self.policy}") + raise ValueError( + f"Nodes not available!\nRequested:{job.scheduled_nodes}\n" + f"Available:{self.resource_manager.available_nodes}\n{job.__dict__}; " + f"Policy: {self.policy}") diff --git a/raps/schedulers/scheduleflow.py b/raps/schedulers/scheduleflow.py index c510030..2af694d 100644 --- a/raps/schedulers/scheduleflow.py +++ b/raps/schedulers/scheduleflow.py @@ -1,9 +1,6 @@ -from raps.job import JobState -from raps.utils import summarize_ranges from third_party.ScheduleFlow import ScheduleFlow from third_party.ScheduleFlow import _intScheduleFlow from third_party.ScheduleFlow._intScheduleFlow import EventType -from ..job import job_dict class Scheduler: @@ -36,13 +33,11 @@ class Scheduler: # self.sf_end_list = [] # list as returned from sf_scheduler.start_job # self.sf_action_list = [] # list as returned from sf_scheduler.stop_job - def gif(self): - logs = self._sf_runtime.get_stats() - #vis_hanlder = _intScheduleFlow.VizualizationEngine(self.sf_scheduler. + # logs = self._sf_runtime.get_stats() # Unused + # vis_hanlder = _intScheduleFlow.VizualizationEngine(self.sf_scheduler. self._sf_runtime._Runtime__generate_gif() - def sort_jobs(self, queue, accounts=None): """ Optionally, pre-sort jobs. @@ -59,9 +54,9 @@ class Scheduler: def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): - #self._sf_runtim + # self._sf_runtim pass - #### SECOND TRY + # SECOND TRY new_queue_items = list(filter(lambda x: x not in self.queue, queue)) if new_queue_items: self.queue += new_queue_items @@ -93,15 +88,13 @@ class Scheduler: if len(start_jobs) > 0: self._sf_runtime._Runtime__job_start_event(start_jobs) for sf_app in start_jobs: - job = _match_sf_app_and_job(sf_app,queue,start_jobs) + job = _match_sf_app_and_job(sf_app, queue, start_jobs) queue.remove(job) self.resource_manager.assign_nodes_to_job(job, current_time, self.policy) running.append(job) - # Keep track of: All jobs have been submitted empty the queue! 
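# --- An aside on the matching convention used above: _convert_to_sf
# (below) stores job.id in Application.name, so recovering the RAPS job
# from a ScheduleFlow app is a name scan, as in _find_sf_in_queue and
# _match_sf_app_and_job. Note that `if len(match != 1):` in
# _find_sf_in_queue below takes the length of a boolean (a TypeError at
# runtime); `if len(match) != 1:` is presumably what was intended.
# A minimal sketch of the lookup, with a stand-in app class:
class _AppStub:
    def __init__(self, name):
        self.name = name

def _find_by_name_sketch(queue, sf_app):
    matches = [job for job in queue if job.id == sf_app.name]
    if len(matches) != 1:
        raise ValueError(f"expected exactly one match, got {len(matches)}")
    return matches[0]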
-
 #        remove_list = []
 #        job_list = []
 #        for x in self.sf_start_list:
@@ -123,8 +116,8 @@ class Scheduler:
 #            for x in remove_list:
 #                self.sf_start_list.remove(x)

-        #### First TRY
-        #if self.sf_end_list:
+        # First TRY
+        # if self.sf_end_list:
 #            remove_list = []
 #            job_list = []
 #            for x in self.sf_end_list:
@@ -140,13 +133,13 @@ class Scheduler:
         # We need to flect this on the raps side.

         # March the sf_scheduler forward based on the jobs
-        #end_jobs = self.sf_scheduler.start_job(current_time,sf_schedule[1])
-        #self.sf_scheduler.end_job(current_time,end_jobs)
+        # end_jobs = self.sf_scheduler.start_job(current_time,sf_schedule[1])
+        # self.sf_scheduler.end_job(current_time,end_jobs)

         # Add to running

         # Process the actions (each action is assumed to be (start_time, job_info))
-        #for act in actions:
+        # for act in actions:
 #            start_time, sf_job = act
 #            # Find the corresponding RAPS job using its ID
 #            job = self._find_job(queue, sf_job['job_id'])
@@ -160,9 +153,7 @@ class Scheduler:
 #            if debug:
 #                print(f"t={current_time}: Scheduled job {job.id} on nodes {summarize_ranges(job.scheduled_nodes)}")

-
-
-    def _find_sf_in_queue(self,queue,sf_app):
+    def _find_sf_in_queue(self, queue, sf_app):
         # Remember we added four digits and an underscore in _convert_to_sf:
         match = [x for x in queue if x.id == sf_app.name]
         if len(match != 1):
@@ -182,7 +173,13 @@ class Scheduler:
             priority = sf_prio
         resubmit_factor = -1
         name = job.id # We use the ID as name to be able to match when unpacking!
-        return ScheduleFlow.Application(nodes,submission_time,walltime,requested_walltimes,priority,resubmit_factor,name)
+        return ScheduleFlow.Application(nodes,
+                                        submission_time,
+                                        walltime,
+                                        requested_walltimes,
+                                        priority,
+                                        resubmit_factor,
+                                        name)

     def _find_job(self, queue, job_id):
         """
@@ -202,7 +199,7 @@
         return None


-def _match_sf_app_and_job(sf_app,queue,sf_queue):
+def _match_sf_app_and_job(sf_app, queue, sf_queue):
     match = [x for x in sf_queue if x.name == sf_app.name]
     if len(match) != 1:
         print("Multiple Matches")
diff --git a/raps/stats.py b/raps/stats.py
index ea6d0e5..f224862 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -4,7 +4,8 @@
 These are statistics on
 the engine
 the jobs

-Both could be part of the engine or jobs class, but as the are very verbose, try to keep statistics consolidated in this file.
+Both could be part of the engine or jobs class, but as they are very verbose,
+try to keep statistics consolidated in this file.
""" import sys from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss @@ -42,7 +43,7 @@ def get_engine_stats(engine: Engine): if engine.config['multitenant']: # Multitenancy Stats - total_jobs_loaded = engine.total_initial_jobs # Assuming this is passed to __init__ + total_jobs_loaded = engine.total_initial_jobs # Assuming this is passed to __init__ stats['total jobs loaded'] = total_jobs_loaded stats['jobs completed percentage'] = f"{(engine.jobs_completed / total_jobs_loaded * 100):.2f}%" @@ -50,8 +51,8 @@ def get_engine_stats(engine: Engine): # Calculate average concurrent jobs per node (average density across all nodes and timesteps) total_jobs_running_timesteps = 0 max_concurrent_jobs_per_node = 0 - sum_jobs_per_active_node = 0 # New: Sum of (jobs / active_nodes) for each timestep - count_active_timesteps_for_avg_active = 0 # New: Count of timesteps with active nodes + sum_jobs_per_active_node = 0 # New: Sum of (jobs / active_nodes) for each timestep + count_active_timesteps_for_avg_active = 0 # New: Count of timesteps with active nodes for occupancy_dict in engine.node_occupancy_history: current_timestep_total_occupancy = sum(occupancy_dict.values()) @@ -77,13 +78,13 @@ def get_engine_stats(engine: Engine): stats['avg concurrent jobs per node'] = "N/A" stats['max concurrent jobs per node'] = "N/A" - #network_stats = get_network_stats() - #stats.update(network_stats) + # network_stats = get_network_stats() + # stats.update(network_stats) return stats -def min_max_sum(value,min,max,sum): +def min_max_sum(value, min, max, sum): if value < 0: value = 0 if value < min: @@ -91,7 +92,7 @@ def min_max_sum(value,min,max,sum): if value > max: max = value sum += value - return min,max,sum + return min, max, sum def get_scheduler_stats(engine: Engine): @@ -170,7 +171,7 @@ def get_job_stats(engine: Engine): # Information on Job-Mix for job in engine.job_history_dict: job_size = job['num_nodes'] - min_job_size,max_job_size,sum_job_size = \ + min_job_size, max_job_size, sum_job_size = \ min_max_sum(job_size, min_job_size, max_job_size, sum_job_size) runtime = job['end_time'] - job['start_time'] @@ -194,7 +195,7 @@ def get_job_stats(engine: Engine): # Completion statistics wait_time = job["start_time"] - job["submit_time"] - min_wait_time,max_wait_time,sum_wait_time = \ + min_wait_time, max_wait_time, sum_wait_time = \ min_max_sum(wait_time, min_wait_time, max_wait_time, sum_wait_time) turnaround_time = job["end_time"] - job["submit_time"] @@ -214,10 +215,10 @@ def get_job_stats(engine: Engine): min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = \ min_max_sum(psf_partial_den, min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den) - min_cpu_u, max_cpu_u, sum_cpu_u = min_max_sum(job['avg_cpu_usage'],min_cpu_u, max_cpu_u, sum_cpu_u) - min_gpu_u, max_gpu_u, sum_gpu_u = min_max_sum(job['avg_gpu_usage'],min_gpu_u, max_gpu_u, sum_gpu_u) - min_ntx_u, max_ntx_u, sum_ntx_u = min_max_sum(job['avg_ntx_usage'],min_ntx_u, max_ntx_u, sum_ntx_u) - min_nrx_u, max_nrx_u, sum_nrx_u = min_max_sum(job['avg_nrx_usage'],min_nrx_u, max_nrx_u, sum_nrx_u) + min_cpu_u, max_cpu_u, sum_cpu_u = min_max_sum(job['avg_cpu_usage'], min_cpu_u, max_cpu_u, sum_cpu_u) + min_gpu_u, max_gpu_u, sum_gpu_u = min_max_sum(job['avg_gpu_usage'], min_gpu_u, max_gpu_u, sum_gpu_u) + min_ntx_u, max_ntx_u, sum_ntx_u = min_max_sum(job['avg_ntx_usage'], min_ntx_u, max_ntx_u, sum_ntx_u) + min_nrx_u, max_nrx_u, sum_nrx_u = min_max_sum(job['avg_nrx_usage'], min_nrx_u, max_nrx_u, sum_nrx_u) if job['num_nodes'] 
<= 5: jobsSmall += 1 @@ -255,21 +256,21 @@ def get_job_stats(engine: Engine): psf = 0 else: # Set these to -1 to indicate nothing ran - min_job_size, max_job_size, avg_job_size = -1,-1,-1 - min_runtime, max_runtime, avg_runtime = -1,-1,-1 - min_energy, max_energy, avg_energy = -1,-1,-1 - min_edp, max_edp, avg_edp = -1,-1,-1 - min_edp2, max_edp2, avg_edp2 = -1,-1,-1 - min_agg_node_hours, max_agg_node_hours, avg_agg_node_hours = -1,-1,-1 - min_wait_time, max_wait_time, avg_wait_time = -1,-1,-1 - min_turnaround_time, max_turnaround_time, avg_turnaround_time = -1,-1,-1 - min_awrt, max_awrt, avg_awrt = -1,-1,-1 + min_job_size, max_job_size, avg_job_size = -1, -1, -1 + min_runtime, max_runtime, avg_runtime = -1, -1, -1 + min_energy, max_energy, avg_energy = -1, -1, -1 + min_edp, max_edp, avg_edp = -1, -1, -1 + min_edp2, max_edp2, avg_edp2 = -1, -1, -1 + min_agg_node_hours, max_agg_node_hours, avg_agg_node_hours = -1, -1, -1 + min_wait_time, max_wait_time, avg_wait_time = -1, -1, -1 + min_turnaround_time, max_turnaround_time, avg_turnaround_time = -1, -1, -1 + min_awrt, max_awrt, avg_awrt = -1, -1, -1 psf = -1 - min_cpu_u, max_cpu_u, avg_cpu_u = -1,-1,-1 - min_gpu_u, max_gpu_u, avg_gpu_u = -1,-1,-1 - min_ntx_u, max_ntx_u, avg_ntx_u = -1,-1,-1 - min_nrx_u, max_nrx_u, avg_nrx_u = -1,-1,-1 + min_cpu_u, max_cpu_u, avg_cpu_u = -1, -1, -1 + min_gpu_u, max_gpu_u, avg_gpu_u = -1, -1, -1 + min_ntx_u, max_ntx_u, avg_ntx_u = -1, -1, -1 + min_nrx_u, max_nrx_u, avg_nrx_u = -1, -1, -1 job_stats = { 'jobs completed': engine.jobs_completed, diff --git a/raps/telemetry.py b/raps/telemetry.py index 12c1bcd..4b9cfe0 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -10,25 +10,30 @@ import re import sys import random import argparse -#import itertools +# import itertools import json import os.path if __name__ == "__main__": - #from raps.args import args,args_dict + # from raps.args import args,args_dict parser = argparse.ArgumentParser(description='Telemetry data validator') parser.add_argument('--jid', type=str, default='*', help='Replay job id') parser.add_argument('-f', '--replay', nargs='+', type=str, - help='Either: path/to/joblive path/to/jobprofile' + \ + help='Either: path/to/joblive path/to/jobprofile' ' -or- filename.npz (overrides --workload option)') - parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs','nodes'], help='Output plots') + parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs', 'nodes'], help='Output plots') parser.add_argument("--is-results-file", action='store_true', default=False, help='Output plots') - parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gannt with nodes required as line thickness (default false)") # duplicate in workload! - parser.add_argument('-t', '--time', type=str, default=None, help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') + parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, + # duplicate in workload! 
+                        help="Print Gantt with nodes required as line thickness (default false)")
+    parser.add_argument('-t', '--time', type=str, default=None,
+                        help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
     parser.add_argument('--system', type=str, default='frontier', help='System config to use')
     choices = ['prescribed', 'poisson']
-    parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, help=f'Modify arrival distribution ({choices[1]}) or use the original submit times ({choices[0]})')
+    parser.add_argument('--arrival', default=choices[0], type=str, choices=choices,
+                        help=f"Modify arrival distribution ({choices[1]}) "
+                             f"or use the original submit times ({choices[0]})")
     parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
     parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.')
     args = parser.parse_args()
@@ -38,14 +43,18 @@ import importlib
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
-from rich.progress import track
+# from rich.progress import track
 from raps.config import ConfigManager
 from raps.job import Job, job_dict
 import matplotlib.pyplot as plt
-from raps.plotting import Plotter, plot_submit_times, plot_nodes_histogram, plot_jobs_gantt, plot_nodes_gantt, spaced_colors, plot_network_histogram
+from raps.plotting import (
+    plot_jobs_gantt,
+    plot_nodes_gantt,
+    plot_network_histogram
+)
 from raps.utils import next_arrival_byconfargs, create_casename, convert_to_seconds
-#from raps.args import args, args_dict
+# from raps.args import args, args_dict


 class Telemetry:
@@ -62,15 +71,16 @@ class Telemetry:
         self.dirname = outname
         try:
             self.dataloader = importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__)
-        except:
+        except FileNotFoundError:
             print("WARNING: Failed to load dataloader")

-    def save_snapshot(self, *, jobs: list, timestep_start:int, timestep_end:int, args:dict, filename: str):
+    def save_snapshot(self, *, jobs: list, timestep_start: int, timestep_end: int, args: dict, filename: str):
         """Saves a snapshot of the jobs to a compressed file. """
         list_of_job_dicts = []
         for job in jobs:
             list_of_job_dicts.append(job.__dict__)
-        np.savez_compressed(filename, jobs=list_of_job_dicts, timestep_start=timestep_start, timestep_end=timestep_end, args=args)
+        np.savez_compressed(filename, jobs=list_of_job_dicts, timestep_start=timestep_start,
+                            timestep_end=timestep_end, args=args)

     def load_snapshot(self, snapshot: str, downscale=1) -> list:
         """Reads a snapshot from a compressed file and return 4 values: joblist, timestep_start, timestep_end and args.
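A note on the snapshot round-trip used here: np.savez_compressed stores each
keyword as a named array, and np.load hands back an NpzFile whose keys do not
become attributes, so hasattr(data, 'timestep_start')-style checks (as in the
hunk below) always fall through to their defaults; key membership is the
reliable test. A minimal sketch under that assumption, with a placeholder
filename and toy payload:

import numpy as np

# Sketch of the snapshot save/load round-trip.
np.savez_compressed("snap.npz", jobs=[{"id": 1}], timestep_start=0,
                    timestep_end=100, args={})
data = np.load("snap.npz", allow_pickle=True)
jobs = data["jobs"].tolist()
# NpzFile keys are tested by membership, not hasattr():
start = int(data["timestep_start"]) if "timestep_start" in data.files else 0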
@@ -87,63 +97,63 @@ class Telemetry: list_of_job_dicts = data['jobs'].tolist() for job_info in list_of_job_dicts: jobs.append(Job(job_info)) - if hasattr(data,'timestep_start'): + if hasattr(data, 'timestep_start'): timestep_start = int(data['timestep_start']) else: timestep_start = 0 - if hasattr(data,'timestep_end'): + if hasattr(data, 'timestep_end'): timestep_end = int(data['timestep_end']) else: timestep_end = np.inf - if hasattr(data,'args'): + if hasattr(data, 'args'): args_from_file = data['args'].tolist() else: args_from_file = None return jobs, \ - timestep_start, \ - timestep_end, \ - args_from_file + timestep_start, \ + timestep_end, \ + args_from_file def load_csv_results(self, file): jobs = [] time_start = 0 time_end = 0 - for line in pd.read_csv(file,chunksize=1): - job_info = job_dict(nodes_required=line.get('num_nodes').item(), # Named like this somewhere in the csv history dumper + for line in pd.read_csv(file, chunksize=1): + job_info = job_dict(nodes_required=line.get('num_nodes').item(), name=line.get('name').item(), account=line.get('account').item(), cpu_trace=None, gpu_trace=None, ntx_trace=None, nrx_trace=None, - #end_state=line.get('end_state').item(), + # end_state=line.get('end_state').item(), end_state=None, scheduled_nodes=json.loads(line.get('scheduled_nodes').item()), id=line.get('id').item(), - #priority=line.get('priority').item(), + # priority=line.get('priority').item(), priority=None, - #partition=line.get('partition').item(), + # partition=line.get('partition').item(), partition=None, submit_time=line.get('submit_time').item(), start_time=line.get('start_time').item(), end_time=line.get('end_time').item(), - #wall_time=line.get('wall_time').item(), + # wall_time=line.get('wall_time').item(), wall_time=line.get('end_time').item() - line.get('start_time').item(), - #trace_time=line.get('trace_time').item(), + # trace_time=line.get('trace_time').item(), trace_time=None, - #trace_start_time=line.get('trace_start_time').item(), + # trace_start_time=line.get('trace_start_time').item(), trace_start_time=None, - #trace_end_time=line.get('trace_end_time').item(), + # trace_end_time=line.get('trace_end_time').item(), trace_end_time=None, - #trace_missing_values=line.get('trace_missing_values').item(), + # trace_missing_values=line.get('trace_missing_values').item(), trace_missing_values=None ) job = Job(job_info) jobs.append(job) - #if hasattr(data,'args'): + # if hasattr(data,'args'): # args_from_file = data["args"].item() # This should be empty as csv contains no args. - #else: + # else: # args_from_file = None return jobs, time_start, time_end, None @@ -158,9 +168,9 @@ class Telemetry: def load_data_from_csv(self, file, *args, **kwargs): jobs = [] - df = pd.read_csv(file,chunksize=1, header='infer') + df = pd.read_csv(file, chunksize=1, header='infer') for d in df: - #print(d['name'].astype(str)) + # print(d['name'].astype(str)) job_info = job_dict(nodes_required=None, name=d['name'].astype(str).item(), account=d['account'].astype(str).item(), @@ -200,7 +210,7 @@ class Telemetry: """ Return (row, col) tuple for a cdu index """ return self.dataloader.cdu_pos(index, config=self.config) - def load_jobs_times_args_from_files(self,*,files, args, downscale=1): + def load_jobs_times_args_from_files(self, *, files, args, config, downscale=1): """ Load all files as combined jobs """ # Read telemetry data (either npz file or via custom data loader) # TODO: Merge args? 
See main.py:79 @@ -208,21 +218,22 @@ class Telemetry: timestep_start = sys.maxsize jobs = [] trigger_custom_dataloader = False - for i,file in enumerate(files): + for i, file in enumerate(files): file = os.path.normpath(file.lstrip('"').rstrip('"')) - if hasattr(args,'is_results_file') and args.is_results_file: + if hasattr(args, 'is_results_file') and args.is_results_file: if file.endswith(".csv"): jobs, timestep_start, timestep, _ = self.load_csv_results(file) elif file.endswith(".npz"): # Replay .npz file print(f"Loading {file}...") - jobs_from_file, timestep_start_from_file, timestep_end_from_file, args_from_file = self.load_snapshot(file) + jobs_from_file, timestep_start_from_file, timestep_end_from_file, args_from_file = self.load_snapshot( + file) if args_from_file is not None: - print("File was generated with:" +\ - f"\n--system {args_from_file.system} " +\ - f"-ff {args_from_file.fastforward} " +\ - f"-t {args_from_file.time}\n" +\ - f"All Args:\n{args_from_file}" +\ + print("File was generated with:" + f"\n--system {args_from_file.system} " + f"-ff {args_from_file.fastforward} " + f"-t {args_from_file.time}\n" + f"All Args:\n{args_from_file}" "To use these set them from the commandline!" ) else: @@ -230,19 +241,19 @@ class Telemetry: # Args are usually extracted to tell the users how to reporduce results. # They are not processed and re-set to said arguments automatily jobs.extend(jobs_from_file) - timestep_start = min(timestep_start,timestep_start_from_file) + timestep_start = min(timestep_start, timestep_start_from_file) timestep_end = max(timestep_end, timestep_end_from_file) - if hasattr(args,'scale') and args.scale: + if hasattr(args, 'scale') and args.scale: for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): job['nodes_required'] = random.randint(1, args.scale) job['scheduled_nodes'] = None # Setting to None triggers scheduler to assign nodes - if hasattr(args,'arrival') and args.arrival == 'poisson': + if hasattr(args, 'arrival') and args.arrival == 'poisson': print("available nodes:", config['AVAILABLE_NODES']) for job in tqdm(jobs, desc="Rescheduling jobs"): job['scheduled_nodes'] = None - job['submit_time'] = next_arrival_byconfargs(config,args) + job['submit_time'] = next_arrival_byconfargs(config, args) else: trigger_custom_dataloader = True break @@ -281,7 +292,10 @@ def run_telemetry(): args_dict['config'] = config td = Telemetry(**args_dict) if args.replay: - jobs, timestep_start, timestep_end, _ = td.load_jobs_times_args_from_files(files=args.replay,args=args) + jobs, timestep_start, timestep_end, _ = \ + td.load_jobs_times_args_from_files(files=args.replay, + args=args, + config=config) else: parser.print_help() @@ -327,11 +341,11 @@ def run_telemetry(): for job in jobs: job_vec = job.__dict__ # only if there’s at least one valid sample - if hasattr(job_vec,'ntx_trace'): + if hasattr(job_vec, 'ntx_trace'): ntx = np.array(job_vec.get('ntx_trace', [])) if ntx.size > 0 and not np.all(np.isnan(ntx)): ntx_means.append(np.nanmean(ntx)) - if hasattr(job_vec,'nrx_trace'): + if hasattr(job_vec, 'nrx_trace'): nrx = np.array(job_vec.get('nrx_trace', [])) if nrx.size > 0 and not np.all(np.isnan(nrx)): nrx_means.append(np.nanmean(nrx)) @@ -347,17 +361,17 @@ def run_telemetry(): print('No valid nrx_trace data found.') if args.plot: - fig,ax = plt.subplots() + fig, ax = plt.subplots() if args.plot == "jobs": - plot_jobs_gantt(ax=ax,jobs=jobs, bars_are_node_sized=args.gantt_nodes) + plot_jobs_gantt(ax=ax, jobs=jobs, bars_are_node_sized=args.gantt_nodes) 
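# --- A side note on the trace-averaging loop above: job_vec is
# job.__dict__, a plain dict, and hasattr() never reports dict keys, so
# the hasattr(job_vec, 'ntx_trace') guards are always False and the
# means are never collected. Key membership is presumably what was
# intended; a minimal sketch of that variant:
import numpy as np

def trace_mean_sketch(job_vec: dict, key: str):
    if key not in job_vec:  # membership test, not hasattr()
        return None
    trace = np.asarray(job_vec[key] if job_vec[key] is not None else [], dtype=float)
    if trace.size > 0 and not np.all(np.isnan(trace)):
        return float(np.nanmean(trace))
    return None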
ax.invert_yaxis() elif args.plot == "nodes": - plot_nodes_gantt(ax=ax,jobs=jobs) + plot_nodes_gantt(ax=ax, jobs=jobs) elif args.plot == "network": if ntx_means and nrx_means: # combine into total per‐job traffic net_means = [tx + rx for tx, rx in zip(ntx_means, nrx_means)] - plot_network_histogram(ax=ax,data=net_means) + plot_network_histogram(ax=ax, data=net_means) if args.output is not None: if args.output == "": filename = f"{td.dirname}.svg" diff --git a/raps/utils.py b/raps/utils.py index 3c7dbed..bbac74d 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -21,6 +21,7 @@ import json from raps.job import Job + def sum_values(values): return sum(x[1] for x in values) if values else 0 @@ -116,6 +117,7 @@ def truncated_weibull(scale, shape, min, max): if min < number <= max: return int(number) + def truncated_weibull_float(scale, shape, min, max): while True: number = random.weibullvariate(scale, shape) @@ -123,13 +125,12 @@ def truncated_weibull_float(scale, shape, min, max): return float(number) - -def return_nearest_power_of(*,number,base): +def return_nearest_power_of(*, number, base): if base == 1: return number else: - next_num = base ** math.ceil(math.log(number,base)) - prev_num = base ** math.floor(math.log(number,base)) + next_num = base ** math.ceil(math.log(number, base)) + prev_num = base ** math.floor(math.log(number, base)) if next_num - number < number - prev_num: return next_num else: @@ -346,7 +347,7 @@ def resampledf(df, time_resampled): @ In, None @ Out, CDU_names, list, list of CDU names """ - df.set_index('time',inplace=True) + df.set_index('time', inplace=True) df = df.reindex(df.index.union(time_resampled)).interpolate('values').loc[time_resampled] df = df.reset_index() return df @@ -388,7 +389,7 @@ def create_casename(prefix=''): return prefix + str(uuid.uuid4())[:7] -def create_file_indexed(prefix:str, path:str = None, ending:str = None, create=True) -> str: +def create_file_indexed(prefix: str, path: str = None, ending: str = None, create=True) -> str: if path is not None: os.makedirs(path, exist_ok=True) else: @@ -407,7 +408,7 @@ def create_file_indexed(prefix:str, path:str = None, ending:str = None, create=T index += 1 -def create_dir_indexed(dir:str, path:str = None) -> str: +def create_dir_indexed(dir: str, path: str = None) -> str: if dir is None: raise ValueError("'dir' cannot be none") if path is None: @@ -415,14 +416,14 @@ def create_dir_indexed(dir:str, path:str = None) -> str: index = 1 while True: dirname = f"{dir}_{index:03d}" - fullpath = os.path.join(path,dirname) + fullpath = os.path.join(path, dirname) if not os.path.exists(fullpath): - os.makedirs(fullpath,exist_ok=False) + os.makedirs(fullpath, exist_ok=False) return fullpath index += 1 -def next_arrival_byconfargs(config,args,reset=False): +def next_arrival_byconfargs(config, args, reset=False): arrival_rate = 1 arrival_time = config['JOB_ARRIVAL_TIME'] downscale = args.downscale @@ -434,7 +435,7 @@ def next_arrival_byconfargs(config,args,reset=False): return next_arrival(arrival_rate / (arrival_time * downscale), reset) -def next_arrival_byconfkwargs(config,kwargs,reset=False): +def next_arrival_byconfkwargs(config, kwargs, reset=False): arrival_rate = 1 arrival_time = config['JOB_ARRIVAL_TIME'] if kwargs['job_arrival_rate']: @@ -444,7 +445,7 @@ def next_arrival_byconfkwargs(config,kwargs,reset=False): return next_arrival(arrival_rate / arrival_time, reset) -def next_arrival(lambda_rate,reset=False, start_time=0): +def next_arrival(lambda_rate, reset=False, start_time=0): if not 
hasattr(next_arrival, 'next_time') or reset is True: # Initialize the first time it's called next_arrival.next_time = start_time @@ -455,7 +456,7 @@ def next_arrival(lambda_rate,reset=False, start_time=0): def convert_to_seconds(time_str): - if isinstance(time_str, (int,float)): + if isinstance(time_str, (int, float)): return time_str # this happens.... # Define the conversion factors time_factors = { @@ -535,7 +536,7 @@ def toJSON(obj): """Function to dump a json string from object""" return json.dumps( obj, - default=lambda o:o.__dict__, + default=lambda o: o.__dict__, sort_keys=True, indent=4) @@ -543,7 +544,7 @@ def toJSON(obj): def convert_numpy_to_builtin(obj): if isinstance(obj, dict): tmp_obj = dict() - for k,v in obj.items(): + for k, v in obj.items(): tmp_obj[k] = convert_numpy_to_builtin(v) return tmp_obj elif isinstance(obj, list): @@ -571,13 +572,13 @@ def get_current_utilization(trace, job: Job): if time_quanta_index < 0: time_quanta_index = 0 - if (isinstance(trace,list) and trace != []) or \ + if (isinstance(trace, list) and trace != []) or \ (isinstance(trace, np.ndarray) and trace.size != 0): if time_quanta_index < len(trace): util = get_utilization(trace, time_quanta_index) else: - util = get_utilization(trace, max(0,len(trace) - 1)) - elif isinstance(trace,float) or isinstance(trace,int): + util = get_utilization(trace, max(0, len(trace) - 1)) + elif isinstance(trace, float) or isinstance(trace, int): util = trace else: util = 0.0 diff --git a/raps/validators.py b/raps/validators.py index 14e6743..cb811dd 100644 --- a/raps/validators.py +++ b/raps/validators.py @@ -6,7 +6,7 @@ def recompute_power(nodes, running_jobs, current_time): idx = min(idx, len(j.cpu_trace)-1) cpu_p = j.cpu_trace[idx] gpu_p = j.gpu_trace[idx] if j.gpu_trace else 0 - nid = j.scheduled_nodes[0] + nid = j.scheduled_nodes[0] node_power[nid] += cpu_p + gpu_p total = sum(node_power.values()) return node_power, total diff --git a/raps/weather.py b/raps/weather.py index b31f88e..8a4e138 100644 --- a/raps/weather.py +++ b/raps/weather.py @@ -52,13 +52,14 @@ class Weather: if not self.zip_code or not self.country_code: print("Error: ZIP code or country code is not specified.") return None, None - - geocoding_url = f'https://nominatim.openstreetmap.org/search?postalcode={self.zip_code}&country={self.country_code}&format=json' + + geocoding_url = "https://nominatim.openstreetmap.org/search?" + \ + f"postalcode={self.zip_code}&country={self.country_code}&format=json" headers = { 'User-Agent': 'ExaDigiT' # Custom User-Agent header } response = requests.get(geocoding_url, headers=headers, verify=False) # Disable SSL verification temporarily - + # Check for successful response if response.status_code == 200: try: @@ -82,10 +83,12 @@ class Weather: if self.lat is None or self.lon is None: print("Error: Latitude and longitude are not set. Please provide valid ZIP code and country code.") return - - weather_url = f'https://archive-api.open-meteo.com/v1/archive?latitude={self.lat}&longitude={self.lon}&start_date={date}&end_date={date}&temperature_unit=celsius&hourly=temperature_2m' + + weather_url = "https://archive-api.open-meteo.com/v1/archive?" 
+ \ + f"latitude={self.lat}&longitude={self.lon}&" + \ + f"start_date={date}&end_date={date}&temperature_unit=celsius&hourly=temperature_2m" response = requests.get(weather_url, verify=False) # Disable SSL verification temporarily - + # Check for successful response if response.status_code == 200: try: @@ -93,7 +96,7 @@ class Weather: if 'hourly' in data and 'temperature_2m' in data['hourly']: times = data['hourly']['time'] temperatures = data['hourly']['temperature_2m'] - + # Cache the weather data for fast lookup for i, time in enumerate(times): temp_celsius = temperatures[i] @@ -108,7 +111,6 @@ class Weather: else: print(f"Error fetching weather data. Status Code: {response.status_code}") - def get_temperature(self, target_datetime): """ Get temperature for a specific datetime from cached data. @@ -116,13 +118,13 @@ class Weather: if not self.has_coords: print("Error: Latitude and longitude are not set. Please provide valid ZIP code and country code.") return None - + # Round target_datetime to the nearest previous hour target_hour = target_datetime.replace(minute=0, second=0, microsecond=0) - + # Convert to string format without timezone info to match cache format target_hour_str = target_hour.isoformat(timespec='minutes').replace('+00:00', '') # Remove timezone information - + # Retrieve from cache if target_hour_str in self.weather_cache: return self.weather_cache[target_hour_str] diff --git a/raps/workload.py b/raps/workload.py index 128c286..92d4e46 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -24,32 +24,38 @@ JOB_END_PROBS : list List of probabilities for different job end states. """ +from raps.utils import ( + truncated_normalvariate_int, + truncated_normalvariate_float, + determine_state, next_arrival, + next_arrival_byconfargs, + truncated_weibull, + truncated_weibull_float +) import math import random import numpy as np import matplotlib.pyplot as plt from raps.telemetry import Telemetry from raps.job import job_dict, Job -from raps.utils import create_file_indexed, create_dir_indexed, convert_to_seconds +from raps.utils import create_file_indexed -JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",\ - "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM",\ - "ABINIT", "Cactus", "Charm++", "NWChem", "STAR-CCM+",\ - "Gaussian", "ANSYS", "COMSOL", "PLUMED", "nekrs",\ - "TensorFlow", "PyTorch", "BLAST", "Spark", "GAMESS",\ +JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD", + "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM", + "ABINIT", "Cactus", "Charm++", "NWChem", "STAR-CCM+", + "Gaussian", "ANSYS", "COMSOL", "PLUMED", "nekrs", + "TensorFlow", "PyTorch", "BLAST", "Spark", "GAMESS", "ORCA", "Simulink", "MOOSE", "ELK"] -ACCT_NAMES = ["ACT01", "ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07",\ +ACCT_NAMES = ["ACT01", "ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07", "ACT08", "ACT09", "ACT10", "ACT11", "ACT12", "ACT13", "ACT14"] MAX_PRIORITY = 500000 -from raps.utils import truncated_normalvariate_int, truncated_normalvariate_float, determine_state, next_arrival, next_arrival_byconfargs, truncated_weibull, truncated_weibull_float - class Workload: - def __init__(self,args,*configs): + def __init__(self, args, *configs): """ Initialize Workload with multiple configurations. 
""" self.partitions = [config['system_name'] for config in configs] self.config_map = {config['system_name']: config for config in configs} @@ -58,24 +64,29 @@ class Workload: def generate_jobs(self): # This function calls the job generation function as specified by the workload keyword. # The respective funciton of this class is called. - jobs = getattr(self,self.args.workload)(args=self.args) + jobs = getattr(self, self.args.workload)(args=self.args) return jobs - def compute_traces(self, cpu_util: float, gpu_util: float, wall_time: int, trace_quanta: int) -> tuple[np.ndarray, np.ndarray]: + def compute_traces(self, + cpu_util: float, + gpu_util: float, + wall_time: int, + trace_quanta: int + ) -> tuple[np.ndarray, np.ndarray]: """ Compute CPU and GPU traces based on mean CPU & GPU utilizations and wall time. """ cpu_trace = cpu_util * np.ones(int(wall_time) // trace_quanta) gpu_trace = gpu_util * np.ones(int(wall_time) // trace_quanta) return (cpu_trace, gpu_trace) - def job_arrival_distribution_draw_poisson(self,args,config): - return next_arrival_byconfargs(config,args) + def job_arrival_distribution_draw_poisson(self, args, config): + return next_arrival_byconfargs(config, args) - def job_size_distribution_draw_uniform(self,args,config): + def job_size_distribution_draw_uniform(self, args, config): min_v = 1 max_v = config['MAX_NODES_PER_JOB'] if (args.jobsize_is_power_of is not None): base = args.jobsize_is_power_of - possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v,base))))] + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] selection = random.randint(0, len(possible_jobsizes) - 1) number = possible_jobsizes[selection] elif (args.jobsize_is_of_degree is not None): @@ -87,14 +98,14 @@ class Workload: number = random.randint(1, config['MAX_NODES_PER_JOB']) return number - def job_size_distribution_draw_weibull(self,args,config): + def job_size_distribution_draw_weibull(self, args, config): min_v = 1 max_v = config['MAX_NODES_PER_JOB'] if (args.jobsize_is_power_of is not None): base = args.jobsize_is_power_of - possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v,base))))] - scale = math.log(args.jobsize_weibull_scale,base) - shape = math.log(args.jobsize_weibull_shape,base) + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] + scale = math.log(args.jobsize_weibull_scale, base) + shape = math.log(args.jobsize_weibull_shape, base) selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) number = possible_jobsizes[selection] elif (args.jobsize_is_of_degree is not None): @@ -105,17 +116,18 @@ class Workload: selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) number = possible_jobsizes[selection] else: - number = truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, 1, config['MAX_NODES_PER_JOB']) + number = truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, + 1, config['MAX_NODES_PER_JOB']) return number - def job_size_distribution_draw_normal(self,args,config): + def job_size_distribution_draw_normal(self, args, config): min_v = 1 max_v = config['MAX_NODES_PER_JOB'] if (args.jobsize_is_power_of is not None): base = args.jobsize_is_power_of - possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v,base))))] - mean = math.log(args.jobsize_normal_mean,base) - stddev = math.log(args.jobsize_normal_stddev,base) 
# (len(possible_jobsizes) / (max_v - min_v)) + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] + mean = math.log(args.jobsize_normal_mean, base) + stddev = math.log(args.jobsize_normal_stddev, base) # (len(possible_jobsizes) / (max_v - min_v)) selection = truncated_normalvariate_int(mean, stddev, 0, len(possible_jobsizes) - 1) number = possible_jobsizes[selection - 1] elif (args.jobsize_is_of_degree is not None): @@ -126,35 +138,48 @@ class Workload: selection = truncated_weibull(mean, stddev, 0, len(possible_jobsizes) - 1) number = possible_jobsizes[selection] else: - number = truncated_normalvariate_int(args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) + number = truncated_normalvariate_int( + args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) return number - def cpu_utilization_distribution_draw_uniform(self,args,config): + def cpu_utilization_distribution_draw_uniform(self, args, config): return random.uniform(0.0, config['CPUS_PER_NODE']) - def cpu_utilization_distribution_draw_normal(self,args,config): - return truncated_normalvariate_float(args.cpuutil_normal_mean, args.cpuutil_normal_stddev,0.0, config['CPUS_PER_NODE']) + def cpu_utilization_distribution_draw_normal(self, args, config): + return truncated_normalvariate_float(args.cpuutil_normal_mean, + args.cpuutil_normal_stddev, + 0.0, config['CPUS_PER_NODE']) - def cpu_utilization_distribution_draw_weibull(self,args,config): - return truncated_weibull_float(args.cpuutil_weibull_scale, args.cpuutil_weibull_shape,0.0, config['CPUS_PER_NODE']) + def cpu_utilization_distribution_draw_weibull(self, args, config): + return truncated_weibull_float(args.cpuutil_weibull_scale, + args.cpuutil_weibull_shape, + 0.0, config['CPUS_PER_NODE']) - def gpu_utilization_distribution_draw_uniform(self,args,config): + def gpu_utilization_distribution_draw_uniform(self, args, config): return random.uniform(0.0, config['GPUS_PER_NODE']) - def gpu_utilization_distribution_draw_normal(self,args,config): - return truncated_normalvariate_float(args.gpuutil_normal_mean, args.gpuutil_normal_stddev,0.0, config['GPUS_PER_NODE']) + def gpu_utilization_distribution_draw_normal(self, args, config): + return truncated_normalvariate_float(args.gpuutil_normal_mean, + args.gpuutil_normal_stddev, + 0.0, config['GPUS_PER_NODE']) - def gpu_utilization_distribution_draw_weibull(self,args,config): - return truncated_weibull_float(args.gpuutil_weibull_scale, args.gpuutil_weibull_shape,0.0 , config['GPUS_PER_NODE']) + def gpu_utilization_distribution_draw_weibull(self, args, config): + return truncated_weibull_float(args.gpuutil_weibull_scale, + args.gpuutil_weibull_shape, + 0.0, config['GPUS_PER_NODE']) - def wall_time_distribution_draw_uniform(self,args,config): - return random.uniform(config['MIN_WALL_TIME'],config['MAX_WALL_TIME']) + def wall_time_distribution_draw_uniform(self, args, config): + return random.uniform(config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) - def wall_time_distribution_draw_normal(self,args,config): - return max(1,truncated_normalvariate_int(float(args.walltime_normal_mean), float(args.walltime_normal_stddev), config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) / 3600 * 3600) + def wall_time_distribution_draw_normal(self, args, config): + return max(1, truncated_normalvariate_int(float(args.walltime_normal_mean), + float(args.walltime_normal_stddev), config['MIN_WALL_TIME'], + config['MAX_WALL_TIME']) / 3600 * 3600) - def 
wall_time_distribution_draw_weibull(self,args,config): - return truncated_weibull(args.walltime_weibull_scale, args.walltime_weibull_shape, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) + def wall_time_distribution_draw_weibull(self, args, config): + return truncated_weibull(args.walltime_weibull_scale, + args.walltime_weibull_shape, + config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) def generate_jobs_from_distribution(self, *, job_arrival_distribution_to_draw_from, @@ -168,25 +193,25 @@ class Workload: partition = random.choice(self.partitions) config = self.config_map[partition] for job_index in range(args.numjobs): - submit_time = int(job_arrival_distribution_to_draw_from(args,config)) + submit_time = int(job_arrival_distribution_to_draw_from(args, config)) start_time = submit_time - nodes_required = job_size_distribution_to_draw_from(args,config) + nodes_required = job_size_distribution_to_draw_from(args, config) name = random.choice(JOB_NAMES) account = random.choice(ACCT_NAMES) - cpu_util = cpu_util_distribution_to_draw_from(args,config) + cpu_util = cpu_util_distribution_to_draw_from(args, config) if "CORES_PER_CPU" in config: cpu_cores_required = random.randint(0, config["CORES_PER_CPU"]) else: cpu_cores_required = None - gpu_util = gpu_util_distribution_to_draw_from(args,config) + gpu_util = gpu_util_distribution_to_draw_from(args, config) if "GPUS_PER_NODE" in config: - if isinstance(gpu_util,list): - gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"],math.ceil(max(gpu_util)))) + if isinstance(gpu_util, list): + gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"], math.ceil(max(gpu_util)))) else: - gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"],math.ceil(gpu_util))) - wall_time = wall_time_distribution_to_draw_from(args,config) + gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"], math.ceil(gpu_util))) + wall_time = wall_time_distribution_to_draw_from(args, config) end_time = start_time + wall_time - time_limit = max(wall_time,wall_time_distribution_to_draw_from(args,config)) + time_limit = max(wall_time, wall_time_distribution_to_draw_from(args, config)) end_state = determine_state(config['JOB_END_PROBS']) cpu_trace = cpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) gpu_trace = gpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) @@ -214,25 +239,48 @@ class Workload: # Test for random 'reasonable' AI jobs def randomAI(self, **kwargs): - args = kwargs.get('args',None) + args = kwargs.get('args', None) jobs = [] for i in range(args.numjobs): - draw = random.randint(0,10) + draw = random.randint(0, 10) if draw == 0: - et = random.randint(7200,28800) - nr = random.choice([128,256,512,1024,1280,1792,2048]) - new_job = Job(job_dict(nodes_required=nr,name="LLM",account="llmUser",end_state="Success", - id=random.randint(1,99999),cpu_trace=0.1,gpu_trace=(random.uniform(0.55,0.8) * self.config_map[self.args.system]['GPUS_PER_NODE']),ntx_trace=None, - nrx_trace=None,submit_time=0,time_limit=random.randint(43200,43200),start_time=0,end_time=et,wall_time=et)) + et = random.randint(7200, 28800) + nr = random.choice([128, 256, 512, 1024, 1280, 1792, 2048]) + new_job = Job(job_dict(nodes_required=nr, + name="LLM", + account="llmUser", + end_state="Success", + id=random.randint(1, 99999), + cpu_trace=0.1, + gpu_trace=(random.uniform(0.55, 0.8) * + self.config_map[self.args.system]['GPUS_PER_NODE']), + ntx_trace=None, + nrx_trace=None, + submit_time=0, + 
time_limit=random.randint(43200, 43200), + start_time=0, + end_time=et, + wall_time=et)) else: - new_job = Job(job_dict(nodes_required=1,name="LLM",account="llmUser",end_state="Success", - id=random.randint(1,99999),cpu_trace=1,gpu_trace=(0.2 * self.config_map[self.args.system]['GPUS_PER_NODE']),ntx_trace=None, - nrx_trace=None,submit_time=0,time_limit=43200,start_time=0,end_time=7200,wall_time=random.randint(60,7200))) + new_job = Job(job_dict(nodes_required=1, + name="LLM", + account="llmUser", + end_state="Success", + id=random.randint(1, 99999), + cpu_trace=1, + gpu_trace=(0.2 * self.config_map[self.args.system]['GPUS_PER_NODE']), + ntx_trace=None, + nrx_trace=None, + submit_time=0, + time_limit=43200, + start_time=0, + end_time=7200, + wall_time=random.randint(60, 7200))) jobs.append(new_job) return jobs def synthetic(self, **kwargs): - args = kwargs.get('args',None) + args = kwargs.get('args', None) print(args) total_jobs = args.numjobs orig_job_size_distribution = args.jobsize_distribution @@ -242,11 +290,11 @@ class Workload: jobs = [] if len(args.jobsize_distribution) != 1 and sum(args.multimodal) != 1.0: raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}") - for i,(jsdist,wtdist,cudist,gudist,percentage) in enumerate(zip(args.jobsize_distribution, - args.walltime_distribution, - args.cpuutil_distribution, - args.gpuutil_distribution, - args.multimodal)): + for i, (jsdist, wtdist, cudist, gudist, percentage) in enumerate(zip(args.jobsize_distribution, + args.walltime_distribution, + args.cpuutil_distribution, + args.gpuutil_distribution, + args.multimodal)): args.numjobs = math.floor(total_jobs * percentage) args.jobsize_distribution = jsdist @@ -302,7 +350,7 @@ class Workload: gpu_util_distribution_to_draw_from=gpu_util_distribution_to_draw_from, wall_time_distribution_to_draw_from=wall_time_distribution_to_draw_from, args=args) - next_arrival(0,reset=True) + next_arrival(0, reset=True) jobs.extend(new_jobs) args.numjobs = total_jobs args.jobsize_distribution = orig_job_size_distribution @@ -317,7 +365,7 @@ class Workload: partition = random.choice(self.partitions) config = self.config_map[partition] - time_delta = args.time_delta + # time_delta = args.time_delta # Unused downscale = args.downscale config['MIN_WALL_TIME'] = config['MIN_WALL_TIME'] * downscale @@ -333,34 +381,36 @@ class Workload: gpu_util = random.random() * config['GPUS_PER_NODE'] mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 - wall_time = (truncated_normalvariate_int(mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) - time_limit = (truncated_normalvariate_int(mu, sigma, wall_time, config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) - #print(f"wall_time: {wall_time//downscale}") - # print(f"time_limit: {time_limit//downscale}") + wall_time = (truncated_normalvariate_int( + mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) + time_limit = (truncated_normalvariate_int(mu, sigma, wall_time, + config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) + # print(f"wall_time: {wall_time//downscale}") + # print(f"time_limit: {time_limit//downscale}") end_state = determine_state(config['JOB_END_PROBS']) cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) priority = random.randint(0, MAX_PRIORITY) net_tx, net_rx = None, None 
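# --- An aside on the arrival draw used just below: next_arrival() in
# raps/utils.py keeps its clock on a function attribute (see its hunk
# above) and advances it by exponential gaps, which yields Poisson
# arrivals. The sampling line itself is not shown in this patch, so
# this is a sketch of the assumed behavior, not the actual code:
import random

def next_arrival_sketch(lambda_rate, reset=False, start_time=0):
    if not hasattr(next_arrival_sketch, 'next_time') or reset:
        next_arrival_sketch.next_time = start_time
    # Exponential inter-arrival times with rate lambda_rate.
    next_arrival_sketch.next_time += random.expovariate(lambda_rate)
    return next_arrival_sketch.next_time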
# Jobs arrive according to Poisson process - time_to_next_job = int(next_arrival_byconfargs(config,args)) - #wall_time = wall_time * downscale - #time_limit = time_limit * downscale + time_to_next_job = int(next_arrival_byconfargs(config, args)) + # wall_time = wall_time * downscale + # time_limit = time_limit * downscale job_info = job_dict(nodes_required=nodes_required, name=name, - account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, ntx_trace=net_tx, - nrx_trace=net_rx, end_state=end_state, - id=job_index, priority=priority, - partition=partition, - submit_time=time_to_next_job - 100, - time_limit=time_limit, - start_time=time_to_next_job, - end_time=time_to_next_job + wall_time, - wall_time=wall_time, trace_time=wall_time, - trace_start_time=0, trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] * downscale, - downscale=downscale + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=time_to_next_job - 100, + time_limit=time_limit, + start_time=time_to_next_job, + end_time=time_to_next_job + wall_time, + wall_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time, + trace_quanta=config['TRACE_QUANTA'] * downscale, + downscale=downscale ) job = Job(job_info) jobs.append(job) @@ -368,7 +418,7 @@ class Workload: def random(self, **kwargs): """ Generate random workload """ - args = kwargs.get('args',None) + args = kwargs.get('args', None) return self.generate_random_jobs(args=args) def peak(self, **kwargs): @@ -389,7 +439,8 @@ class Workload: job_time = len(gpu_trace) * config['TRACE_QUANTA'] # Create job info for this partition job_info = job_dict(nodes_required=config['AVAILABLE_NODES'], - scheduled_nodes=[], # Down nodes, therefore doesnt work list(range(config['AVAILABLE_NODES'])), + # Down nodes, therefore doesnt work list(range(config['AVAILABLE_NODES'])), + scheduled_nodes=[], name=f"Max Test {partition}", account=ACCT_NAMES[0], cpu_trace=cpu_trace, @@ -592,7 +643,7 @@ class Workload: return jobs -def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): +def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): # put args.multimodal in dist_split! 
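# --- For reference, the multimodal split performed by synthetic()
# above hands each draw configuration its share of the total job count
# (mirroring args.numjobs = math.floor(total_jobs * percentage)); a
# minimal sketch:
import math

def split_job_counts_sketch(total_jobs, percentages):
    # percentages are validated elsewhere to sum to 1.0, e.g.
    # split_job_counts_sketch(100, [0.2, 0.8]) -> [20, 80]
    return [math.floor(total_jobs * p) for p in percentages]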
split = [1.0] num_dist = 1 @@ -605,8 +656,8 @@ def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): x2 = [x.time_limit for x in jobs] fig_m = plt.figure() gs = fig_m.add_gridspec(30, 1) - gs0 = gs[0:20].subgridspec(500,500,hspace=0,wspace=0) - gs1 = gs[24:].subgridspec(1,1) + gs0 = gs[0:20].subgridspec(500, 500, hspace=0, wspace=0) + gs1 = gs[24:].subgridspec(1, 1) ax_top = fig_m.add_subplot(gs0[:]) ax_top.axis('off') @@ -616,28 +667,28 @@ def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): ax_bot.axis('off') ax_bot.set_title('Submit Time + Wall Time') - #ax0 = fig_m.add_subplot(gs[:2,:]) - #ax1 = fig_m.add_subplot(gs[2:,:]) + # ax0 = fig_m.add_subplot(gs[:2,:]) + # ax1 = fig_m.add_subplot(gs[2:,:]) - #gss = gridspec.GridSpec(5, 5, figure=ax0) - #fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) + # gss = gridspec.GridSpec(5, 5, figure=ax0) + # fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) axs = [] col = [] - col.append(fig_m.add_subplot(gs0[:100,:433])) - col.append(fig_m.add_subplot(gs0[:100,433:])) + col.append(fig_m.add_subplot(gs0[:100, :433])) + col.append(fig_m.add_subplot(gs0[:100, 433:])) axs.append(col.copy()) col = [] - col.append(fig_m.add_subplot(gs0[100:,:433])) - col.append(fig_m.add_subplot(gs0[100:,433:])) + col.append(fig_m.add_subplot(gs0[100:, :433])) + col.append(fig_m.add_subplot(gs0[100:, 433:])) axs.append(col.copy()) - ax_b = fig_m.add_subplot(gs1[:,:]) + ax_b = fig_m.add_subplot(gs1[:, :]) # Create scatter plot for i in range(len(x)): - axs[1][0].plot([x[i],x2[i]],[y[i],y[i]],color='lightblue',zorder=1) - axs[1][0].scatter(x2, y,marker='.',c='lightblue',zorder=2) - axs[1][0].scatter(x, y,zorder=3) + axs[1][0].plot([x[i], x2[i]], [y[i], y[i]], color='lightblue', zorder=1) + axs[1][0].scatter(x2, y, marker='.', c='lightblue', zorder=2) + axs[1][0].scatter(x, y, zorder=3) cpu_util = [x.cpu_trace for x in jobs] if isinstance(cpu_util[0], np.ndarray): @@ -650,10 +701,10 @@ def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): elif isinstance(gpu_util[0], list): gpu_util = [sum(part) / len(part) for part in gpu_util] if not all([x == 0 for x in gpu_util]): - axs[0][1].scatter(cpu_util,gpu_util,zorder=2,marker='.',s=0.2) - axs[0][1].hist(gpu_util,bins=100,orientation='horizontal',zorder=1, density=True,color='tab:purple') - axs[0][1].axhline(np.mean(gpu_util), color='r', linewidth=1,zorder=3) - axs[0][1].set(ylim=[0,config['GPUS_PER_NODE']]) + axs[0][1].scatter(cpu_util, gpu_util, zorder=2, marker='.', s=0.2) + axs[0][1].hist(gpu_util, bins=100, orientation='horizontal', zorder=1, density=True, color='tab:purple') + axs[0][1].axhline(np.mean(gpu_util), color='r', linewidth=1, zorder=3) + axs[0][1].set(ylim=[0, config['GPUS_PER_NODE']]) axs[0][1].set_ylabel("gpu util") axs[0][1].yaxis.set_label_coords(1.15, 0.5) axs[0][1].yaxis.set_label_position("right") @@ -661,17 +712,17 @@ def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): else: axs[0][1].set_yticks([]) pass - axs[0][1].hist(cpu_util,bins=100,orientation='vertical',zorder=1, density=True,color='tab:cyan') - axs[0][1].axvline(np.mean(cpu_util), color='r', linewidth=1,zorder=3) - axs[0][1].set(xlim=[0,config['CPUS_PER_NODE']]) + axs[0][1].hist(cpu_util, bins=100, orientation='vertical', zorder=1, density=True, color='tab:cyan') + axs[0][1].axvline(np.mean(cpu_util), color='r', linewidth=1, zorder=3) + axs[0][1].set(xlim=[0, config['CPUS_PER_NODE']]) 
axs[0][1].set_xlabel("cpu util") - axs[0][1].xaxis.set_label_coords(0.5,1.30) + axs[0][1].xaxis.set_label_coords(0.5, 1.30) axs[0][1].xaxis.set_label_position("top") axs[0][1].xaxis.tick_top() - axs[0][0].hist(x2,bins=max(1,math.ceil(min(100,(max(x2) - min(x))))), orientation='vertical',color='lightblue') - axs[0][0].hist(x,bins=max(1,math.ceil(min(100,(max(x2) - min(x))))), orientation='vertical') + axs[0][0].hist(x2, bins=max(1, math.ceil(min(100, (max(x2) - min(x))))), orientation='vertical', color='lightblue') + axs[0][0].hist(x, bins=max(1, math.ceil(min(100, (max(x2) - min(x))))), orientation='vertical') axs[1][0].sharex(axs[0][0]) - axs[1][1].hist(y,bins=max(1,min(100,(max(y) - min(y)))), orientation='horizontal') + axs[1][1].hist(y, bins=max(1, min(100, (max(y) - min(y)))), orientation='horizontal') axs[1][0].sharey(axs[1][1]) # Remove ticks @@ -686,12 +737,12 @@ def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for - (x1,x2) in [(n // 60,n % 60) for - n in x_label_mins[0::60]]] - axs[1][0].set_xticks(x_label_ticks,x_label_str) + (x1, x2) in [(n // 60, n % 60) for + n in x_label_mins[0::60]]] + axs[1][0].set_xticks(x_label_ticks, x_label_str) miny = min(y) maxy = max(y) - interval = max(1,maxy // 10) + interval = max(1, maxy // 10) y_ticks = np.arange(0, maxy, interval) y_ticks[0] = miny axs[1][0].set_yticks(y_ticks) @@ -709,41 +760,41 @@ def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): split_offset = math.floor(len(x) * split[split_index]) if gantt_nodes: if split[0] == 0.0: - ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5) + ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) split_index += 1 for i in range(len(x)): - #ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) - ax_b.barh(offset + nodes_required[i] / 2,duration[i], height=nodes_required[i], left=submit_t[i]) + # ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) + ax_b.barh(offset + nodes_required[i] / 2, duration[i], height=nodes_required[i], left=submit_t[i]) offset += nodes_required[i] if i != len(x) - 1 and i == split_offset - 1 and split_index < len(split): - ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5) + ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) split_index += 1 split_offset += math.floor(len(x) * split[split_index]) - #ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) + # ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) if split[-1] == 0.0: - ax_b.axhline(y=offset, color='red', linestyle='--',lw=0.5) + ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) split_index += 1 ax_b.set_ylabel("Jobs' acc. 
nodes") else: for i in range(len(x)): - ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) - for i in range(1,num_dist): + ax_b.barh(i, duration[i], height=1.0, left=submit_t[i]) + for i in range(1, num_dist): if num_dist == 1: break - ax_b.axhline(y=(len(x) * split[split_index]) - 0.5, color='red', linestyle='--',lw=0.5) + ax_b.axhline(y=(len(x) * split[split_index]) - 0.5, color='red', linestyle='--', lw=0.5) split_index += 1 ax_b.set_ylabel("Job ID") - #ax_b labels: + # ax_b labels: ax_b.set_xlabel("time [hh:mm]") minx_s = 0 maxx_s = math.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for - (x1,x2) in [(n // 60,n % 60) for - n in x_label_mins[0::60]]] + (x1, x2) in [(n // 60, n % 60) for + n in x_label_mins[0::60]]] - ax_b.set_xticks(x_label_ticks,x_label_str) + ax_b.set_xticks(x_label_ticks, x_label_str) ax_b.yaxis.set_inverted(True) plt.show() @@ -751,50 +802,68 @@ def plot_job_hist(jobs,config=None,dist_split=None, gantt_nodes=False): def add_workload_to_parser(parser): - choices = ['random', 'benchmark', 'peak', 'idle','synthetic', 'multitenant'] - parser.add_argument('-w', '--workload', type=str, choices=choices, default=choices[0], help='Type of synthetic workload') + choices = ['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant'] + parser.add_argument('-w', '--workload', type=str, choices=choices, + default=choices[0], help='Type of synthetic workload') - parser.add_argument("--multimodal", default=[1.0], type=float, nargs="+", help="Percentage to draw from each distribution (list of floats)e.g. '0.2 0.8' percentages apply in order to the list of the --distribution argument list.") + parser.add_argument("--multimodal", default=[1.0], type=float, nargs="+", + help="Percentage to draw from each distribution " + "(list of floats)e.g. 
'0.2 0.8'. Percentages apply"
                             " in order to the --distribution argument list.")

     # Jobsize:
-    parser.add_argument("--jobsize-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type')
+    parser.add_argument("--jobsize-distribution", type=str, nargs="+",
+                        choices=['uniform', 'weibull', 'normal'], default=None, help='Distribution type')

     parser.add_argument("--jobsize-normal-mean", type=float, required=False, help="Mean (mu) for Normal distribution")
-    parser.add_argument("--jobsize-normal-stddev", type=float, required=False, help="Standard deviation (sigma) for Normal distribution")
+    parser.add_argument("--jobsize-normal-stddev", type=float, required=False,
+                        help="Standard deviation (sigma) for Normal distribution")
     parser.add_argument("--jobsize-weibull-shape", type=float, required=False, help="Jobsize shape of weibull")
     parser.add_argument("--jobsize-weibull-scale", type=float, required=False, help="Jobsize scale of weibull")
-    parser.add_argument("--jobsize-is-of-degree", default=None, type=int,required=False,help="Draw jobsizes from distribution of degree N (squared,cubed).")
-    parser.add_argument("--jobsize-is-power-of", default=None, type=int,required=False,help="Draw jobsizes from distribution of power of N (2->2^x,3->3^x).")
+    parser.add_argument("--jobsize-is-of-degree", default=None, type=int, required=False,
+                        help="Draw jobsizes from distribution of degree N (squared,cubed).")
+    parser.add_argument("--jobsize-is-power-of", default=None, type=int, required=False,
+                        help="Draw jobsizes from distribution of power of N (2->2^x,3->3^x).")

     # Walltime:
-    parser.add_argument("--walltime-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=None, help='Distribution type')
+    parser.add_argument("--walltime-distribution", type=str, nargs="+",
+                        choices=['uniform', 'weibull', 'normal'], default=None, help='Distribution type')

-    parser.add_argument("--walltime-normal-mean", type=float, required=False, help="Walltime mean (mu) for Normal distribution")
-    parser.add_argument("--walltime-normal-stddev", type=float, required=False, help="Walltime standard deviation (sigma) for Normal distribution")
+    parser.add_argument("--walltime-normal-mean", type=float, required=False,
+                        help="Walltime mean (mu) for Normal distribution")
+    parser.add_argument("--walltime-normal-stddev", type=float, required=False,
+                        help="Walltime standard deviation (sigma) for Normal distribution")
     parser.add_argument("--walltime-weibull-shape", type=float, required=False, help="Walltime shape of weibull")
     parser.add_argument("--walltime-weibull-scale", type=float, required=False, help="Walltime scale of weibull")

     # Utilizations
-    parser.add_argument("--cpuutil-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=['uniform'], help='Distribution type')
+    parser.add_argument("--cpuutil-distribution", type=str, nargs="+",
+                        choices=['uniform', 'weibull', 'normal'], default=['uniform'], help='Distribution type')

-    parser.add_argument("--cpuutil-normal-mean", type=float, required=False, help="Walltime mean (mu) for Normal distribution")
-    parser.add_argument("--cpuutil-normal-stddev", type=float, required=False, help="Walltime standard deviation (sigma) for Normal distribution")
+    parser.add_argument("--cpuutil-normal-mean", type=float, required=False,
+                        help="CPU util mean (mu) for Normal distribution")
+    parser.add_argument("--cpuutil-normal-stddev", type=float, required=False,
+                        help="CPU util 
standard deviation (sigma) for Normal distribution")
     parser.add_argument("--cpuutil-weibull-shape", type=float, required=False, help="CPU util shape of weibull")
     parser.add_argument("--cpuutil-weibull-scale", type=float, required=False, help="CPU util scale of weibull")

-    parser.add_argument("--gpuutil-distribution", type=str, nargs="+", choices=['uniform','weibull','normal'], default=['uniform'], help='Distribution type')
+    parser.add_argument("--gpuutil-distribution", type=str, nargs="+",
+                        choices=['uniform', 'weibull', 'normal'], default=['uniform'], help='Distribution type')

-    parser.add_argument("--gpuutil-normal-mean", type=float, required=False, help="Walltime mean (mu) for Normal distribution")
-    parser.add_argument("--gpuutil-normal-stddev", type=float, required=False, help="Walltime standard deviation (sigma) for Normal distribution")
+    parser.add_argument("--gpuutil-normal-mean", type=float, required=False,
+                        help="GPU util mean (mu) for Normal distribution")
+    parser.add_argument("--gpuutil-normal-stddev", type=float, required=False,
+                        help="GPU util standard deviation (sigma) for Normal distribution")
     parser.add_argument("--gpuutil-weibull-shape", type=float, required=False, help="GPU util shape of weibull")
     parser.add_argument("--gpuutil-weibull-scale", type=float, required=False, help="GPU util scale of weibull")

-    parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, help="Print Gannt with nodes required as line thickness (default false)")
+    parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False,
+                        help="Print Gantt with nodes required as line thickness (default false)")

     return parser

@@ -811,7 +880,7 @@ def run_workload():
     config = ConfigManager(system_name=args.system).get_config()
     if args.replay:
         td = Telemetry(**args_dict)
-        jobs,_,_,_ = td.load_jobs_times_args_from_files(files=args.replay,args=args)
+        jobs, _, _, _ = td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config)
     else:
         workload = Workload(config)
         jobs = getattr(workload, args.workload)(args=args)
@@ -819,9 +888,9 @@ def run_workload():
     if args.output:
         timestep_start = min([x.submit_time for x in jobs])
         timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.wall_time for x in jobs]))
-        filename = create_file_indexed('wl',create=False,ending="npz").split(".npz")[0]
+        filename = create_file_indexed('wl', create=False, ending="npz").split(".npz")[0]
         # savez_compressed adds npz itself, but create_file_indexed needs to check for .npz to find existing files
-        np.savez_compressed(filename,jobs=jobs,timestep_start=timestep_start, timestep_end=timestep_end, args=args)
+        np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args)
        print(filename + ".npz")  # To stdout, to show which npz was created.

    def multitenant(self, **kwargs):
@@ -845,19 +914,19 @@ def run_workload():
        list[dict]
            List of job_dict entries. 
""" - mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') - wall_time = kwargs.get('wall_time', 3600) + mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') + wall_time = kwargs.get('wall_time', 3600) jobs = [] for partition in self.partitions: - cfg = self.config_map[partition] - trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) + cfg = self.config_map[partition] + trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) cores_per_cpu = cfg.get('CORES_PER_CPU', 1) cpus_per_node = cfg.get('CPUS_PER_NODE', 1) cores_per_node = cores_per_cpu * cpus_per_node - gpus_per_node = cfg.get('GPUS_PER_NODE', 0) + gpus_per_node = cfg.get('GPUS_PER_NODE', 0) n_nodes = cfg['AVAILABLE_NODES'] @@ -970,9 +1039,9 @@ def run_workload(): return jobs -def continuous_job_generation(*,engine,timestep,jobs): - #print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") - #print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") +def continuous_job_generation(*, engine, timestep, jobs): + # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") + # print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") if len(engine.queue) <= engine.continuous_workload.args.maxqueue: new_jobs = engine.continuous_workload.generate_jobs() jobs.extend(new_jobs) -- GitLab From 0b1b94384f798579b7e191950da1da64d32883a3 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 18 Aug 2025 10:16:31 -0400 Subject: [PATCH 234/388] Add 'j' and 'l' keys for speedup / slowdown --- raps/engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index df3ec32..b7a8b04 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -84,15 +84,15 @@ def keyboard_listener(state): tty.setcbreak(sys.stdin.fileno()) while True: char = sys.stdin.read(1) - if char == ' ' or char == 'k': + if char == 'k' or char == ' ': state.toggle_pause() if state.is_paused(): print("\n[PAUSED] Press space or k to resume.", file=sys.stderr) else: print("\n[RESUMED]", file=sys.stderr) - elif char == '+': + elif char == 'l' or char == '+': state.speed_up() - elif char == '-' or char == '_': + elif char == 'j' or char == '_': state.slow_down() finally: -- GitLab From 21940a65b34a508fb3ccb56ffd05dec5a6b03278 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 18 Aug 2025 11:20:40 -0400 Subject: [PATCH 235/388] Fix bug when trying to run system without dataloader --- raps/telemetry.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 4b9cfe0..a3a393e 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -71,8 +71,9 @@ class Telemetry: self.dirname = outname try: self.dataloader = importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__) - except FileNotFoundError: - print("WARNING: Failed to load dataloader") + except ImportError as e: + print(f"WARNING: Failed to load dataloader: {e}") + self.dataloader = None def save_snapshot(self, *, jobs: list, timestep_start: int, timestep_end: int, args: dict, filename: str): """Saves a snapshot of the jobs to a compressed file. """ -- GitLab From efe7c898373828448163950e27bc736dba2845eb Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 18 Aug 2025 15:56:52 -0400 Subject: [PATCH 236/388] Fixed usage of filedescription to cleanup terminal. 
--- raps/engine.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 3af1de0..338c155 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -5,6 +5,8 @@ import threading import sys import tty import termios +import os +import select import time from raps.job import Job, JobState @@ -82,20 +84,22 @@ def keyboard_listener(state): fd = sys.stdin.fileno() old_settings = termios.tcgetattr(fd) try: - tty.setcbreak(sys.stdin.fileno()) + tty.setcbreak(fd) # or tty.setraw(fd) while True: - char = sys.stdin.read(1) - if char == 'k' or char == ' ': - state.toggle_pause() - if state.is_paused(): - print("\n[PAUSED] Press space or k to resume.", file=sys.stderr) - else: - print("\n[RESUMED]", file=sys.stderr) - elif char == 'l' or char == '+': - state.speed_up() - elif char == 'j' or char == '_': - state.slow_down() - + # Wait up to 0.1s for input + rlist, _, _ = select.select([sys.stdin], [], [], 0.1) + if rlist: + char = os.read(fd, 1).decode() + if char == 'k' or char == ' ': + state.toggle_pause() + if state.is_paused(): + print("\n[PAUSED] Press space or k to resume.", file=sys.stderr) + else: + print("\n[RESUMED]", file=sys.stderr) + elif char == 'l' or char == '+': + state.speed_up() + elif char == 'j' or char == '_': + state.slow_down() finally: termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) @@ -547,7 +551,7 @@ class Engine: all_jobs[:] = [job for job in all_jobs if job.submit_time > timestep + batch_window] # 1. Prepare Timestep: - completed_jobs, newly_downed_nodes = self.prepare_timestep(replay) + completed_jobs, newly_downed_nodes = self.prepare_timestep(replay=replay) # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) -- GitLab From 7f217f5b805880c025ebe0f2554e99f1e9b2364e Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 18 Aug 2025 17:45:45 -0400 Subject: [PATCH 237/388] Force os to return tty to sane state. 
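In rough outline (a sketch under assumed names; run_ui stands in for the rich
Live render loop, it is not a real function in this repo):

    import os

    def run_ui():
        # stand-in for the rich Live render loop
        print("rendering...")

    try:
        run_ui()
    finally:
        # coarse but dependable reset: recovers echo/cbreak even if the
        # saved termios state was lost, e.g. after an exception mid-render
        os.system("stty sane")

Restoring saved termios attributes would be the finer-grained fix, but
`stty sane` also recovers a terminal that a crashed render loop left in an
unknown state.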
---
 raps/ui.py | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/raps/ui.py b/raps/ui.py
index b5ab8cb..6d7eaec 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -1,4 +1,5 @@
 import sys
+import os
 import pandas as pd
 import numpy as np
 from rich.align import Align
@@ -545,19 +546,22 @@ class LayoutManager:
             context = Live(self.layout, auto_refresh=True, refresh_per_second=3)
         else:
             context = nullcontext()
-        with context:
-            # last_i = 0
-            for i, data in enumerate(self.engine.run_simulation(jobs,
-                                                                timestep_start,
-                                                                timestep_end,
-                                                                time_delta,
-                                                                autoshutdown=True)):
-                if data and (not self.debug and not self.noui):
-                    self.update_full_layout(data, time_delta)
-                    # self.update_progress_bar(i-last_i)
-                    # last_i=i
-                if not self.debug and not self.noui:
-                    self.update_progress_bar(1)
+        try:
+            with context:
+                # last_i = 0
+                for i, data in enumerate(self.engine.run_simulation(jobs,
+                                                                    timestep_start,
+                                                                    timestep_end,
+                                                                    time_delta,
+                                                                    autoshutdown=True)):
+                    if data and (not self.debug and not self.noui):
+                        self.update_full_layout(data, time_delta)
+                        # self.update_progress_bar(i-last_i)
+                        # last_i=i
+                    if not self.debug and not self.noui:
+                        self.update_progress_bar(1)
+        finally:
+            os.system("stty sane")

     def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta):
         """ Prepares the UI and returns a generator for the simulation """
-- 
GitLab

From 4add18478947dd69d8f0a3149e929280ef6d3530 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 19 Aug 2025 10:15:24 -0400
Subject: [PATCH 238/388] Re-added downtime and continuous job generation after previous merge temporarily removed the changes for easier integration.

---
 raps/engine.py | 88 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 82 insertions(+), 6 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index 338c155..63b0df9 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -13,7 +13,6 @@ from raps.job import Job, JobState
 from raps.policy import PolicyType
 from raps.utils import (
     summarize_ranges,
-    expand_ranges,
     get_current_utilization
 )
 from raps.resmgr import ResourceManager
@@ -24,6 +23,8 @@ from raps.network import (
     apply_job_slowdown,
     compute_system_network_stats
 )
+from raps.workload import continuous_job_generation
+from raps.downtime import Downtime


 @dataclasses.dataclass
@@ -113,6 +114,7 @@ class Engine:
                  config,
                  jobs=None,
                  total_initial_jobs=0,
+                 continuous_workload=None,  # Workload class to generate from for continuous generation
                  **kwargs):
         self.config = config
         self.down_nodes = summarize_ranges(self.config['DOWN_NODES'])
@@ -134,6 +136,7 @@ class Engine:
         self.power_manager = power_manager
         self.flops_manager = flops_manager
         self.debug = kwargs.get('debug')
+        self.continuous_workload = continuous_workload
         self.output = kwargs.get('output')
         self.replay = kwargs.get('replay')
         self.downscale = kwargs.get('downscale', 1)  # Factor to downscale the 1s timesteps (power of 10)
@@ -147,6 +150,9 @@ class Engine:
         self.avg_slowdown_history = []
         self.max_slowdown_history = []
         self.node_occupancy_history = []
+        self.downtime = Downtime(first_downtime=kwargs.get('downtime_first'),
+                                 downtime_interval=kwargs.get('downtime_interval'),
+                                 downtime_length=kwargs.get('downtime_length'))

         # Set scheduler type - either based on config or command-line args - defaults to 'default'
         if self.config['multitenant']:
@@ -227,7 +233,75 @@ class Engine:
         else:
             return False

-    def prepare_timestep(self, replay: bool = True):
+    def prepare_timestep(self, *, replay: bool = True, jobs):
+        # 1
identify completed jobs
+        # 2 Check continuous job generation
+        # 3 Simulate node failure  # Defunct feature!
+        # 4 Simulate downtime
+        # 5 Update active and free nodes
+
+        # 1 Identify Completed Jobs
+        completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time]
+        # Update Completed Jobs, their account, and free resources.
+        for job in completed_jobs:
+            self.power_manager.set_idle(job.scheduled_nodes)
+            job.state = JobState.COMPLETED
+
+            self.running.remove(job)
+            self.jobs_completed += 1
+            job_stats = job.statistics()
+            if self.accounts:
+                self.accounts.update_account_statistics(job_stats)
+            self.job_history_dict.append(job_stats.__dict__)
+            # Free the nodes via the resource manager.
+            self.resource_manager.free_nodes_from_job(job)
+
+        # 2 Check continuous job generation
+        if self.continuous_workload is not None:  # Experimental
+            continuous_job_generation(engine=self, timestep=self.current_time, jobs=jobs)
+
+        # 3 Simulate node failure
+        if not replay:
+            newly_downed_nodes = self.resource_manager.node_failure(self.config['MTBF'])
+            for node in newly_downed_nodes:
+                self.power_manager.set_idle(node)
+        else:
+            newly_downed_nodes = []
+
+        need_reschedule = False
+        # 4 Simulate downtime
+        need_reschedule = self.downtime.check_and_trigger(timestep=self.current_time, engine=self)
+
+        # 5 Update active/free nodes based on core/GPU utilization
+        if self.config['multitenant']:
+            # #total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes)
+            # #total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes)
+            # #available_cpu_cores = sum(node['available_cpu_cores'] for node in self.resource_manager.nodes)
+            # #available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes)
+
+            self.num_free_nodes = len([node for node in self.resource_manager.nodes if
+                                       not node['is_down'] and
+                                       node['available_cpu_cores'] == node['total_cpu_cores'] and
+                                       node['available_gpu_units'] == node['total_gpu_units']])
+            self.num_active_nodes = len([node for node in self.resource_manager.nodes if
+                                         not node['is_down'] and
+                                         (node['available_cpu_cores'] < node['total_cpu_cores']
+                                          or node['available_gpu_units'] < node['total_gpu_units'])])
+
+            # Update system utilization history
+            self.resource_manager.update_system_utilization(self.current_time, self.running)
+        else:
+            # Whole-node allocator
+            self.num_free_nodes = len(self.resource_manager.available_nodes)
+            self.num_active_nodes = self.config['TOTAL_NODES'] \
+                - len(self.resource_manager.available_nodes) \
+                - len(self.resource_manager.down_nodes)
+            self.down_nodes = self.resource_manager.down_nodes
+            # TODO This should only be managed in the resource manager!
+
+        return completed_jobs, newly_downed_nodes, need_reschedule
+
+    def prepare_timestep_old(self, replay: bool = True):
         # 1 identify completed jobs
         # 2 Simulate node failure # Defunct feature!
# 3 Update active and free nodes @@ -440,11 +514,13 @@ class Engine: net_rx_list=net_rx_list, slowdown_factors=slowdown_factors ) + slowdown_per_job = sum(slowdown_factors)/len(slowdown_factors) if len(slowdown_factors) != 0 else 0 self.record_network_stats(avg_tx=avg_tx, avg_rx=avg_rx, avg_net=avg_net) else: avg_tx, avg_rx, avg_net = None, None, None + slowdown_per_job = 0 # Continue with System Simulation @@ -462,7 +538,7 @@ class Engine: completed=None, running=self.running, queue=self.queue, - down_nodes=expand_ranges(self.down_nodes[1:]), + down_nodes=self.down_nodes, power_df=power_df, p_flops=pflops, g_flops_w=gflops_per_watt, @@ -474,7 +550,7 @@ class Engine: avg_net_tx=avg_tx, avg_net_rx=avg_rx, avg_net_util=avg_net, - slowdown_per_job=0, + slowdown_per_job=slowdown_per_job, node_occupancy=node_occupancy, time_delta=time_delta ) @@ -551,13 +627,13 @@ class Engine: all_jobs[:] = [job for job in all_jobs if job.submit_time > timestep + batch_window] # 1. Prepare Timestep: - completed_jobs, newly_downed_nodes = self.prepare_timestep(replay=replay) + completed_jobs, newly_downed_nodes, need_reschedule = self.prepare_timestep(replay=replay, jobs=jobs) # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) # 3. Schedule jobs that are now in the queue. - if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions: + if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions or need_reschedule: self.scheduler.schedule(self.queue, self.running, self.current_time, accounts=self.accounts, -- GitLab From d20c4bb2f941b16d8aa2dc63f5c7253543dde247 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 19 Aug 2025 13:51:46 -0400 Subject: [PATCH 239/388] Remove hetero-setonix.py and update DATAPATH in tests/smoke.py --- hetero-setonix.py | 86 ----------------------------------------------- tests/smoke.py | 13 +++++-- 2 files changed, 10 insertions(+), 89 deletions(-) delete mode 100644 hetero-setonix.py diff --git a/hetero-setonix.py b/hetero-setonix.py deleted file mode 100644 index 4d1ee64..0000000 --- a/hetero-setonix.py +++ /dev/null @@ -1,86 +0,0 @@ -from raps.utils import convert_to_seconds -from raps.workload import Workload -from raps.scheduler import Scheduler -from raps.power import PowerManager, compute_node_power -from raps.flops import FLOPSManager -from raps.ui import LayoutManager -from raps.config import ConfigManager -import copy -from args import args -from raps.helpers import check_python_version -check_python_version() - -args_dict1 = copy.deepcopy(vars(args)) -args_dict2 = copy.deepcopy(vars(args)) -print(args_dict1) -print(args_dict2) - - -config1 = ConfigManager(system_name='setonix-cpu').get_config() -config2 = ConfigManager(system_name='setonix-gpu').get_config() - -args_dict1['config'] = config1 -args_dict2['config'] = config2 - -pm1 = PowerManager(compute_node_power, **config1) -pm2 = PowerManager(compute_node_power, **config2) - -fm1 = FLOPSManager(**args_dict1) -fm2 = FLOPSManager(**args_dict2) - -sc1 = Scheduler(power_manager=pm1, flops_manager=fm1, cooling_model=None, **args_dict1) -sc2 = Scheduler(power_manager=pm2, flops_manager=fm2, cooling_model=None, **args_dict2) - -layout_manager1 = LayoutManager(args.layout, scheduler=sc1, debug=args.debug, **config1) -layout_manager2 = LayoutManager(args.layout, scheduler=sc2, debug=args.debug, **config2) - -print(config1) -print(config2) -configs = [config1, config2] -wl = Workload(*configs) - -jobs = getattr(wl, 
args.workload)(num_jobs=args.numjobs) -print(jobs) - -# Separate jobs based on partition -jobs1 = [job for job in jobs if job['partition'] == 'setonix-cpu'] -jobs2 = [job for job in jobs if job['partition'] == 'setonix-gpu'] - -# Print counts for verification -print(f"Jobs for setonix-cpu: {len(jobs1)}") -print(f"Jobs for setonix-gpu: {len(jobs2)}") - -if args.time: - timesteps = convert_to_seconds(args.time) -else: - timesteps = 88200 # 24 hours - -if args.verbose: - print(jobs) - -# Create generator objects for both partitions -gen1 = layout_manager1.run_stepwise(jobs1, timesteps=timesteps) -gen2 = layout_manager2.run_stepwise(jobs2, timesteps=timesteps) - -# Step through both generators in lockstep -# for _ in range(timesteps): -# next(gen1) # Advance first scheduler -# next(gen2) # Advance second scheduler - -for timestep in range(timesteps): - # Advance generators - next(gen1) - next(gen2) - - # Timestep - print(f"[DEBUG] Timestep: {timestep}") - - # Queue lengths - print(f"[DEBUG] setonix-cpu Queue: {len(layout_manager1.scheduler.queue)}") - print(f"[DEBUG] setonix-gpu Queue: {len(layout_manager2.scheduler.queue)}") - - # System utilization - sys_util1 = layout_manager1.scheduler.sys_util_history[-1][1] if layout_manager1.scheduler.sys_util_history else 0.0 - sys_util2 = layout_manager2.scheduler.sys_util_history[-1][1] if layout_manager2.scheduler.sys_util_history else 0.0 - print(f"[DEBUG] setonix-cpu Util: {sys_util1:.2f}%") - print(f"[DEBUG] setonix-gpu Util: {sys_util2:.2f}%") diff --git a/tests/smoke.py b/tests/smoke.py index d3c7dcb..0f9f4ca 100644 --- a/tests/smoke.py +++ b/tests/smoke.py @@ -3,7 +3,7 @@ import argparse import subprocess # Define the data path -DATAPATH = os.path.expanduser("~/data") +DATAPATH = os.getenv("RAPS_DATA_DIR", "/opt/data") # Standardize the time setting DEFAULT_TIME = "1h" @@ -14,11 +14,12 @@ SYSTEMS = { "frontier": "frontier/slurm/joblive/date=2024-01-18 frontier/jobprofile/date=2024-01-18", "marconi100": "marconi100/job_table.parquet", "lassen": "lassen/Lassen-Supercomputer-Job-Dataset", - "adastraMI250": "adastra/AdastaJobsMI250_15days.parquet" + "adastraMI250": "adastra/AdastaJobsMI250_15days.parquet", } VALID_CHOICES = set(SYSTEMS.keys()).union({"synthetic", "hetero"}) + def run_command(command): """Helper function to run a shell command.""" print(f"Running: {command}") @@ -27,17 +28,20 @@ def run_command(command): print(f"Error: Command failed with return code {result.returncode}") exit(-1) + def build_command(system, file_paths, additional_args=""): """Build the command string for the given system and file paths.""" full_paths = " ".join([os.path.join(DATAPATH, path) for path in file_paths.split()]) return f"python main.py --system {system} -f {full_paths} -t {DEFAULT_TIME} {additional_args}".strip() + def execute_system_tests(systems): """Execute tests for selected systems.""" for system in systems: command = build_command(system, SYSTEMS[system]) run_command(command) + def synthetic_workload_tests(): """Run synthetic workload tests.""" print("Starting synthetic workload tests...") @@ -46,18 +50,21 @@ def synthetic_workload_tests(): run_command(f"python main.py -w peak -t {DEFAULT_TIME}") run_command(f"python main.py -w idle -t {DEFAULT_TIME}") + def hetero_tests(): """Run heterogeneous workload tests.""" print("Starting heterogeneous workload tests...") run_command(f"python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu -t {DEFAULT_TIME}") + def main(): """Main function to parse arguments and run tests.""" parser = 
argparse.ArgumentParser(description="Run smoke tests for HPC systems.")
    parser.add_argument(
        "tests",
        nargs="*",  # Allow multiple test selections, including none
-        help="Run tests for one or more specific systems (e.g., 'frontier lassen'), 'synthetic' workloads, or 'hetero'. If omitted, all tests run.",
+        help="Run tests for one or more specific systems (e.g., 'frontier lassen'),"
+             " 'synthetic' workloads, or 'hetero'. If omitted, all tests run."
    )
    args = parser.parse_args()
-- 
GitLab

From 05e9c4cb46dff47065127a72a2d85c19d8f336a7 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 19 Aug 2025 14:46:52 -0400
Subject: [PATCH 240/388] Fixed workload change to use args and config.

---
 raps/workload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/raps/workload.py b/raps/workload.py
index 92d4e46..38233c2 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -882,7 +882,7 @@ def run_workload():
         td = Telemetry(**args_dict)
         jobs, _, _, _ = td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config)
     else:
-        workload = Workload(config)
+        workload = Workload(args, config)
         jobs = getattr(workload, args.workload)(args=args)
-- 
GitLab

From 9fd3d9be441496fe7e9dd543cfc9b7a5d816f84f Mon Sep 17 00:00:00 2001
From: "Brewer, Wes"
Date: Tue, 19 Aug 2025 22:22:53 +0000
Subject: [PATCH 241/388] Blue Waters data loader and supporting torus3d network

---
 config/bluewaters/network.json   |  18 +-
 config/bluewaters/scheduler.json |   2 +-
 raps/dataloaders/bluewaters.py   | 329 +++++++++++++++++++++++++++++++
 raps/job.py                      |   1 +
 raps/network.py                  | 199 +++++++++++++++++--
 5 files changed, 522 insertions(+), 27 deletions(-)
 create mode 100644 raps/dataloaders/bluewaters.py

diff --git a/config/bluewaters/network.json b/config/bluewaters/network.json
index 5a0f564..1283549 100644
--- a/config/bluewaters/network.json
+++ b/config/bluewaters/network.json
@@ -1,9 +1,13 @@
 {
-    "TOPOLOGY": "fat-tree",
-    "NETWORK_MAX_BW": 1e9,
-    "FATTREE_K": 16,
-    "DRAGONFLY_D": 11,
-    "DRAGONFLY_A": 9,
-    "DRAGONFLY_P": 8,
-    "LATENCY": 1
+    "TOPOLOGY": "torus3d",
+    "NETWORK_MAX_BW": 9.6e9,
+    "TORUS_X": 24,
+    "TORUS_Y": 24,
+    "TORUS_Z": 24,
+    "TORUS_WRAP": true,
+    "HOSTS_PER_ROUTER": 2,
+    "TORUS_LINK_BW": 9.6e9,
+    "LATENCY_PER_HOP": 1e-6,
+    "TORUS_ROUTING": "DOR_XYZ",
+    "NODE_COORDS_CSV": null
 }

diff --git a/config/bluewaters/scheduler.json b/config/bluewaters/scheduler.json
index 52b97ae..3f1445e 100644
--- a/config/bluewaters/scheduler.json
+++ b/config/bluewaters/scheduler.json
@@ -3,7 +3,7 @@
     "SEED": 42,
     "JOB_ARRIVAL_TIME": 100,
     "MTBF": 11,
-    "TRACE_QUANTA": 15,
+    "TRACE_QUANTA": 60,
     "MIN_WALL_TIME": 60,
     "MAX_WALL_TIME": 43200,
     "UI_UPDATE_FREQ": 900,

diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py
new file mode 100644
index 0000000..2b4c38a
--- /dev/null
+++ b/raps/dataloaders/bluewaters.py
@@ -0,0 +1,329 @@
+"""
+Blue Waters dataloader
+
+Example test case:
+
+    python main.py -f /opt/data/bluewaters --start 20170328 --system bluewaters -net
+
+To download the necessary datasets:
+
+    https://bluewaters.ncsa.illinois.edu/data-sets.html - this explains each of the datasets in detail
+
+    There are two datasets available from:
+
+    https://app.globus.org/file-manager?origin_id=854c1a5c-fa9f-4df4-a71c-407a33e44da0
+
+    1. /torque_logs_anonimized (sic) - we are using the file 2017.tar.gz (377MB)
+
+    2. 
/node_metrics/cray_system_sampler - we are using the file 20170328.tgz (485MB) + + Another dataset we plan to use (but not currently): + + 3. Monet - Blue Waters Network Dataset (140GB) - https://databank.illinois.edu/datasets/IDB-2921318 + + We assume these datasets are setup as follows (assuming -f /opt/data/bluewaters): + + /opt/data/bluewaters/cray_system_sampler/20170328 + /opt/data/bluewaters/torque_logs/20170328 + /opt/data/bluewaters/monet/20170328 +""" + +import math +import re +import pandas as pd +from pathlib import Path +from raps.telemetry import Job, job_dict + + +def throughput_traces(total_tx, total_rx, intervals): + intervals = max(1, int(intervals or 1)) + tx = [(total_tx or 0) // intervals] * intervals + rx = [(total_rx or 0) // intervals] * intervals + # print(total_tx, total_rx, intervals, tx[:5], rx[:5]) + return tx, rx + + +def build_sampler_df(root, day, nodes, tmin, tmax, tx_idx, rx_idx, chunksize=None): + """One-time loader: returns a DataFrame of per-node positive deltas with mid-interval timestamps. + Columns: nid, mid_ts, dtx, drx (all numeric).""" + sdir = Path(root) / "cray_system_sampler" / day + files = [sdir] if sdir.is_file() else (sorted(f for f in sdir.iterdir() if f.is_file()) if sdir.exists() else []) + if not files: + raise FileNotFoundError(f"No Cray sampler files for day {day} under {sdir.parent}") + + cols = [0, 1, tx_idx, rx_idx] # ts, nid, tx, rx + out = [] + + def _process(df): + if df.empty: + return None + df = df[cols] + df.columns = ["ts", "nid", "tx", "rx"] + df = df[df["nid"].isin(nodes)] + if df.empty: + return None + # sort & compute deltas per node + df = df.sort_values(["nid", "ts"]) + df["ts_prev"] = df.groupby("nid")["ts"].shift(1) + df["tx_prev"] = df.groupby("nid")["tx"].shift(1) + df["rx_prev"] = df.groupby("nid")["rx"].shift(1) + # positive deltas only + df["dtx"] = df["tx"] - df["tx_prev"] + df["drx"] = df["rx"] - df["rx_prev"] + df = df[(df["dtx"] > 0) | (df["drx"] > 0)] + if df.empty: + return None + # mid-interval timestamp for window inclusion + df["mid_ts"] = 0.5 * (df["ts"] + df["ts_prev"]) + df = df[["nid", "mid_ts", "dtx", "drx"]].dropna() + return df + + for fp in files: + print(f"reading {fp}... 
this may take a while") + if chunksize: + for chunk in pd.read_csv(fp, header=None, skipinitialspace=True, chunksize=chunksize): + dfp = _process(chunk) + if dfp is not None: + out.append(dfp) + else: + df = pd.read_csv(fp, header=None, skipinitialspace=True) + dfp = _process(df) + if dfp is not None: + out.append(dfp) + + if not out: + # nothing matched nodes/time; return empty frame with expected columns + return pd.DataFrame(columns=["nid", "mid_ts", "dtx", "drx"]) + + return pd.concat(out, ignore_index=True) + + +def hms_to_seconds(wt: str) -> int: + try: + h, m, s = map(int, wt.split(":")) + return h * 3600 + m * 60 + s + except Exception: + return 0 + + +def extract_nodes_from_line(hosts_field: str): + """Extract node IDs from an exec_host field in one line.""" + nodes = [] + for token in hosts_field.split("+"): + if "/" in token: + node = token.split("/")[0] + try: + nodes.append(int(node)) + except ValueError: + pass + return nodes + + +# example line: +# 03/18/2017 00:01:15;E;6335144.bw;user=USER260243U group=GRP113775G +# account=A116610A jobname=dm5-8506-M9 queue=normal ctime=1489668573 +# qtime=1489668573 etime=1489798453 start=1489799118 +# owner=USER260243U@h2ologin2 +# exec_host=26742/0-31+26743/0-31+26728/0-31+26729/0-31 +# login_node=nid27563 Resource_List.flags=commtolerant:commlocal +# Resource_List.neednodes=4:ppn=32 Resource_List.nodect=4 +# Resource_List.nodes=4:ppn=32 Resource_List.partition=bwsched +# Resource_List.walltime=04:00:00 session=16472 total_execution_slots=128 +# unique_node_count=4 end=1489813275 Exit_status=2 resources_used.cput=28 +# resources_used.energy_used=0 resources_used.mem=18996kb +# resources_used.vmem=130088kb resources_used.walltime=03:55:49 + + +PATS = { + "id": re.compile(r"\b(jobid|job_id|Job_Id)[:=]\s*([^\s,]+)", re.I), + "name": re.compile(r"\b(jobname)[:=]\s*([^\s,]+)", re.I), + "account": re.compile(r"\b(account)[:=]\s*([^\s,]+)", re.I), + # Nodes: use Resource_List.nodect or unique_node_count + "nodes_required": re.compile(r"\b(?:Resource_List\.nodect|unique_node_count)[:=]\s*(\d+)", re.I), + # CPU cores per node: from ppn in Resource_List.nodes + "cpu_cores_required": re.compile(r"\bppn=(\d+)", re.I), + # GPUs per node + "gpu_units_required": re.compile(r"\bgpus?=(\d+)", re.I), + # Scheduled nodes list (exec_host=...) 
+ "scheduled_nodes": re.compile(r"\bexec_host=([^\s,]+)", re.I), + # Times + "submit_time": re.compile(r"\bqtime=([0-9]+)", re.I), + "start_time": re.compile(r"\bstart=([0-9]+)", re.I), + "end_time": re.compile(r"\bend=([0-9]+)", re.I), + # Walltime used + "wall_time": re.compile(r"resources_used\.walltime=(\d{2}:\d{2}:\d{2})", re.I), +} + + +def _parse_line(line: str, debug=False): + rec = {} + for key, pat in PATS.items(): + m = pat.search(line) + if m: + if debug: + print(f"\n[{key}] matched pattern {pat.pattern}") + for i in range(0, (m.lastindex or 0) + 1): + print(f" group({i}): {m.group(i)}") + rec[key] = m.group(m.lastindex or 0) # take last group + # normalize scheduled_nodes into list + if "scheduled_nodes" in rec: + rec["scheduled_nodes"] = extract_nodes_from_line(rec["scheduled_nodes"]) + # wall_time + if rec.get("wall_time"): + rec["wall_time"] = hms_to_seconds(rec["wall_time"]) + + return rec + + +def load_data(local_dataset_path, **kwargs): + config = kwargs.get("config") + root = Path(local_dataset_path[0]) + day = kwargs.get("start") + fp = root / "torque_logs" / day + + jobs_raw = [] + + # parse file + for line in fp.open("rt", errors="ignore"): + if "jobname" not in line.lower(): + continue + rec = _parse_line(line) + + # skip if missing times + if not (rec.get("start_time") and rec.get("end_time")): + continue + + # ints + st = int(rec["start_time"]) + et = int(rec["end_time"]) + sub = int(rec.get("submit_time", st)) + + duration = et - st if et >= st else 0 + nr = int(rec.get("nodes_required")) + int(rec.get("cpu_cores_required")) + + jid = rec.get("id") + trace_quanta = config.get("TRACE_QUANTA") + + job_d = job_dict( + nodes_required=nr, + name=rec.get("name"), + account=rec.get("account"), + # cpu_trace=[0]*nr*nc, # placeholder trace + # gpu_trace=[0]*nr*0, # Blue Waters has no GPUs + cpu_trace=0, + gpu_trace=0, + nrx_trace=[], + ntx_trace=[], + end_state="UNKNOWN", + scheduled_nodes=rec.get("scheduled_nodes"), + id=jid, + priority=0, + submit_time=sub, + time_limit=0, + start_time=st, + end_time=et, + wall_time=duration, + trace_time=sub, + trace_start_time=st, + trace_end_time=et, + trace_quanta=trace_quanta, + ) + jobs_raw.append(job_d) + + # jobs_raw = list of dicts with absolute epoch times (as ints), e.g.: + # {'id': '6335144.bw', 'name': '...', 'account': '...', 'scheduled_nodes': [26742, ...], + # 'nodes_required': 4, 'cpu_cores_required': 32, 'submit_time': 1489798453, + # 'start_time': 1489799118, 'end_time': 1489813275} + + # Gather global filters once + all_nodes = set() + abs_starts = [] + abs_ends = [] + + for r in jobs_raw: + if r.get("scheduled_nodes"): + all_nodes.update(r["scheduled_nodes"]) + abs_starts.append(int(r["start_time"])) + abs_ends.append(int(r["end_time"])) + if not all_nodes or not abs_starts: + return [], 0, 0 + + global_tmin = min(abs_starts) + global_tmax = max(abs_ends) + + # Confirm the correct 0-based indices for ipogif0_* from the HEADER + tx_idx = 15 # kwargs.get("sampler_tx_idx", 15) # placeholder; pass real index via kwargs + rx_idx = 16 # kwargs.get("sampler_rx_idx", 16) # placeholder; pass real index via kwargs + + # Build once (chunk if files are huge) + sampler_df = build_sampler_df(root, day, all_nodes, global_tmin, global_tmax, tx_idx, rx_idx, chunksize=None) + # Optional speed-ups: + # sampler_df.set_index(["nid"], inplace=True) # if you want .loc fast path per node + + # Option 1: take indices from kwargs (0-based). Option 2: keep your quick defaults. 
+ + Path(local_dataset_path[0] if isinstance(local_dataset_path, (list, tuple)) else local_dataset_path) + + bin_s = config.get("TRACE_QUANTA") + jobs = [] + + for r in jobs_raw: + st_abs = int(r["start_time"]) + et_abs = int(r["end_time"]) + nodes = r.get("scheduled_nodes") or [] + jid = r["id"] + + # Filter by nodes, sum positive deltas + dfj = sampler_df[sampler_df["nid"].isin(nodes)] + total_tx = int(dfj["dtx"].sum()) if not dfj.empty else 0 + total_rx = int(dfj["drx"].sum()) if not dfj.empty else 0 + # total_tx and total_rx are bytes per node + + nodes_required = r.get("nodes_required") + + # Smear totals evenly across bins (simple first pass) + duration = max(1, et_abs - st_abs) + samples = max(1, math.ceil(duration / bin_s)) + ntx, nrx = throughput_traces(total_tx, total_rx, samples) # bytes per bin + + job_d = job_dict( + nodes_required=nodes_required, + name=r.get("name"), + account=r.get("account", "unknown"), + cpu_trace=0, + gpu_trace=0, + nrx_trace=nrx, + ntx_trace=ntx, + end_state="UNKNOWN", + scheduled_nodes=nodes, + id=jid, + priority=0, + submit_time=int(r["submit_time"]), + time_limit=0, + start_time=st_abs, + end_time=et_abs, + wall_time=et_abs - st_abs, + trace_time=st_abs, + trace_start_time=st_abs, + trace_end_time=st_abs + samples * bin_s, + trace_quanta=bin_s, + trace_missing_values=False, + ) + jobs.append(Job(job_d)) + + # Normalize times so first start = 0 + t0 = min((j.start_time for j in jobs), default=0) + for j in jobs: + j.submit_time -= t0 + j.start_time -= t0 + j.end_time -= t0 + j.trace_time -= t0 + j.trace_start_time -= t0 + j.trace_end_time -= t0 + + # pprint(jobs) + simulation_start = 0 + simulation_end = max((j.end_time for j in jobs), default=0) + + return jobs, simulation_start, simulation_end diff --git a/raps/job.py b/raps/job.py index 262371f..33a4670 100644 --- a/raps/job.py +++ b/raps/job.py @@ -176,6 +176,7 @@ class Job: f"allocated_cpu_cores={self.allocated_cpu_cores}, " f"allocated_gpu_units={self.allocated_gpu_units}, " f"cpu_trace={self.cpu_trace}, gpu_trace={self.gpu_trace}, " + f"ntx_trace={self.ntx_trace}, nrx_trace={self.nrx_trace}, " f"end_state={self.end_state}, " f"submit_time={self.submit_time}, time_limit={self.time_limit}, " f"start_time={self.start_time}, end_time={self.end_time}, " diff --git a/raps/network.py b/raps/network.py index 4e8ac49..9c457d4 100644 --- a/raps/network.py +++ b/raps/network.py @@ -1,25 +1,37 @@ +import csv import networkx as nx from itertools import combinations from raps.utils import get_current_utilization +from pathlib import Path class NetworkModel: - """ - """ + """ """ def __init__(self, *, available_nodes, config, **kwargs): - self.topology = config.get('TOPOLOGY') + self.topology = config.get("TOPOLOGY") # if fat-tree, build the graph once if self.topology == "fat-tree": print("building fat-tree graph...") self.fattree_k = config.get("FATTREE_K") self.net_graph = build_fattree(self.fattree_k) print(self.net_graph) + elif self.topology == "torus3d": + print("building torus3d graph...") + dims = (int(config["TORUS_X"]), int(config["TORUS_Y"]), int(config["TORUS_Z"])) + wrap = bool(config.get("TORUS_WRAP", True)) + link_bw = float(config.get("TORUS_LINK_BW", config.get("NETWORK_MAX_BW"))) + hpr = int(config.get("HOSTS_PER_ROUTER")) + routing = config.get("TORUS_ROUTING", "DOR_XYZ").upper() + coords_csv = config.get("NODE_COORDS_CSV") # optional + self.net_graph, self.torus_meta = build_torus3d( + dims=dims, wrap=wrap, link_bw=link_bw, hosts_per_router=hpr, routing=routing, coords_csv=coords_csv + ) 
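            # torus_meta keeps the dims/wrap flags and the host<->router maps
            # that torus_host_path() and link_loads_for_job_torus() use for
            # dimension-order (XYZ) routing over the wrap-around links.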
elif self.topology == "dragonfly": print("building dragonfly graph...") - D = config["DRAGONFLY_D"] # groups - A = config["DRAGONFLY_A"] # routers per group - P = config["DRAGONFLY_P"] # hosts per router + D = config["DRAGONFLY_D"] # groups + A = config["DRAGONFLY_A"] # routers per group + P = config["DRAGONFLY_P"] # hosts per router self.net_graph = build_dragonfly(D, A, P) print(self.net_graph) @@ -63,7 +75,7 @@ class NetworkModel: host_list = [] for real_n in job.scheduled_nodes: - fat_idx = self.real_to_fat_idx[real_n] # contiguous in [0..(D*A*P−1)] + fat_idx = self.real_to_fat_idx[real_n] # contiguous in [0..(D*A*P−1)] host_list.append(dragonfly_node_id_to_host_name(fat_idx, D, A, P)) if debug: print(" dragonfly hosts:", host_list) @@ -89,7 +101,7 @@ def apply_job_slowdown(*, job, max_throughput, net_util, net_cong, net_tx, net_r slowdown_factor = network_slowdown(throughput, max_throughput) if debug: - print("***", hasattr(job, 'dilated'), throughput, max_throughput, slowdown_factor) + print("***", hasattr(job, "dilated"), throughput, max_throughput, slowdown_factor) # Only apply slowdown once per job to avoid compounding the effect. if not job.dilated: @@ -169,30 +181,30 @@ def build_fattree(k): G = nx.Graph() # core # num_core = (k//2)**2 # Unused! - for i in range(k//2): - for j in range(k//2): + for i in range(k // 2): + for j in range(k // 2): core = f"c_{i}_{j}" G.add_node(core, type="core") # pods for pod in range(k): # agg switches - for agg in range(k//2): + for agg in range(k // 2): a = f"a_{pod}_{agg}" G.add_node(a, type="agg") # connect to all core switches in column agg - for i in range(k//2): + for i in range(k // 2): core = f"c_{agg}_{i}" G.add_edge(a, core) # edge switches + hosts - for edge in range(k//2): + for edge in range(k // 2): e = f"e_{pod}_{edge}" G.add_node(e, type="edge") # connect edge→each agg in this pod - for agg in range(k//2): + for agg in range(k // 2): a = f"a_{pod}_{agg}" G.add_edge(e, a) # connect hosts - for h in range(k//2): + for h in range(k // 2): host = f"h_{pod}_{edge}_{h}" G.add_node(host, type="host") G.add_edge(e, host) @@ -205,7 +217,7 @@ def all_to_all_paths(G, hosts): """ paths = [] for i in range(len(hosts)): - for j in range(i+1, len(hosts)): + for j in range(i + 1, len(hosts)): src, dst = hosts[i], hosts[j] p = nx.shortest_path(G, src, dst) paths.append((src, dst, p)) @@ -222,11 +234,11 @@ def link_loads_for_job(G, job_hosts, tx_volume_bytes): # each host sends tx_volume_bytes to each of the (N-1) peers for src in job_hosts: if len(job_hosts) >= 2: - per_peer = tx_volume_bytes / (len(job_hosts)-1) + per_peer = tx_volume_bytes / (len(job_hosts) - 1) else: per_peer = 0 # find paths where src is the sender - for (s, d, p) in paths: + for s, d, p in paths: if s != src: continue # add per_peer to every link on p @@ -256,7 +268,7 @@ def node_id_to_host_name(node_id: int, k: int) -> str: Map a 0-based integer node_id into one of the fat-tree hosts "h_{pod}_{edge}_{h}". There are (k^3/4) total hosts, assigned in ascending order across pod → edge → h. """ - hosts_per_pod = (k // 2) * (k // 2) # e.g. for k=8, hosts_per_pod = 16 + hosts_per_pod = (k // 2) * (k // 2) # e.g. 
for k=8, hosts_per_pod = 16 pod = node_id // hosts_per_pod offset = node_id % hosts_per_pod edge = offset // (k // 2) @@ -335,3 +347,152 @@ def dragonfly_node_id_to_host_name(fat_idx: int, D: int, A: int, P: int) -> str: router_group = (fat_idx // P) % A pod = fat_idx // (A * P) return f"h_{pod}_{router_group}_{host_offset}" + + +def build_torus3d(dims, wrap=True, link_bw=1e9, hosts_per_router=1, routing="DOR_XYZ", coords_csv=None): + """ + Build a 3D torus at router granularity, then attach host nodes to routers. + Node ids in the returned graph are host names ("h_x_y_z_i") and router names ("r_x_y_z"). + Edges have attribute 'capacity' (bytes/s) and 'latency' (per hop). + """ + X, Y, Z = map(int, dims) + G = nx.Graph() + + # Routers + def rname(x, y, z): + return f"r_{x}_{y}_{z}" + + for x in range(X): + for y in range(Y): + for z in range(Z): + G.add_node(rname(x, y, z), kind="router", coord=(x, y, z)) + + # Toroidal links between routers (±x, ±y, ±z) + def wrapi(i, n): + return (i + n) % n if wrap else (None if i < 0 or i >= n else i) + + for x in range(X): + for y in range(Y): + for z in range(Z): + u = rname(x, y, z) + # x+ + nxp = wrapi(x + 1, X) + v = rname(nxp, y, z) if nxp is not None else None + if v and not G.has_edge(u, v): + G.add_edge(u, v, capacity=link_bw) + # y+ + nyp = wrapi(y + 1, Y) + v = rname(x, nyp, z) if nyp is not None else None + if v and not G.has_edge(u, v): + G.add_edge(u, v, capacity=link_bw) + # z+ + nzp = wrapi(z + 1, Z) + v = rname(x, y, nzp) if nzp is not None else None + if v and not G.has_edge(u, v): + G.add_edge(u, v, capacity=link_bw) + + # Attach hosts to routers + host_to_router = {} + router_to_hosts = {} + + def hname(x, y, z, i): + return f"h_{x}_{y}_{z}_{i}" + + # If a nid→(x,y,z) CSV is supplied, place accordingly; else dense round-robin + # CSV format: nid,x,y,z[,i] + nid_placement = {} + if coords_csv: + p = Path(coords_csv) + with p.open("rt") as fh: + rd = csv.reader(fh) + for row in rd: + if not row: + continue + nid = int(row[0]) + x, y, z = map(int, row[1:4]) + i = int(row[4]) if len(row) > 4 else 0 + nid_placement[nid] = (x, y, z, i) + + # Build hosts + for x in range(X): + for y in range(Y): + for z in range(Z): + r = rname(x, y, z) + router_to_hosts[r] = [] + for i in range(hosts_per_router): + h = hname(x, y, z, i) + G.add_node(h, kind="host", coord=(x, y, z), local_index=i) + G.add_edge(h, r, capacity=link_bw) # host↔router edge; you can cap with NETWORK_MAX_BW instead + host_to_router[h] = r + router_to_hosts[r].append(h) + + meta = { + "dims": (X, Y, Z), + "wrap": wrap, + "routing": routing, + "host_to_router": host_to_router, + "router_to_hosts": router_to_hosts, + } + return G, meta + + +def _axis_steps(a, b, n, wrap=True): + """Return minimal step sequence along one axis from a to b with wrap-around.""" + if a == b: + return [] + fwd = (b - a) % n + back = (a - b) % n + if not wrap: + step = 1 if b > a else -1 + return [step] * abs(b - a) + if fwd <= back: + return [1] * fwd + else: + return [-1] * back + + +def torus_route_xyz(src_r, dst_r, dims, wrap=True): + """Router-level path (list of router names) using XYZ dimension-order routing.""" + X, Y, Z = dims + + def parse(r): + _, x, y, z = r.split("_") + return int(x), int(y), int(z) + + x1, y1, z1 = parse(src_r) + x2, y2, z2 = parse(dst_r) + + path = [src_r] + x, y, z = x1, y1, z1 + for step in _axis_steps(x, x2, X, wrap): + x = (x + step) % X + path.append(f"r_{x}_{y}_{z}") + for step in _axis_steps(y, y2, Y, wrap): + y = (y + step) % Y + path.append(f"r_{x}_{y}_{z}") + for 
step in _axis_steps(z, z2, Z, wrap):
+        z = (z + step) % Z
+        path.append(f"r_{x}_{y}_{z}")
+    return path
+
+
+def torus_host_path(G, meta, h_src, h_dst):
+    r_src = meta["host_to_router"][h_src]
+    r_dst = meta["host_to_router"][h_dst]
+    routers = torus_route_xyz(r_src, r_dst, meta["dims"], meta["wrap"])
+    # host->src_router + (router path) + dst_router->host
+    path = [h_src, r_src] + routers[1:] + [h_dst]
+    return path
+
+
+def link_loads_for_job_torus(G, meta, host_list, traffic_bytes):
+    # all-to-all between hosts in host_list, route via torus_host_path, add traffic_bytes per pair
+    loads = {}
+    n = len(host_list)
+    for i in range(n):
+        for j in range(i + 1, n):
+            p = torus_host_path(G, meta, host_list[i], host_list[j])
+            for u, v in zip(p, p[1:]):
+                e = tuple(sorted((u, v)))
+                loads[e] = loads.get(e, 0) + traffic_bytes
+    return loads
-- 
GitLab

From 6e2a2aad87079552afd9bdd913cf66cdcd6f16d6 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 20 Aug 2025 13:13:38 -0400
Subject: [PATCH 242/388] Refactor of engine current_timestep. Removed unneeded variables and duplication. Now the current_timestep is tracked by the engine only!

---
 raps/engine.py | 147 +++++++++++++++++--------------------------------
 raps/job.py    |  79 +++++++++++++++++---------
 raps/stats.py  |  39 ++++++++++---
 raps/ui.py     |   4 +-
 4 files changed, 138 insertions(+), 131 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index 63b0df9..572eba1 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -30,7 +30,7 @@ from raps.downtime import Downtime
 @dataclasses.dataclass
 class TickData:
     """ Represents the state output from the simulation each tick """
-    current_time: int
+    current_timestep: int
     completed: list[Job]
     running: list[Job]
     queue: list[Job]
@@ -130,7 +130,7 @@ class Engine:
         self.job_history_dict = []
         self.jobs_completed = 0
         self.total_initial_jobs = total_initial_jobs
-        self.current_time = 0
+        self.current_timestep = 0
         self.cooling_model = cooling_model
         self.sys_power = 0
         self.power_manager = power_manager
@@ -190,13 +190,17 @@ class Engine:
         jobs_to_submit still holds the jobs that need to be submitted in the future.
         """
         if self.debug:
-            print(f"[DEBUG] add_running_jobs_to_queue: current_time={self.current_time}")
+            print(f"[DEBUG] add_running_jobs_to_queue: current_time={self.current_timestep}")
         # Build a list of jobs whose start_time is <= current_time.
-        eligible_jobs = [job for job in jobs_to_submit if job.start_time < self.current_time]
+        eligible_jobs = [job for job in jobs_to_submit if
+                         job.start_time is not None
+                         and job.start_time < self.current_timestep]
         if self.debug:
             print(f"[DEBUG] add_running_jobs_to_queue: Found {len(eligible_jobs)} eligible jobs.")
         # Remove those jobs from jobs_to_submit:
-        jobs_to_submit[:] = [job for job in jobs_to_submit if job.start_time >= self.current_time]
+        jobs_to_submit[:] = [job for job in jobs_to_submit if
+                             job.start_time is not None
+                             and job.start_time >= self.current_timestep]
         if self.debug:
             print(f"[DEBUG] add_running_jobs_to_queue: {len(jobs_to_submit)} jobs remaining in jobs_to_submit.")
         # Convert them to Job instances and build list of eligible jobs.
@@ -215,13 +219,13 @@ class Engine:
        - false if no new jobs are present
        """
        if self.debug:
-            print(f"[DEBUG] add_eligible_jobs_to_queue: current_time={self.current_time}")
+            print(f"[DEBUG] add_eligible_jobs_to_queue: current_time={self.current_timestep}")
        # Build a list of jobs whose submit_time is <= current_time. 
-        eligible_jobs = [job for job in jobs_to_submit if job.submit_time <= self.current_time]
+        eligible_jobs = [job for job in jobs_to_submit if job.submit_time <= self.current_timestep]
         if self.debug:
             print(f"[DEBUG] add_eligible_jobs_to_queue: Found {len(eligible_jobs)} eligible jobs.")
         # Remove those jobs from jobs_to_submit:
-        jobs_to_submit[:] = [job for job in jobs_to_submit if job.submit_time > self.current_time]
+        jobs_to_submit[:] = [job for job in jobs_to_submit if job.submit_time > self.current_timestep]
         if self.debug:
             print(f"[DEBUG] add_eligible_jobs_to_queue: {len(jobs_to_submit)} jobs remaining in jobs_to_submit.")
         # Convert them to Job instances and build list of eligible jobs.
@@ -241,7 +245,8 @@ class Engine:
         # 5 Update active and free nodes

         # 1 Identify Completed Jobs
-        completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time]
+        completed_jobs = [job for job in self.running if
+                          job.end_time is not None and job.end_time <= self.current_timestep]
         # Update Completed Jobs, their account, and free resources.
         for job in completed_jobs:
             self.power_manager.set_idle(job.scheduled_nodes)
@@ -258,7 +263,7 @@ class Engine:

         # 2 Check continuous job generation
         if self.continuous_workload is not None:  # Experimental
-            continuous_job_generation(engine=self, timestep=self.current_time, jobs=jobs)
+            continuous_job_generation(engine=self, timestep=self.current_timestep, jobs=jobs)

         # 3 Simulate node failure
         if not replay:
@@ -270,7 +275,7 @@ class Engine:

         need_reschedule = False
         # 4 Simulate downtime
-        need_reschedule = self.downtime.check_and_trigger(timestep=self.current_time, engine=self)
+        need_reschedule = self.downtime.check_and_trigger(timestep=self.current_timestep, engine=self)

         # 5 Update active/free nodes based on core/GPU utilization
         if self.config['multitenant']:
@@ -289,7 +294,7 @@ class Engine:
                                          or node['available_gpu_units'] < node['total_gpu_units'])])

             # Update system utilization history
-            self.resource_manager.update_system_utilization(self.current_time, self.running)
+            self.resource_manager.update_system_utilization(self.current_timestep, self.running)
         else:
             # Whole-node allocator
             self.num_free_nodes = len(self.resource_manager.available_nodes)
@@ -301,73 +306,15 @@ class Engine:

         return completed_jobs, newly_downed_nodes, need_reschedule

-    def prepare_timestep_old(self, replay: bool = True):
-        # 1 identify completed jobs
-        # 2 Simulate node failure # Defunct feature!
-        # 3 Update active and free nodes
-
-        # Identify Completed Jobs
-        completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_time]
-        # Update Completed Jobs, their account, and free resources.
-        for job in completed_jobs:
-            self.power_manager.set_idle(job.scheduled_nodes)
-            job.state = JobState.COMPLETED
-
-            self.running.remove(job)
-            self.jobs_completed += 1
-            job_stats = job.statistics()
-            if self.accounts:
-                self.accounts.update_account_statistics(job_stats)
-            self.job_history_dict.append(job_stats.__dict__)
-            # Free the nodes via the resource manager. 
-            self.resource_manager.free_nodes_from_job(job)
-
-        if not replay:
-            # Simulate node failure
-            newly_downed_nodes = self.resource_manager.node_failure(self.config['MTBF'])
-            for node in newly_downed_nodes:
-                self.power_manager.set_idle(node)
-        else:
-            newly_downed_nodes = []
-
-        # Update active/free nodes based on core/GPU utilization
-        if self.config['multitenant']:
-            # total_cpu_cores = sum(node['total_cpu_cores'] for node in self.resource_manager.nodes)  # Unused
-            # total_gpu_units = sum(node['total_gpu_units'] for node in self.resource_manager.nodes)  # Unused
-            # available_cpu_cores = sum(node['available_cpu_cores'] for node in self.resource_manager.nodes)  # Unused
-            # available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes)  # Unused
-
-            self.num_free_nodes = len([node for node in self.resource_manager.nodes if
-                                       not node['is_down'] and
-                                       node['available_cpu_cores'] == node['total_cpu_cores'] and
-                                       node['available_gpu_units'] == node['total_gpu_units']])
-            self.num_active_nodes = len([node for node in self.resource_manager.nodes if
-                                         not node['is_down'] and
-                                         (node['available_cpu_cores'] < node['total_cpu_cores'] or
-                                          node['available_gpu_units'] < node['total_gpu_units'])])
-
-            # Update system utilization history
-            self.resource_manager.update_system_utilization(self.current_time, self.running)
-        else:
-            # Whole-node allocator
-            self.num_free_nodes = len(self.resource_manager.available_nodes)
-            self.num_active_nodes = self.config['TOTAL_NODES'] \
-                - len(self.resource_manager.available_nodes) \
-                - len(self.resource_manager.down_nodes)
-
-        return completed_jobs, newly_downed_nodes
-
     def complete_timestep(self, autoshutdown, all_jobs: List, jobs: List):
         # 1 update running time of all running jobs
-        # 2 update the current_time of the engine (this serves as reference for most computations)
+        # 2 update the current_timestep of the engine (this serves as reference for most computations)
         # 3 Check if simulation should shutdown

         # update Running time
         for job in self.running:
             if job.state == JobState.RUNNING:
-                job.running_time = self.current_time - job.start_time
-
-        self.current_time += 1  # Update the current time every timestep
+                job.running_time = self.current_timestep - job.start_time

         # Stop the simulation if no more jobs are running or in the queue or in the job list.
         if autoshutdown and \
@@ -376,10 +323,15 @@ class Engine:
             not self.replay and \
             len(all_jobs) == 0 and \
             len(jobs) == 0:
-            print(f"[DEBUG] {self.config['system_name']} - Stopping simulation at time {self.current_time}")
+            if self.debug:
+                print(f"[DEBUG] Simulation Complete: {self.config['system_name']} - "
+                      f"Stopping simulation at time {self.current_timestep}. 
" + f"Simulation ran for {self.current_timestep - self.timestep_start}") simulation_complete = True else: simulation_complete = False + self.current_timestep += 1 # Update the current time every timestep + return simulation_complete def tick(self, *, time_delta=1): @@ -409,7 +361,7 @@ class Engine: net_tx_list = [] net_rx_list = [] if self.debug: - print(f"Current Time: {self.current_time}") + print(f"Current Time: {self.current_timestep}") slowdown_factors = [] @@ -417,7 +369,7 @@ class Engine: if self.debug: print(f"JobID: {job.id}") - job.running_time = self.current_time - job.start_time + job.running_time = self.current_timestep - job.start_time if job.state != JobState.RUNNING: raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}") @@ -534,7 +486,7 @@ class Engine: self.node_occupancy_history.append(node_occupancy) tick_data = TickData( - current_time=self.current_time, + current_timestep=self.current_timestep, completed=None, running=self.running, queue=self.queue, @@ -558,10 +510,13 @@ class Engine: def prepare_system_state(self, all_jobs: List, timestep_start, timestep_end, replay: bool): # Modifies Jobs object - self.current_time = timestep_start + self.current_timestep = timestep_start # Keep only jobs that have not yet ended and that have a chance to start - all_jobs[:] = [job for job in all_jobs if job.end_time >= timestep_start and job.submit_time < timestep_end] + all_jobs[:] = [job for job in all_jobs if (job.end_time is not None + and job.end_time >= timestep_start + and job.submit_time < timestep_end + ) or job.end_time is None] all_jobs.sort(key=lambda j: j.submit_time) @@ -585,7 +540,8 @@ class Engine: def run_simulation(self, jobs, timestep_start, timestep_end, time_delta=1, autoshutdown=False): """Generator that yields after each simulation tick.""" - self.timesteps = (timestep_end - timestep_start) # Where is this used? + self.timestep_start = timestep_start + self.timestep_end = timestep_end if self.scheduler.policy == PolicyType.REPLAY: replay = True @@ -612,8 +568,8 @@ class Engine: listener_thread = threading.Thread(target=keyboard_listener, args=(sim_state,), daemon=True) listener_thread.start() - timestep = timestep_start - while timestep < timestep_end: # Runs every seconds! + self.current_timestep = timestep_start + while self.current_timestep < timestep_end: # Runs every seconds! if sim_state.is_paused(): time.sleep(0.1) @@ -621,10 +577,10 @@ class Engine: current_time_delta = sim_state.get_time_delta() - if (timestep % batch_window == 0) or (timestep == timestep_start): + if (self.current_timestep % batch_window == 0) or (self.current_timestep == timestep_start): # Add jobs that are within the batching window and remove them from all jobs - jobs += [job for job in all_jobs if job.submit_time <= timestep + batch_window] - all_jobs[:] = [job for job in all_jobs if job.submit_time > timestep + batch_window] + jobs += [job for job in all_jobs if job.submit_time <= self.current_timestep + batch_window] + all_jobs[:] = [job for job in all_jobs if job.submit_time > self.current_timestep + batch_window] # 1. Prepare Timestep: completed_jobs, newly_downed_nodes, need_reschedule = self.prepare_timestep(replay=replay, jobs=jobs) @@ -635,18 +591,19 @@ class Engine: # 3. Schedule jobs that are now in the queue. 

             # 3. Schedule jobs that are now in the queue.
             if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions or need_reschedule:
                 self.scheduler.schedule(self.queue,
                                         self.running,
-                                        self.current_time,
+                                        self.current_timestep,
                                         accounts=self.accounts,
                                         sorted=(not has_new_additions))

-            if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0:
+            if self.debug and self.current_timestep % self.config['UI_UPDATE_FREQ'] == 0:
                 print(".", end="", flush=True)

             # 4. Run tick only at specified time_delta
-            if 0 == (timestep % current_time_delta) and \
-               ((current_time_delta == 1 and
-                 self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or
-                (current_time_delta != 1 or self.downscale != 1)):
+            if 0 == (self.current_timestep % current_time_delta) \
+                    and ((current_time_delta == 1
+                          and self.current_timestep % self.config['POWER_UPDATE_FREQ'] == 0)
+                         or (current_time_delta != 1 or self.downscale != 1)
+                         ):
                 tick_data = self.tick(time_delta=current_time_delta)
                 tick_data.completed = completed_jobs
             else:
@@ -658,8 +615,6 @@ class Engine:
                 break
             yield tick_data

-            timestep += 1
-
     def get_job_history_dict(self):
         return self.job_history_dict

@@ -670,7 +625,7 @@ class Engine:
         return self.scheduler_running_history

     def record_util_stats(self, *, system_util):
-        self.sys_util_history.append((self.current_time, system_util))
+        self.sys_util_history.append((self.current_timestep, system_util))
         self.scheduler_queue_history.append(len(self.running))
         self.scheduler_running_history.append(len(self.queue))

@@ -684,11 +639,11 @@ class Engine:
         self.net_util_history.append(avg_net)

     def record_power_stats(self, *, time_delta, total_power_kw, total_loss_kw, jobs_power):
-        if (time_delta == 1 and self.current_time % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1:
+        if (time_delta == 1 and self.current_timestep % self.config['POWER_UPDATE_FREQ'] == 0) or time_delta != 1:
             # First job specific
             record_power_stats_foreach_job(running_jobs=self.running, jobs_power=jobs_power)
             # power manager
-            self.power_manager.history.append((self.current_time, total_power_kw))
-            self.power_manager.loss_history.append((self.current_time, total_loss_kw))
+            self.power_manager.history.append((self.current_timestep, total_power_kw))
+            self.power_manager.loss_history.append((self.current_timestep, total_loss_kw))
             # engine
             self.sys_power = total_power_kw
diff --git a/raps/job.py b/raps/job.py
index 33a4670..64400d9 100644
--- a/raps/job.py
+++ b/raps/job.py
@@ -12,22 +12,49 @@ Implementing such using something like:
 """

-def job_dict(*, nodes_required, name, account,
+class JobState(Enum):
+    """Enumeration for job states."""
+    RUNNING = 'R'
+    PENDING = 'PD'
+    COMPLETED = 'C'
+    CANCELLED = 'CA'
+    FAILED = 'F'
+    TIMEOUT = 'TO'
+
+
+def job_dict(*,
+             nodes_required,
+             name,
+             account,
              # Allocation
-             end_state, scheduled_nodes=None,
-             id, priority=0, partition=0,
+             job_state=JobState.PENDING,
+             end_state: JobState | None = None,
+             scheduled_nodes=None,
+             id,
+             priority: int | None = 0,
+             partition: int | None = 0,
              # Resource Requests and allocations
-             cpu_cores_required=0, gpu_units_required=0,
-             allocated_cpu_cores=0, allocated_gpu_units=0,
+             cpu_cores_required=0,
+             gpu_units_required=0,
+             allocated_cpu_cores=0,
+             allocated_gpu_units=0,
              # Traces
-             cpu_trace, gpu_trace, ntx_trace, nrx_trace,
+             cpu_trace,
+             gpu_trace,
+             ntx_trace,
+             nrx_trace,
              # Times
-             submit_time=0, time_limit=0,
-             start_time=0, end_time=0, wall_time=0,
-             trace_time=0, trace_start_time=0, trace_end_time=0,
-             trace_quanta=None,
-             trace_missing_values=False,
-             downscale=1
+             submit_time=0,
+             time_limit: int = 0,
+             start_time: int | None = 0,
+             end_time: int | None = 0,
+             wall_time: int | None = 0,  # Should this be removed?
+             trace_time: int | None = 0,
+             trace_start_time: int | None = 0,
+             trace_end_time: int | None = 0,
+             trace_quanta: int | None = None,
+             trace_missing_values: bool | None = False,
+             downscale: int = 1
              ):
     """ Return job info dictionary """
     return {
@@ -35,6 +62,7 @@ def job_dict(*, nodes_required, name, account,
         'name': name,
         'account': account,
         # Allocation:
+        'job_state': job_state,
         'end_state': end_state,
         'scheduled_nodes': scheduled_nodes,
         'id': id,
@@ -94,16 +122,6 @@ def dilate_trace(trace, factor):
     return new_trace


-class JobState(Enum):
-    """Enumeration for job states."""
-    RUNNING = 'R'
-    PENDING = 'PD'
-    COMPLETED = 'C'
-    CANCELLED = 'CA'
-    FAILED = 'F'
-    TIMEOUT = 'TO'
-
-
 class Job:
     """Represents a job to be scheduled and executed in the distributed computing system.

@@ -164,7 +182,8 @@ class Job:
         assert isinstance(self.wall_time, (int, float, np.int64, np.double))
         assert isinstance(self.start_time, (int, float, np.int64, np.double, type(None)))
         assert isinstance(self.end_time, (int, float, np.int64, np.double, type(None)))
-        assert self.start_time <= self.end_time, f"{self.start_time} <= {self.end_time}"
+        if self.start_time is not None and self.end_time is not None:
+            assert self.start_time <= self.end_time, f"{self.start_time} <= {self.end_time}"

     def __repr__(self):
         """Return a string representation of the job."""
@@ -177,7 +196,7 @@ class Job:
                 f"allocated_gpu_units={self.allocated_gpu_units}, "
                 f"cpu_trace={self.cpu_trace}, gpu_trace={self.gpu_trace}, "
                 f"ntx_trace={self.ntx_trace}, nrx_trace={self.nrx_trace}, "
-                f"end_state={self.end_state}, "
+                f"job_state={self.job_state}, end_state={self.end_state}, "
                 f"submit_time={self.submit_time}, time_limit={self.time_limit}, "
                 f"start_time={self.start_time}, end_time={self.end_time}, "
                 f"wall_time={self.wall_time}, "
@@ -258,6 +277,8 @@ class JobStatistics:
             self.avg_cpu_usage = sum(job.cpu_trace) / len(job.cpu_trace)
         elif isinstance(job.cpu_trace, int) or isinstance(job.cpu_trace, float):
             self.avg_cpu_usage = job.cpu_trace
+        elif job.cpu_trace is None:
+            self.avg_cpu_usage = None
         else:
             raise NotImplementedError()

@@ -268,6 +289,8 @@ class JobStatistics:
             self.avg_gpu_usage = sum(job.gpu_trace) / len(job.gpu_trace)
         elif isinstance(job.gpu_trace, int) or isinstance(job.gpu_trace, float):
             self.avg_gpu_usage = job.gpu_trace
+        elif job.gpu_trace is None:
+            self.avg_gpu_usage = None
         else:
             raise NotImplementedError()

@@ -278,8 +301,10 @@ class JobStatistics:
             self.avg_ntx_usage = sum(job.ntx_trace) / len(job.ntx_trace)
         elif isinstance(job.ntx_trace, int) or isinstance(job.ntx_trace, float):
             self.avg_ntx_usage = job.ntx_trace
+        elif job.ntx_trace is None:
+            self.avg_ntx_usage = None
         else:
-            self.avg_ntx_usage = 0
+            raise NotImplementedError()

         if isinstance(job.nrx_trace, list) or isinstance(job.nrx_trace, np.ndarray):
             if len(job.nrx_trace) == 0:
@@ -288,8 +313,10 @@ class JobStatistics:
             self.avg_nrx_usage = sum(job.nrx_trace) / len(job.nrx_trace)
         elif isinstance(job.nrx_trace, int) or isinstance(job.nrx_trace, float):
             self.avg_nrx_usage = job.nrx_trace
+        elif job.nrx_trace is None:
+            self.avg_nrx_usage = None
         else:
-            self.avg_nrx_usage = 0
+            raise NotImplementedError()

         if len(job.power_history) == 0:
             self.avg_node_power = 0
diff --git a/raps/stats.py b/raps/stats.py
index f224862..b4cfbfb 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -15,8 +15,9 @@ from .engine import Engine

 def get_engine_stats(engine: Engine):
     """ Return engine statistics """
engine statistics """ + timesteps = engine.current_timestep - engine.timestep_start num_samples = len(engine.power_manager.history) if engine.power_manager else 0 - time_simulated = convert_seconds_to_hhmmss(engine.timesteps / engine.downscale) + time_simulated = convert_seconds_to_hhmmss(timesteps / engine.downscale) average_power_mw = sum_values(engine.power_manager.history) / num_samples / 1000 if num_samples else 0 average_loss_mw = sum_values(engine.power_manager.loss_history) / num_samples / 1000 if num_samples else 0 min_loss_mw = min_value(engine.power_manager.loss_history) / 1000 if num_samples else 0 @@ -24,7 +25,7 @@ def get_engine_stats(engine: Engine): loss_fraction = average_loss_mw / average_power_mw if average_power_mw else 0 efficiency = 1 - loss_fraction if loss_fraction else 0 - total_energy_consumed = average_power_mw * engine.timesteps / 3600 if engine.timesteps else 0 # MW-hr + total_energy_consumed = average_power_mw * timesteps / 3600 if timesteps else 0 # MW-hr emissions = total_energy_consumed * 852.3 / 2204.6 / efficiency if efficiency else 0 total_cost = total_energy_consumed * 1000 * engine.config.get('POWER_COST', 0) # Total cost in dollars @@ -149,7 +150,8 @@ def get_job_stats(engine: Engine): min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = sys.maxsize, -sys.maxsize - 1, 0 # Completion statistics - throughput = engine.jobs_completed / engine.timesteps * 3600 if engine.timesteps else 0 # Jobs per hour + throughput = engine.jobs_completed / (engine.current_timestep - engine.timestep_start) * 3600 if \ + (engine.current_timestep - engine.timestep_start != 0) else 0 # Jobs per hour min_wait_time, max_wait_time, sum_wait_time = sys.maxsize, -sys.maxsize - 1, 0 min_turnaround_time, max_turnaround_time, sum_turnaround_time = sys.maxsize, -sys.maxsize - 1, 0 @@ -215,10 +217,14 @@ def get_job_stats(engine: Engine): min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = \ min_max_sum(psf_partial_den, min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den) - min_cpu_u, max_cpu_u, sum_cpu_u = min_max_sum(job['avg_cpu_usage'], min_cpu_u, max_cpu_u, sum_cpu_u) - min_gpu_u, max_gpu_u, sum_gpu_u = min_max_sum(job['avg_gpu_usage'], min_gpu_u, max_gpu_u, sum_gpu_u) - min_ntx_u, max_ntx_u, sum_ntx_u = min_max_sum(job['avg_ntx_usage'], min_ntx_u, max_ntx_u, sum_ntx_u) - min_nrx_u, max_nrx_u, sum_nrx_u = min_max_sum(job['avg_nrx_usage'], min_nrx_u, max_nrx_u, sum_nrx_u) + if job['avg_cpu_usage'] is not None: + min_cpu_u, max_cpu_u, sum_cpu_u = min_max_sum(job['avg_cpu_usage'], min_cpu_u, max_cpu_u, sum_cpu_u) + if job['avg_gpu_usage'] is not None: + min_gpu_u, max_gpu_u, sum_gpu_u = min_max_sum(job['avg_gpu_usage'], min_gpu_u, max_gpu_u, sum_gpu_u) + if job['avg_ntx_usage'] is not None: + min_ntx_u, max_ntx_u, sum_ntx_u = min_max_sum(job['avg_ntx_usage'], min_ntx_u, max_ntx_u, sum_ntx_u) + if job['avg_nrx_usage'] is not None: + min_nrx_u, max_nrx_u, sum_nrx_u = min_max_sum(job['avg_nrx_usage'], min_nrx_u, max_nrx_u, sum_nrx_u) if job['num_nodes'] <= 5: jobsSmall += 1 @@ -272,6 +278,25 @@ def get_job_stats(engine: Engine): min_ntx_u, max_ntx_u, avg_ntx_u = -1, -1, -1 min_nrx_u, max_nrx_u, avg_nrx_u = -1, -1, -1 + if min_cpu_u == sys.maxsize and \ + max_cpu_u == -sys.maxsize - 1 and \ + sum_cpu_u == 0: + min_cpu_u, max_cpu_u, avg_cpu_u = -1, -1, -1 + + if min_gpu_u == sys.maxsize and \ + max_gpu_u == -sys.maxsize - 1 and \ + sum_gpu_u == 0: + min_gpu_u, max_gpu_u, avg_gpu_u = -1, -1, -1 + if min_ntx_u == sys.maxsize and \ + max_ntx_u == -sys.maxsize - 1 
and \ + sum_ntx_u == 0: + min_ntx_u, max_ntx_u, avg_ntx_u = -1, -1, -1 + + if min_nrx_u == sys.maxsize and \ + max_nrx_u == -sys.maxsize - 1 and \ + sum_nrx_u == 0: + min_nrx_u, max_nrx_u, avg_nrx_u = -1, -1, -1 + job_stats = { 'jobs completed': engine.jobs_completed, 'throughput': f'{throughput:.2f} jobs/hour', diff --git a/raps/ui.py b/raps/ui.py index 6d7eaec..376f168 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -516,7 +516,7 @@ class LayoutManager: self.update_scheduled_jobs(data.running + data.queue) self.update_status( - data.current_time, len(data.running), len(data.queue), data.num_active_nodes, + data.current_timestep, len(data.running), len(data.queue), data.num_active_nodes, data.num_free_nodes, data.down_nodes, data.avg_net_util, data.slowdown_per_job, data.time_delta ) @@ -524,7 +524,7 @@ class LayoutManager: self.update_scheduled_jobs(data.running + data.queue) self.update_status( - data.current_time, + data.current_timestep, len(data.running), len(data.queue), data.num_active_nodes, -- GitLab From c26496abc157fbf7ec8b978e8841c5e22b75d5b9 Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Wed, 20 Aug 2025 17:33:05 +0000 Subject: [PATCH 243/388] Refactor system config --- README.md | 14 +- config/40frontiers.yaml | 60 ++++ config/40frontiers/power.json | 18 - config/40frontiers/scheduler.json | 17 - config/40frontiers/system.json | 20 -- config/40frontiers/uq.json | 11 - config/adastraMI250.yaml | 110 ++++++ config/adastraMI250/cooling.json | 25 -- config/adastraMI250/power.json | 18 - config/adastraMI250/scheduler.json | 17 - config/adastraMI250/system.json | 20 -- config/adastraMI250/uq.json | 11 - config/bluewaters.yaml | 61 ++++ config/bluewaters/network.json | 13 - config/bluewaters/power.json | 19 - config/bluewaters/scheduler.json | 18 - config/bluewaters/system.json | 21 -- config/frontier.yaml | 83 +++++ config/frontier/cooling.json | 25 -- config/frontier/power.json | 18 - config/frontier/scheduler.json | 17 - config/frontier/system.json | 20 -- config/frontier/uq.json | 11 - config/fugaku.yaml | 49 +++ config/fugaku/power.json | 18 - config/fugaku/scheduler.json | 17 - config/fugaku/system.json | 20 -- config/gcloudv2.yaml | 49 +++ config/gcloudv2/power.json | 18 - config/gcloudv2/scheduler.json | 17 - config/gcloudv2/system.json | 20 -- config/lassen.yaml | 128 +++++++ config/lassen/cooling.json | 71 ---- config/lassen/network.json | 9 - config/lassen/power.json | 19 - config/lassen/scheduler.json | 17 - config/lassen/system.json | 22 -- config/lumi/lumi-c.yaml | 49 +++ config/lumi/lumi-c/power.json | 18 - config/lumi/lumi-c/scheduler.json | 17 - config/lumi/lumi-c/system.json | 20 -- config/lumi/lumi-g.yaml | 49 +++ config/lumi/lumi-g/power.json | 18 - config/lumi/lumi-g/scheduler.json | 17 - config/lumi/lumi-g/system.json | 20 -- config/marconi100.yaml | 121 +++++++ config/marconi100/cooling.json | 76 ---- config/marconi100/power.json | 18 - config/marconi100/scheduler.json | 17 - config/marconi100/system.json | 20 -- config/mit_supercloud/part-cpu.yaml | 51 +++ config/mit_supercloud/part-cpu/power.json | 18 - config/mit_supercloud/part-cpu/scheduler.json | 19 - config/mit_supercloud/part-cpu/system.json | 21 -- config/mit_supercloud/part-gpu.yaml | 51 +++ config/mit_supercloud/part-gpu/power.json | 18 - config/mit_supercloud/part-gpu/scheduler.json | 19 - config/mit_supercloud/part-gpu/system.json | 21 -- config/setonix/part-cpu.yaml | 242 +++++++++++++ config/setonix/part-cpu/power.json | 18 - config/setonix/part-cpu/scheduler.json | 18 - 
config/setonix/part-cpu/system.json | 21 -- config/setonix/part-gpu.yaml | 114 ++++++ config/setonix/part-gpu/power.json | 18 - config/setonix/part-gpu/scheduler.json | 18 - config/setonix/part-gpu/system.json | 21 -- config/summit.yaml | 329 ++++++++++++++++++ config/summit/cooling.json | 284 --------------- config/summit/power.json | 18 - config/summit/scheduler.json | 17 - config/summit/system.json | 19 - main.py | 4 +- multi-part-sim-mpi.py | 4 +- multi-part-sim.py | 4 +- pyproject.toml | 2 + pytest.ini | 8 +- raps/config.py | 281 +++++++++++---- raps/power.py | 4 +- raps/telemetry.py | 4 +- raps/workload.py | 4 +- tests/test_system_config.py | 10 + 81 files changed, 1792 insertions(+), 1469 deletions(-) create mode 100644 config/40frontiers.yaml delete mode 100644 config/40frontiers/power.json delete mode 100644 config/40frontiers/scheduler.json delete mode 100644 config/40frontiers/system.json delete mode 100644 config/40frontiers/uq.json create mode 100644 config/adastraMI250.yaml delete mode 100644 config/adastraMI250/cooling.json delete mode 100644 config/adastraMI250/power.json delete mode 100644 config/adastraMI250/scheduler.json delete mode 100644 config/adastraMI250/system.json delete mode 100644 config/adastraMI250/uq.json create mode 100644 config/bluewaters.yaml delete mode 100644 config/bluewaters/network.json delete mode 100644 config/bluewaters/power.json delete mode 100644 config/bluewaters/scheduler.json delete mode 100644 config/bluewaters/system.json create mode 100644 config/frontier.yaml delete mode 100644 config/frontier/cooling.json delete mode 100644 config/frontier/power.json delete mode 100644 config/frontier/scheduler.json delete mode 100644 config/frontier/system.json delete mode 100644 config/frontier/uq.json create mode 100644 config/fugaku.yaml delete mode 100644 config/fugaku/power.json delete mode 100644 config/fugaku/scheduler.json delete mode 100644 config/fugaku/system.json create mode 100644 config/gcloudv2.yaml delete mode 100644 config/gcloudv2/power.json delete mode 100644 config/gcloudv2/scheduler.json delete mode 100644 config/gcloudv2/system.json create mode 100644 config/lassen.yaml delete mode 100644 config/lassen/cooling.json delete mode 100644 config/lassen/network.json delete mode 100644 config/lassen/power.json delete mode 100644 config/lassen/scheduler.json delete mode 100644 config/lassen/system.json create mode 100644 config/lumi/lumi-c.yaml delete mode 100644 config/lumi/lumi-c/power.json delete mode 100644 config/lumi/lumi-c/scheduler.json delete mode 100644 config/lumi/lumi-c/system.json create mode 100644 config/lumi/lumi-g.yaml delete mode 100644 config/lumi/lumi-g/power.json delete mode 100644 config/lumi/lumi-g/scheduler.json delete mode 100644 config/lumi/lumi-g/system.json create mode 100644 config/marconi100.yaml delete mode 100644 config/marconi100/cooling.json delete mode 100644 config/marconi100/power.json delete mode 100644 config/marconi100/scheduler.json delete mode 100644 config/marconi100/system.json create mode 100644 config/mit_supercloud/part-cpu.yaml delete mode 100644 config/mit_supercloud/part-cpu/power.json delete mode 100644 config/mit_supercloud/part-cpu/scheduler.json delete mode 100644 config/mit_supercloud/part-cpu/system.json create mode 100644 config/mit_supercloud/part-gpu.yaml delete mode 100644 config/mit_supercloud/part-gpu/power.json delete mode 100644 config/mit_supercloud/part-gpu/scheduler.json delete mode 100644 config/mit_supercloud/part-gpu/system.json create mode 100644 
config/setonix/part-cpu.yaml
 delete mode 100644 config/setonix/part-cpu/power.json
 delete mode 100644 config/setonix/part-cpu/scheduler.json
 delete mode 100644 config/setonix/part-cpu/system.json
 create mode 100644 config/setonix/part-gpu.yaml
 delete mode 100644 config/setonix/part-gpu/power.json
 delete mode 100644 config/setonix/part-gpu/scheduler.json
 delete mode 100644 config/setonix/part-gpu/system.json
 create mode 100644 config/summit.yaml
 delete mode 100644 config/summit/cooling.json
 delete mode 100644 config/summit/power.json
 delete mode 100644 config/summit/scheduler.json
 delete mode 100644 config/summit/system.json
 create mode 100644 tests/test_system_config.py

diff --git a/README.md b/README.md
index 8c92a11..f46e23a 100644
--- a/README.md
+++ b/README.md
@@ -139,15 +139,15 @@ This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename

 There are three ways to modify replaying of telemetry data:

- 1. `--arrival`. Changing the arrival time distribution - replay cases will default to `--arrival prescribed`, where the jobs will be submitted exactly as they were submitted on the physical machine. This can be changed to `--arrival poisson` to change when the jobs arrive, which is especially useful in cases where there may be gaps in time, e.g., when the system goes down for several days, or the system is is underutilized.
-    python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --arrival poisson
- 2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler.
+1. `--arrival`. Changing the arrival time distribution - replay cases will default to `--arrival prescribed`, where the jobs will be submitted exactly as they were submitted on the physical machine. This can be changed to `--arrival poisson` to change when the jobs arrive, which is especially useful in cases where there may be gaps in time, e.g., when the system goes down for several days, or the system is underutilized.
+python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --arrival poisson
+2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler.

-    python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h
+python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h

- 3. `--scale`. Changing the scale of each job in the telemetry data. The `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition), and randomly select the number of nodes for each job from one to max nodes. This flag is useful when replaying telemetry from a larger system onto a smaller system.
+3. `--scale`. Changing the scale of each job in the telemetry data. The `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition), and randomly select the number of nodes for each job from one to max nodes.
This flag is useful when replaying telemetry from a larger system onto a smaller system. - 4. `--shuffle`. Shuffle the jobs before playing. +4. `--shuffle`. Shuffle the jobs before playing. ## Job-level power output example for replay of single job @@ -181,7 +181,7 @@ Install pre-commit hooks as set by the project: ``` pip install pre-commit pre-commit install -''' +``` ## Authors diff --git a/config/40frontiers.yaml b/config/40frontiers.yaml new file mode 100644 index 0000000..cc2783e --- /dev/null +++ b/config/40frontiers.yaml @@ -0,0 +1,60 @@ +system: + num_cdus: 1000 + racks_per_cdu: 3 + nodes_per_rack: 128 + chassis_per_rack: 8 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: + - 41 + down_nodes: [] + cpus_per_node: 1 + gpus_per_node: 4 + cpu_peak_flops: 2048000000000.0 + gpu_peak_flops: 52000000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 1 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 9000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +uq: + power_gpu_uncertainty: 0.05 + power_cpu_uncertainty: 0.05 + power_mem_uncertainty: 0.05 + power_nic_uncertainty: 0.05 + power_nvme_uncertainty: 0.05 + power_cdus_uncertainty: 0.05 + power_node_uncertainty: 0.002 + power_switch_uncertainty: 0.05 + rectifier_power_uncertainty: 0.05 diff --git a/config/40frontiers/power.json b/config/40frontiers/power.json deleted file mode 100644 index d6ec29e..0000000 --- a/config/40frontiers/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NIC": 20, - "POWER_NVME": 30, - "POWER_SWITCH": 250, - "POWER_CDU": 8473.47, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/40frontiers/scheduler.json b/config/40frontiers/scheduler.json deleted file mode 100644 index 0a43f19..0000000 --- a/config/40frontiers/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 1, - "MTBF": 11, - "TRACE_QUANTA": 15, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 9000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/40frontiers/system.json b/config/40frontiers/system.json deleted file mode 100644 index 51add94..0000000 --- a/config/40frontiers/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 1000, - "RACKS_PER_CDU": 3, - "NODES_PER_RACK": 128, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [41], - "DOWN_NODES": [], - "CPUS_PER_NODE": 1, - "GPUS_PER_NODE": 4, - "CPU_PEAK_FLOPS": 
2048E9, - "GPU_PEAK_FLOPS": 52E12, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/40frontiers/uq.json b/config/40frontiers/uq.json deleted file mode 100644 index 7359bc2..0000000 --- a/config/40frontiers/uq.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "POWER_GPU_UNCERTAINTY": 0.05 , - "POWER_CPU_UNCERTAINTY": 0.05 , - "POWER_MEM_UNCERTAINTY": 0.05 , - "POWER_NIC_UNCERTAINTY": 0.05 , - "POWER_NVME_UNCERTAINTY": 0.05 , - "POWER_CDUS_UNCERTAINTY": 0.05 , - "POWER_NODE_UNCERTAINTY": 0.002, - "POWER_SWITCH_UNCERTAINTY": 0.05 , - "RECTIFIER_POWER_UNCERTAINTY": 0.05 -} diff --git a/config/adastraMI250.yaml b/config/adastraMI250.yaml new file mode 100644 index 0000000..c7b95b8 --- /dev/null +++ b/config/adastraMI250.yaml @@ -0,0 +1,110 @@ +system: + num_cdus: 1 + racks_per_cdu: 3 + nodes_per_rack: 128 + chassis_per_rack: 8 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: + - 356 + - 357 + - 358 + - 359 + - 360 + - 361 + - 362 + - 363 + - 364 + - 365 + - 366 + - 367 + - 368 + - 369 + - 370 + - 371 + - 372 + - 373 + - 374 + - 375 + - 376 + - 377 + - 378 + - 379 + - 380 + - 381 + - 382 + - 383 + cpus_per_node: 1 + gpus_per_node: 8 + cpu_peak_flops: 2048000000000.0 + gpu_peak_flops: 21120000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 44 + power_gpu_max: 238 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 37.13 + power_nic: 20 + power_nvme: 0 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 60 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 324 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +uq: + power_gpu_uncertainty: 0.05 + power_cpu_uncertainty: 0.05 + power_mem_uncertainty: 0.05 + power_nic_uncertainty: 0.05 + power_nvme_uncertainty: 0.05 + power_cdus_uncertainty: 0.05 + power_node_uncertainty: 0.002 + power_switch_uncertainty: 0.05 + rectifier_power_uncertainty: 0.05 +cooling: + cooling_efficiency: 0.945 + wet_bulb_temp: 290.0 + zip_code: '37831' + country_code: US + fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_column_mapping: + T_sec_r_C: "Rack Return Temperature (°C)" + T_sec_s_C: "Rack Supply Temperature (°C)" + p_sec_r_psig: "Rack Supply Pressure (psig)" + p_sec_s_psig: "Rack Return Pressure (psig)" + V_flow_sec_GPM: "Rack Flowrate (gpm)" + T_prim_r_C: "Facility Return Temperature (°C)" + T_prim_s_C: "Facility Supply Temperature (°C)" + p_prim_s_psig: "Facility Supply Pressure (psig)" + p_prim_r_psig: "Facility Return Pressure (psig)" + V_flow_prim_GPM: "Facility Flowrate (gpm)" + W_flow_CDUP_kW: "Work Done By CDUP (kW)" + temperature_keys: + - simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_Towb + w_htwps_key: "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW" + w_ctwps_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW" + w_cts_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" diff --git a/config/adastraMI250/cooling.json b/config/adastraMI250/cooling.json deleted file mode 100644 index 778a56d..0000000 --- a/config/adastraMI250/cooling.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - 
"COOLING_EFFICIENCY": 0.945, - "WET_BULB_TEMP": 290.0, - "ZIP_CODE": 37831, - "COUNTRY_CODE": "US", - "FMU_PATH": "models/Simulator_olcf5_base.fmu", - "FMU_COLUMN_MAPPING": { - "T_sec_r_C": "Rack Return Temperature (\u00b0C)", - "T_sec_s_C": "Rack Supply Temperature (\u00b0C)", - "p_sec_r_psig": "Rack Supply Pressure (psig)", - "p_sec_s_psig": "Rack Return Pressure (psig)", - "V_flow_sec_GPM": "Rack Flowrate (gpm)", - "T_prim_r_C": "Facility Return Temperature (\u00b0C)", - "T_prim_s_C": "Facility Supply Temperature (\u00b0C)", - "p_prim_s_psig": "Facility Supply Pressure (psig)", - "p_prim_r_psig": "Facility Return Pressure (psig)", - "V_flow_prim_GPM": "Facility Flowrate (gpm)", - "W_flow_CDUP_kW": "Work Done By CDUP (kW)" - }, - "TEMPERATURE_KEY": "simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_Towb", - "W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW", - "W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW", - "W_CTs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" - -} diff --git a/config/adastraMI250/power.json b/config/adastraMI250/power.json deleted file mode 100644 index 77d8c69..0000000 --- a/config/adastraMI250/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 44, - "POWER_GPU_MAX": 238, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 37.13, - "POWER_NIC": 20, - "POWER_NVME": 0, - "POWER_SWITCH": 250, - "POWER_CDU": 8473.47, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/adastraMI250/scheduler.json b/config/adastraMI250/scheduler.json deleted file mode 100644 index 3ae6644..0000000 --- a/config/adastraMI250/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "TRACE_QUANTA": 20, - "MIN_WALL_TIME": 60, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 324, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/adastraMI250/system.json b/config/adastraMI250/system.json deleted file mode 100644 index 36a689e..0000000 --- a/config/adastraMI250/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 1, - "RACKS_PER_CDU": 3, - "NODES_PER_RACK": 128, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383], - "CPUS_PER_NODE": 1, - "GPUS_PER_NODE": 8, - "CPU_PEAK_FLOPS": 2048E9, - "GPU_PEAK_FLOPS": 21.120000E12, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/adastraMI250/uq.json b/config/adastraMI250/uq.json deleted file mode 100644 index 7359bc2..0000000 --- a/config/adastraMI250/uq.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "POWER_GPU_UNCERTAINTY": 0.05 , - "POWER_CPU_UNCERTAINTY": 0.05 , - "POWER_MEM_UNCERTAINTY": 0.05 , - "POWER_NIC_UNCERTAINTY": 0.05 , - "POWER_NVME_UNCERTAINTY": 0.05 , - "POWER_CDUS_UNCERTAINTY": 0.05 , - "POWER_NODE_UNCERTAINTY": 0.002, - "POWER_SWITCH_UNCERTAINTY": 0.05 , - "RECTIFIER_POWER_UNCERTAINTY": 0.05 -} diff --git a/config/bluewaters.yaml 
b/config/bluewaters.yaml new file mode 100644 index 0000000..90be71d --- /dev/null +++ b/config/bluewaters.yaml @@ -0,0 +1,61 @@ +system: + num_cdus: 36 + racks_per_cdu: 6 + nodes_per_rack: 128 + chassis_per_rack: 8 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + gpus_per_node: 0 + cpu_peak_flops: 264960000000.0 + gpu_peak_flops: 0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0 +power: + power_gpu_idle: 0 + power_gpu_max: 0 + power_cpu_idle: 38 + power_cpu_max: 95 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 100 + mtbf: 11 + trace_quanta: 60 + min_wall_time: 60 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 26884 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +network: + topology: torus3d + network_max_bw: 9600000000.0 + torus_x: 24 + torus_y: 24 + torus_z: 24 + torus_wrap: true + hosts_per_router: 2 + torus_link_bw: 9600000000.0 + latency_per_hop: 1.0e-06 + torus_routing: DOR_XYZ + node_coords_csv: null diff --git a/config/bluewaters/network.json b/config/bluewaters/network.json deleted file mode 100644 index 1283549..0000000 --- a/config/bluewaters/network.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "TOPOLOGY": "torus3d", - "NETWORK_MAX_BW": 9.6e9, - "TORUS_X": 24, - "TORUS_Y": 24, - "TORUS_Z": 24, - "TORUS_WRAP": true, - "HOSTS_PER_ROUTER": 2, - "TORUS_LINK_BW": 9.6e9, - "LATENCY_PER_HOP": 1e-6, - "TORUS_ROUTING": "DOR_XYZ", - "NODE_COORDS_CSV": null -} diff --git a/config/bluewaters/power.json b/config/bluewaters/power.json deleted file mode 100644 index fb4d4f2..0000000 --- a/config/bluewaters/power.json +++ /dev/null @@ -1,19 +0,0 @@ - -{ - "POWER_GPU_IDLE": 0, - "POWER_GPU_MAX": 0, - "POWER_CPU_IDLE": 38, - "POWER_CPU_MAX": 95, - "POWER_MEM": 74.26, - "POWER_NIC": 20, - "POWER_NVME": 30, - "POWER_SWITCH": 250, - "POWER_CDU": 8473.47, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/bluewaters/scheduler.json b/config/bluewaters/scheduler.json deleted file mode 100644 index 3f1445e..0000000 --- a/config/bluewaters/scheduler.json +++ /dev/null @@ -1,18 +0,0 @@ - -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 100, - "MTBF": 11, - "TRACE_QUANTA": 60, - "MIN_WALL_TIME": 60, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 26884, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/bluewaters/system.json b/config/bluewaters/system.json deleted file mode 100644 index 336da0b..0000000 --- a/config/bluewaters/system.json +++ /dev/null @@ -1,21 +0,0 @@ - -{ - "NUM_CDUS": 36, - "RACKS_PER_CDU": 6, - "NODES_PER_RACK": 128, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "GPUS_PER_NODE": 0, - "CPU_PEAK_FLOPS": 2.6496E11, - "GPU_PEAK_FLOPS": 0, - 
"CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0 -} diff --git a/config/frontier.yaml b/config/frontier.yaml new file mode 100644 index 0000000..3102f31 --- /dev/null +++ b/config/frontier.yaml @@ -0,0 +1,83 @@ +system: + num_cdus: 25 + racks_per_cdu: 3 + nodes_per_rack: 128 + chassis_per_rack: 8 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: + - 41 + down_nodes: [] + cpus_per_node: 1 + gpus_per_node: 4 + cpu_peak_flops: 2048000000000.0 + gpu_peak_flops: 52000000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 100 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 60 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +uq: + power_gpu_uncertainty: 0.05 + power_cpu_uncertainty: 0.05 + power_mem_uncertainty: 0.05 + power_nic_uncertainty: 0.05 + power_nvme_uncertainty: 0.05 + power_cdus_uncertainty: 0.05 + power_node_uncertainty: 0.002 + power_switch_uncertainty: 0.05 + rectifier_power_uncertainty: 0.05 +cooling: + cooling_efficiency: 0.945 + wet_bulb_temp: 290.0 + zip_code: '37831' + country_code: US + fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_column_mapping: + T_sec_r_C: "Rack Return Temperature (°C)" + T_sec_s_C: "Rack Supply Temperature (°C)" + p_sec_r_psig: "Rack Supply Pressure (psig)" + p_sec_s_psig: "Rack Return Pressure (psig)" + V_flow_sec_GPM: "Rack Flowrate (gpm)" + T_prim_r_C: "Facility Return Temperature (°C)" + T_prim_s_C: "Facility Supply Temperature (°C)" + p_prim_s_psig: "Facility Supply Pressure (psig)" + p_prim_r_psig: "Facility Return Pressure (psig)" + V_flow_prim_GPM: "Facility Flowrate (gpm)" + W_flow_CDUP_kW: "Work Done By CDUP (kW)" + temperature_keys: + - simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_Towb + w_htwps_key: "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW" + w_ctwps_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW" + w_cts_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" diff --git a/config/frontier/cooling.json b/config/frontier/cooling.json deleted file mode 100644 index fd734a5..0000000 --- a/config/frontier/cooling.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "COOLING_EFFICIENCY": 0.945, - "WET_BULB_TEMP": 290.0, - "ZIP_CODE": 37831, - "COUNTRY_CODE": "US", - "FMU_PATH": "models/Simulator_olcf5_base.fmu", - "FMU_COLUMN_MAPPING": { - "T_sec_r_C": "Rack Return Temperature (\u00b0C)", - "T_sec_s_C": "Rack Supply Temperature (\u00b0C)", - "p_sec_r_psig": "Rack Supply Pressure (psig)", - "p_sec_s_psig": "Rack Return Pressure (psig)", - "V_flow_sec_GPM": "Rack Flowrate (gpm)", - "T_prim_r_C": "Facility Return Temperature (\u00b0C)", - "T_prim_s_C": "Facility Supply Temperature (\u00b0C)", - "p_prim_s_psig": "Facility Supply Pressure (psig)", - "p_prim_r_psig": "Facility Return Pressure (psig)", - "V_flow_prim_GPM": "Facility Flowrate (gpm)", - "W_flow_CDUP_kW": "Work Done By CDUP (kW)" - }, - "TEMPERATURE_KEYS": 
["simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_Towb"], - "W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW", - "W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW", - "W_CTs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" - -} diff --git a/config/frontier/power.json b/config/frontier/power.json deleted file mode 100644 index d6ec29e..0000000 --- a/config/frontier/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NIC": 20, - "POWER_NVME": 30, - "POWER_SWITCH": 250, - "POWER_CDU": 8473.47, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/frontier/scheduler.json b/config/frontier/scheduler.json deleted file mode 100644 index 47d1da4..0000000 --- a/config/frontier/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 100, - "MTBF": 11, - "TRACE_QUANTA": 15, - "MIN_WALL_TIME": 60, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/frontier/system.json b/config/frontier/system.json deleted file mode 100644 index b1b9d76..0000000 --- a/config/frontier/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 25, - "RACKS_PER_CDU": 3, - "NODES_PER_RACK": 128, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [41], - "DOWN_NODES": [], - "CPUS_PER_NODE": 1, - "GPUS_PER_NODE": 4, - "CPU_PEAK_FLOPS": 2048E9, - "GPU_PEAK_FLOPS": 52E12, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/frontier/uq.json b/config/frontier/uq.json deleted file mode 100644 index 7359bc2..0000000 --- a/config/frontier/uq.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "POWER_GPU_UNCERTAINTY": 0.05 , - "POWER_CPU_UNCERTAINTY": 0.05 , - "POWER_MEM_UNCERTAINTY": 0.05 , - "POWER_NIC_UNCERTAINTY": 0.05 , - "POWER_NVME_UNCERTAINTY": 0.05 , - "POWER_CDUS_UNCERTAINTY": 0.05 , - "POWER_NODE_UNCERTAINTY": 0.002, - "POWER_SWITCH_UNCERTAINTY": 0.05 , - "RECTIFIER_POWER_UNCERTAINTY": 0.05 -} diff --git a/config/fugaku.yaml b/config/fugaku.yaml new file mode 100644 index 0000000..afc82bf --- /dev/null +++ b/config/fugaku.yaml @@ -0,0 +1,49 @@ +system: + num_cdus: 24 + racks_per_cdu: 18 + nodes_per_rack: 368 + chassis_per_rack: 8 + nodes_per_blade: 1 + switches_per_chassis: 2 + nics_per_node: 1 + rectifiers_per_chassis: 1 + nodes_per_rectifier: 48 + missing_racks: [] + down_nodes: [] + cpus_per_node: 1 + gpus_per_node: 0 + cpu_peak_flops: 3379000000000.0 + gpu_peak_flops: 0 + cpu_fp_ratio: 0.82 + gpu_fp_ratio: 0.0 +power: + power_gpu_idle: 0 + power_gpu_max: 0 + power_cpu_idle: 30 + power_cpu_max: 150 + power_mem: 10 + power_nic: 0 + power_nvme: 0 + power_switch: 0 + power_cdu: 0 + power_update_freq: 10 + rectifier_peak_threshold: 0 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 10 + mtbf: 11 + trace_quanta: 10 + 
min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 3600 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/fugaku/power.json b/config/fugaku/power.json deleted file mode 100644 index 759a0c4..0000000 --- a/config/fugaku/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 0, - "POWER_GPU_MAX": 0, - "POWER_CPU_IDLE": 30, - "POWER_CPU_MAX": 150, - "POWER_MEM": 10, - "POWER_NIC": 0, - "POWER_NVME": 0, - "POWER_SWITCH": 0, - "POWER_CDU": 0, - "POWER_UPDATE_FREQ": 10, - "RECTIFIER_PEAK_THRESHOLD": 0, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/fugaku/scheduler.json b/config/fugaku/scheduler.json deleted file mode 100644 index 94cde88..0000000 --- a/config/fugaku/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 10, - "MTBF": 11, - "TRACE_QUANTA": 10, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 3600, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/fugaku/system.json b/config/fugaku/system.json deleted file mode 100644 index 6a0e63a..0000000 --- a/config/fugaku/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 24, - "RACKS_PER_CDU": 18, - "NODES_PER_RACK": 368, - "RECTIFIERS_PER_RACK": 8, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 1, - "SWITCHES_PER_CHASSIS": 2, - "NICS_PER_NODE": 1, - "RECTIFIERS_PER_CHASSIS": 1, - "NODES_PER_RECTIFIER": 48, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 1, - "GPUS_PER_NODE": 0, - "CPU_PEAK_FLOPS": 3.379E12, - "GPU_PEAK_FLOPS": 0, - "CPU_FP_RATIO": 0.82, - "GPU_FP_RATIO": 0.0 -} diff --git a/config/gcloudv2.yaml b/config/gcloudv2.yaml new file mode 100644 index 0000000..53cd999 --- /dev/null +++ b/config/gcloudv2.yaml @@ -0,0 +1,49 @@ +system: + num_cdus: 125 + racks_per_cdu: 1 + nodes_per_rack: 100 + chassis_per_rack: 1 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 1 + gpus_per_node: 0 + cpu_peak_flops: 2048000000000.0 + gpu_peak_flops: 0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 100 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/gcloudv2/power.json b/config/gcloudv2/power.json deleted file mode 100644 index d6ec29e..0000000 --- a/config/gcloudv2/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NIC": 20, - "POWER_NVME": 30, - "POWER_SWITCH": 250, - "POWER_CDU": 8473.47, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - 
"SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/gcloudv2/scheduler.json b/config/gcloudv2/scheduler.json deleted file mode 100644 index 3cc1744..0000000 --- a/config/gcloudv2/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 100, - "MTBF": 11, - "TRACE_QUANTA": 15, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/gcloudv2/system.json b/config/gcloudv2/system.json deleted file mode 100644 index 4b6fc7b..0000000 --- a/config/gcloudv2/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 125, - "RACKS_PER_CDU": 1, - "NODES_PER_RACK": 100, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 1, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 1, - "GPUS_PER_NODE": 0, - "CPU_PEAK_FLOPS": 2048E9, - "GPU_PEAK_FLOPS": 0, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/lassen.yaml b/config/lassen.yaml new file mode 100644 index 0000000..640c55e --- /dev/null +++ b/config/lassen.yaml @@ -0,0 +1,128 @@ +system: + num_cdus: 257 + racks_per_cdu: 1 + nodes_per_rack: 18 + chassis_per_rack: 1 + nodes_per_blade: 1 + switches_per_chassis: 5 + nics_per_node: 2 + rectifiers_per_chassis: 5 + nodes_per_rectifier: 4 + missing_racks: + - 44 + down_nodes: [] + cpus_per_node: 2 + threads_per_core: 4 + cpu_frequency: 2400000000 + gpus_per_node: 4 + cpu_peak_flops: 396800000000.0 + gpu_peak_flops: 7800000000000.0 + cpu_fp_ratio: 0.72 + gpu_fp_ratio: 0.72 +power: + power_gpu_idle: 75 + power_gpu_max: 300 + power_cpu_idle: 47.25 + power_cpu_max: 252 + power_mem: 74.26 + power_nic_idle: 10 + power_nic_max: 50 + power_nvme: 45 + power_switch: 250 + power_cdu: 0 + power_update_freq: 20 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 0 + sivoc_efficiency: 1 + rectifier_loss_constant: 0 + rectifier_efficiency: 1 + power_cost: 0.094 +scheduler: + job_arrival_time: 20 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 3600 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +cooling: + cooling_efficiency: 0.945 + wet_bulb_temp: 290.0 + zip_code: '94550' + country_code: US + fmu_path: "models/POWER9CSM/fmus/lassen.fmu" + fmu_column_mapping: + T_sec_r_C: "Rack Return Temperature (°C)" + T_sec_s_C: "Rack Supply Temperature (°C)" + p_sec_r_psig: "Rack Supply Pressure (psig)" + p_sec_s_psig: "Rack Return Pressure (psig)" + V_flow_sec_GPM: "Rack Flowrate (gpm)" + T_prim_r_C: "Facility Return Temperature (°C)" + T_prim_s_C: "Facility Supply Temperature (°C)" + p_prim_s_psig: "Facility Supply Pressure (psig)" + p_prim_r_psig: "Facility Return Pressure (psig)" + V_flow_prim_GPM: "Facility Flowrate (gpm)" + W_flow_CDUP_kW: "Work Done By CDUP (kW)" + temperature_keys: + - simulator_1_datacenter_1_computeBlock_1_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_2_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_3_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_4_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_5_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_6_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_7_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_8_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_9_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_10_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_11_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_12_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_13_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_14_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_15_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_16_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_17_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_18_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_19_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_20_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_21_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_22_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_23_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_24_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_25_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_26_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_27_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_28_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_29_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_30_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_31_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_32_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_33_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_34_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_35_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_36_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_37_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_38_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_39_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_40_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_41_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_42_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_43_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_44_cabinet_1_sources_T_Air + - simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_T_ext + w_htwps_key: "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW" + w_ctwps_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW" + w_cts_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" +network: + topology: fat-tree + network_max_bw: 1000000000.0 + fattree_k: 16 + dragonfly_d: 11 + dragonfly_a: 9 + dragonfly_p: 8 + latency: 1 diff --git a/config/lassen/cooling.json b/config/lassen/cooling.json deleted file mode 100644 index 871ceec..0000000 --- a/config/lassen/cooling.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "COOLING_EFFICIENCY": 0.945, - "WET_BULB_TEMP": 290.0, - "ZIP_CODE": 94550, - "COUNTRY_CODE": "US", - "FMU_PATH": "models/POWER9CSM/fmus/lassen.fmu", - "FMU_COLUMN_MAPPING": { - "T_sec_r_C": 
"Rack Return Temperature (\u00b0C)", - "T_sec_s_C": "Rack Supply Temperature (\u00b0C)", - "p_sec_r_psig": "Rack Supply Pressure (psig)", - "p_sec_s_psig": "Rack Return Pressure (psig)", - "V_flow_sec_GPM": "Rack Flowrate (gpm)", - "T_prim_r_C": "Facility Return Temperature (\u00b0C)", - "T_prim_s_C": "Facility Supply Temperature (\u00b0C)", - "p_prim_s_psig": "Facility Supply Pressure (psig)", - "p_prim_r_psig": "Facility Return Pressure (psig)", - "V_flow_prim_GPM": "Facility Flowrate (gpm)", - "W_flow_CDUP_kW": "Work Done By CDUP (kW)" - }, - "TEMPERATURE_KEYS": [ - "simulator_1_datacenter_1_computeBlock_1_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_2_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_3_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_4_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_5_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_6_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_7_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_8_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_9_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_10_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_11_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_12_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_13_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_14_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_15_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_16_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_17_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_18_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_19_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_20_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_21_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_22_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_23_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_24_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_25_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_26_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_27_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_28_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_29_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_30_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_31_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_32_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_33_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_34_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_35_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_36_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_37_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_38_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_39_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_40_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_41_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_42_cabinet_1_sources_T_Air", - 
"simulator_1_datacenter_1_computeBlock_43_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_44_cabinet_1_sources_T_Air", - "simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_T_ext" - ], - "W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW", - "W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW", - "W_CTs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" - -} diff --git a/config/lassen/network.json b/config/lassen/network.json deleted file mode 100644 index 5a0f564..0000000 --- a/config/lassen/network.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "TOPOLOGY": "fat-tree", - "NETWORK_MAX_BW": 1e9, - "FATTREE_K": 16, - "DRAGONFLY_D": 11, - "DRAGONFLY_A": 9, - "DRAGONFLY_P": 8, - "LATENCY": 1 -} diff --git a/config/lassen/power.json b/config/lassen/power.json deleted file mode 100644 index 5b314b6..0000000 --- a/config/lassen/power.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "POWER_GPU_IDLE": 75, - "POWER_GPU_MAX": 300, - "POWER_CPU_IDLE": 47.25, - "POWER_CPU_MAX": 252, - "POWER_MEM": 74.26, - "POWER_NIC_IDLE": 10, - "POWER_NIC_MAX": 50, - "POWER_NVME": 45, - "POWER_SWITCH": 250, - "POWER_CDU": 0, - "POWER_UPDATE_FREQ": 20, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 0, - "SIVOC_EFFICIENCY": 1, - "RECTIFIER_LOSS_CONSTANT": 0, - "RECTIFIER_EFFICIENCY": 1, - "POWER_COST": 0.094 -} diff --git a/config/lassen/scheduler.json b/config/lassen/scheduler.json deleted file mode 100644 index 709f080..0000000 --- a/config/lassen/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 20, - "MTBF": 11, - "TRACE_QUANTA": 20, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 3600, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/lassen/system.json b/config/lassen/system.json deleted file mode 100644 index 15d000d..0000000 --- a/config/lassen/system.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "NUM_CDUS": 257, - "RACKS_PER_CDU": 1, - "NODES_PER_RACK": 18, - "RECTIFIERS_PER_RACK": 5, - "CHASSIS_PER_RACK": 1, - "NODES_PER_BLADE": 1, - "SWITCHES_PER_CHASSIS": 5, - "NICS_PER_NODE": 2, - "RECTIFIERS_PER_CHASSIS": 5, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [44], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "THREADS_PER_CORE": 4, - "CPU_FREQUENCY": 2400000000, - "GPUS_PER_NODE": 4, - "CPU_PEAK_FLOPS": 396.8E9, - "GPU_PEAK_FLOPS": 7.8E12, - "CPU_FP_RATIO": 0.72, - "GPU_FP_RATIO": 0.72 -} diff --git a/config/lumi/lumi-c.yaml b/config/lumi/lumi-c.yaml new file mode 100644 index 0000000..091060b --- /dev/null +++ b/config/lumi/lumi-c.yaml @@ -0,0 +1,49 @@ +system: + num_cdus: 1 + racks_per_cdu: 2 + nodes_per_rack: 256 + chassis_per_rack: 8 + nodes_per_blade: 4 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + gpus_per_node: 0 + cpu_peak_flops: 2509440000000.0 + gpu_peak_flops: 0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 
+scheduler: + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 60 + max_wall_time: 172800 + ui_update_freq: 900 + max_nodes_per_job: 512 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/lumi/lumi-c/power.json b/config/lumi/lumi-c/power.json deleted file mode 100644 index d6ec29e..0000000 --- a/config/lumi/lumi-c/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NIC": 20, - "POWER_NVME": 30, - "POWER_SWITCH": 250, - "POWER_CDU": 8473.47, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/lumi/lumi-c/scheduler.json b/config/lumi/lumi-c/scheduler.json deleted file mode 100644 index 530a1c2..0000000 --- a/config/lumi/lumi-c/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "TRACE_QUANTA": 15, - "MIN_WALL_TIME": 60, - "MAX_WALL_TIME": 172800, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 512, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/lumi/lumi-c/system.json b/config/lumi/lumi-c/system.json deleted file mode 100644 index 924f281..0000000 --- a/config/lumi/lumi-c/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 1, - "RACKS_PER_CDU": 2, - "NODES_PER_RACK": 256, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 4, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "GPUS_PER_NODE": 0, - "CPU_PEAK_FLOPS": 2.50944E12, - "GPU_PEAK_FLOPS": 0, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/lumi/lumi-g.yaml b/config/lumi/lumi-g.yaml new file mode 100644 index 0000000..7c0f77c --- /dev/null +++ b/config/lumi/lumi-g.yaml @@ -0,0 +1,49 @@ +system: + num_cdus: 10 + racks_per_cdu: 3 + nodes_per_rack: 128 + chassis_per_rack: 8 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 1 + gpus_per_node: 4 + cpu_peak_flops: 2048000000000.0 + gpu_peak_flops: 52000000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 60 + max_wall_time: 172800 + ui_update_freq: 900 + max_nodes_per_job: 1024 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/lumi/lumi-g/power.json b/config/lumi/lumi-g/power.json deleted file mode 100644 index d6ec29e..0000000 --- a/config/lumi/lumi-g/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NIC": 20, - 
"POWER_NVME": 30, - "POWER_SWITCH": 250, - "POWER_CDU": 8473.47, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/lumi/lumi-g/scheduler.json b/config/lumi/lumi-g/scheduler.json deleted file mode 100644 index ed21980..0000000 --- a/config/lumi/lumi-g/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "TRACE_QUANTA": 15, - "MIN_WALL_TIME": 60, - "MAX_WALL_TIME": 172800, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 1024, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/lumi/lumi-g/system.json b/config/lumi/lumi-g/system.json deleted file mode 100644 index 9a36fcb..0000000 --- a/config/lumi/lumi-g/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 10, - "RACKS_PER_CDU": 3, - "NODES_PER_RACK": 128, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 1, - "GPUS_PER_NODE": 4, - "CPU_PEAK_FLOPS": 2048E9, - "GPU_PEAK_FLOPS": 52E12, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} \ No newline at end of file diff --git a/config/marconi100.yaml b/config/marconi100.yaml new file mode 100644 index 0000000..797153e --- /dev/null +++ b/config/marconi100.yaml @@ -0,0 +1,121 @@ +system: + num_cdus: 49 + racks_per_cdu: 1 + nodes_per_rack: 20 + chassis_per_rack: 1 + nodes_per_blade: 1 + switches_per_chassis: 5 + nics_per_node: 2 + rectifiers_per_chassis: 5 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + gpus_per_node: 4 + cpu_peak_flops: 396800000000.0 + gpu_peak_flops: 7800000000000.0 + cpu_fp_ratio: 0.69 + gpu_fp_ratio: 0.69 +power: + power_gpu_idle: 75 + power_gpu_max: 300 + power_cpu_idle: 47.25 + power_cpu_max: 252 + power_mem: 74.26 + power_nic: 21 + power_nvme: 45 + power_switch: 250 + power_cdu: 0 + power_update_freq: 20 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 0 + sivoc_efficiency: 1 + rectifier_loss_constant: 0 + rectifier_efficiency: 1 + power_cost: 0.094 +scheduler: + job_arrival_time: 20 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 3600 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +cooling: + cooling_efficiency: 0.945 + wet_bulb_temp: 290.0 + zip_code: '30170' + country_code: IT + fmu_path: "models/POWER9CSM/fmus/marconi100.fmu" + fmu_column_mapping: + T_sec_r_C: "Rack Return Temperature (°C)" + T_sec_s_C: "Rack Supply Temperature (°C)" + p_sec_r_psig: "Rack Supply Pressure (psig)" + p_sec_s_psig: "Rack Return Pressure (psig)" + V_flow_sec_GPM: "Rack Flowrate (gpm)" + T_prim_r_C: "Facility Return Temperature (°C)" + T_prim_s_C: "Facility Supply Temperature (°C)" + p_prim_s_psig: "Facility Supply Pressure (psig)" + p_prim_r_psig: "Facility Return Pressure (psig)" + V_flow_prim_GPM: "Facility Flowrate (gpm)" + W_flow_CDUP_kW: "Work Done By CDUP (kW)" + temperature_keys: + - simulator_1_datacenter_1_computeBlock_1_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_2_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_3_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_4_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_5_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_6_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_7_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_8_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_9_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_10_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_11_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_12_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_13_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_14_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_15_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_16_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_17_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_18_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_19_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_20_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_21_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_22_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_23_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_24_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_25_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_26_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_27_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_28_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_29_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_30_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_31_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_32_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_33_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_34_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_35_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_36_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_37_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_38_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_39_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_40_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_41_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_42_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_43_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_44_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_45_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_46_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_47_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_48_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_49_cabinet_1_sources_T_Air + - simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_T_ext + w_htwps_key: "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW" + w_ctwps_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW" + w_cts_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" diff --git a/config/marconi100/cooling.json 
b/config/marconi100/cooling.json deleted file mode 100644 index cda9d16..0000000 --- a/config/marconi100/cooling.json +++ /dev/null @@ -1,76 +0,0 @@ -{ - "COOLING_EFFICIENCY": 0.945, - "WET_BULB_TEMP": 290.0, - "ZIP_CODE": 30170, - "COUNTRY_CODE": "IT", - "FMU_PATH": "models/POWER9CSM/fmus/marconi100.fmu", - "FMU_COLUMN_MAPPING": { - "T_sec_r_C": "Rack Return Temperature (\u00b0C)", - "T_sec_s_C": "Rack Supply Temperature (\u00b0C)", - "p_sec_r_psig": "Rack Supply Pressure (psig)", - "p_sec_s_psig": "Rack Return Pressure (psig)", - "V_flow_sec_GPM": "Rack Flowrate (gpm)", - "T_prim_r_C": "Facility Return Temperature (\u00b0C)", - "T_prim_s_C": "Facility Supply Temperature (\u00b0C)", - "p_prim_s_psig": "Facility Supply Pressure (psig)", - "p_prim_r_psig": "Facility Return Pressure (psig)", - "V_flow_prim_GPM": "Facility Flowrate (gpm)", - "W_flow_CDUP_kW": "Work Done By CDUP (kW)" - }, - "TEMPERATURE_KEYS": [ - "simulator_1_datacenter_1_computeBlock_1_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_2_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_3_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_4_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_5_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_6_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_7_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_8_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_9_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_10_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_11_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_12_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_13_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_14_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_15_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_16_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_17_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_18_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_19_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_20_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_21_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_22_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_23_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_24_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_25_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_26_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_27_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_28_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_29_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_30_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_31_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_32_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_33_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_34_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_35_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_36_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_37_cabinet_1_sources_T_Air", - 
"simulator_1_datacenter_1_computeBlock_38_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_39_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_40_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_41_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_42_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_43_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_44_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_45_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_46_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_47_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_48_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_49_cabinet_1_sources_T_Air", - "simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_T_ext" - ], - "W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW", - "W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW", - "W_CTs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" - -} diff --git a/config/marconi100/power.json b/config/marconi100/power.json deleted file mode 100644 index 9e23022..0000000 --- a/config/marconi100/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 75, - "POWER_GPU_MAX": 300, - "POWER_CPU_IDLE": 47.25, - "POWER_CPU_MAX": 252, - "POWER_MEM": 74.26, - "POWER_NIC": 21, - "POWER_NVME": 45, - "POWER_SWITCH": 250, - "POWER_CDU": 0, - "POWER_UPDATE_FREQ": 20, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 0, - "SIVOC_EFFICIENCY": 1, - "RECTIFIER_LOSS_CONSTANT": 0, - "RECTIFIER_EFFICIENCY": 1, - "POWER_COST": 0.094 -} diff --git a/config/marconi100/scheduler.json b/config/marconi100/scheduler.json deleted file mode 100644 index 709f080..0000000 --- a/config/marconi100/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 20, - "MTBF": 11, - "TRACE_QUANTA": 20, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 3600, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/marconi100/system.json b/config/marconi100/system.json deleted file mode 100644 index 816e802..0000000 --- a/config/marconi100/system.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "NUM_CDUS": 49, - "RACKS_PER_CDU": 1, - "NODES_PER_RACK": 20, - "RECTIFIERS_PER_RACK": 5, - "CHASSIS_PER_RACK": 1, - "NODES_PER_BLADE": 1, - "SWITCHES_PER_CHASSIS": 5, - "NICS_PER_NODE": 2, - "RECTIFIERS_PER_CHASSIS": 5, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "GPUS_PER_NODE": 4, - "CPU_PEAK_FLOPS": 396.8E9, - "GPU_PEAK_FLOPS": 7.8E12, - "CPU_FP_RATIO": 0.69, - "GPU_FP_RATIO": 0.69 -} diff --git a/config/mit_supercloud/part-cpu.yaml b/config/mit_supercloud/part-cpu.yaml new file mode 100644 index 0000000..111882d --- /dev/null +++ b/config/mit_supercloud/part-cpu.yaml @@ -0,0 +1,51 @@ +system: + num_cdus: 12 + racks_per_cdu: 1 + nodes_per_rack: 40 + chassis_per_rack: 8 + nodes_per_blade: 1 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + cores_per_cpu: 24 + gpus_per_node: 0 + cpu_peak_flops: 2995200000000.0 + gpu_peak_flops: 0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 
+power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 1 + power_cpu_max: 6 + power_mem: 74.26 + power_nvme: 30 + power_nic: 20 + power_cdu: 8473.47 + power_switch: 250 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + multitenant: true + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 10 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/mit_supercloud/part-cpu/power.json b/config/mit_supercloud/part-cpu/power.json deleted file mode 100644 index 08d02e4..0000000 --- a/config/mit_supercloud/part-cpu/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 1, - "POWER_CPU_MAX": 6, - "POWER_MEM": 74.26, - "POWER_NVME": 30, - "POWER_NIC": 20, - "POWER_CDU": 8473.47, - "POWER_SWITCH": 250, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/mit_supercloud/part-cpu/scheduler.json b/config/mit_supercloud/part-cpu/scheduler.json deleted file mode 100644 index 2b9c850..0000000 --- a/config/mit_supercloud/part-cpu/scheduler.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "multitenant": true, - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "MAX_TIME": 88200, - "TRACE_QUANTA": 10, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/mit_supercloud/part-cpu/system.json b/config/mit_supercloud/part-cpu/system.json deleted file mode 100644 index 548c484..0000000 --- a/config/mit_supercloud/part-cpu/system.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "NUM_CDUS": 12, - "RACKS_PER_CDU": 1, - "NODES_PER_RACK": 40, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 1, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "CORES_PER_CPU": 24, - "GPUS_PER_NODE": 0, - "CPU_PEAK_FLOPS": 2.9952E12, - "GPU_PEAK_FLOPS": 0, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/mit_supercloud/part-gpu.yaml b/config/mit_supercloud/part-gpu.yaml new file mode 100644 index 0000000..b842831 --- /dev/null +++ b/config/mit_supercloud/part-gpu.yaml @@ -0,0 +1,51 @@ +system: + num_cdus: 1 + racks_per_cdu: 1 + nodes_per_rack: 224 + chassis_per_rack: 8 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + cores_per_cpu: 20 + gpus_per_node: 2 + cpu_peak_flops: 1248000000000.0 + gpu_peak_flops: 7800000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 75 + power_gpu_max: 300 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nvme: 30 + power_nic: 20 + power_cdu: 8473.47 + power_switch: 250 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 
+ power_cost: 0.094 +scheduler: + multitenant: true + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 192 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/mit_supercloud/part-gpu/power.json b/config/mit_supercloud/part-gpu/power.json deleted file mode 100644 index 725b9fe..0000000 --- a/config/mit_supercloud/part-gpu/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 75, - "POWER_GPU_MAX": 300, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NVME": 30, - "POWER_NIC": 20, - "POWER_CDU": 8473.47, - "POWER_SWITCH": 250, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/mit_supercloud/part-gpu/scheduler.json b/config/mit_supercloud/part-gpu/scheduler.json deleted file mode 100644 index ee96c92..0000000 --- a/config/mit_supercloud/part-gpu/scheduler.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "multitenant": true, - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "MAX_TIME": 88200, - "TRACE_QUANTA": 20, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 192, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/mit_supercloud/part-gpu/system.json b/config/mit_supercloud/part-gpu/system.json deleted file mode 100644 index 9d3eb00..0000000 --- a/config/mit_supercloud/part-gpu/system.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "NUM_CDUS": 1, - "RACKS_PER_CDU": 1, - "NODES_PER_RACK": 224, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "CORES_PER_CPU": 20, - "GPUS_PER_NODE": 2, - "CPU_PEAK_FLOPS": 1.248E12, - "GPU_PEAK_FLOPS": 7.8E12, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/setonix/part-cpu.yaml b/config/setonix/part-cpu.yaml new file mode 100644 index 0000000..3d7ce90 --- /dev/null +++ b/config/setonix/part-cpu.yaml @@ -0,0 +1,242 @@ +system: + num_cdus: 1 + racks_per_cdu: 7 + nodes_per_rack: 256 + chassis_per_rack: 8 + nodes_per_blade: 4 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: + - 1600 + - 1601 + - 1602 + - 1603 + - 1604 + - 1605 + - 1606 + - 1607 + - 1608 + - 1609 + - 1610 + - 1611 + - 1612 + - 1613 + - 1614 + - 1615 + - 1616 + - 1617 + - 1618 + - 1619 + - 1620 + - 1621 + - 1622 + - 1623 + - 1624 + - 1625 + - 1626 + - 1627 + - 1628 + - 1629 + - 1630 + - 1631 + - 1632 + - 1633 + - 1634 + - 1635 + - 1636 + - 1637 + - 1638 + - 1639 + - 1640 + - 1641 + - 1642 + - 1643 + - 1644 + - 1645 + - 1646 + - 1647 + - 1648 + - 1649 + - 1650 + - 1651 + - 1652 + - 1653 + - 1654 + - 1655 + - 1656 + - 1657 + - 1658 + - 1659 + - 1660 + - 1661 + - 1662 + - 1663 + - 1664 + - 1665 + - 1666 + - 1667 + - 1668 + - 1669 + - 1670 + - 1671 + - 1672 + - 1673 + - 1674 + - 1675 + - 1676 + - 1677 + - 1678 + - 1679 + - 1680 + - 1681 + - 1682 + - 1683 + - 1684 + - 1685 + - 1686 + - 1687 + - 1688 + - 1689 + - 1690 + - 1691 + - 1692 + - 1693 + - 1694 + - 1695 + - 1696 + - 1697 + - 1698 
+ - 1699 + - 1700 + - 1701 + - 1702 + - 1703 + - 1704 + - 1705 + - 1706 + - 1707 + - 1708 + - 1709 + - 1710 + - 1711 + - 1712 + - 1713 + - 1714 + - 1715 + - 1716 + - 1717 + - 1718 + - 1719 + - 1720 + - 1721 + - 1722 + - 1723 + - 1724 + - 1725 + - 1726 + - 1727 + - 1728 + - 1729 + - 1730 + - 1731 + - 1732 + - 1733 + - 1734 + - 1735 + - 1736 + - 1737 + - 1738 + - 1739 + - 1740 + - 1741 + - 1742 + - 1743 + - 1744 + - 1745 + - 1746 + - 1747 + - 1748 + - 1749 + - 1750 + - 1751 + - 1752 + - 1753 + - 1754 + - 1755 + - 1756 + - 1757 + - 1758 + - 1759 + - 1760 + - 1761 + - 1762 + - 1763 + - 1764 + - 1765 + - 1766 + - 1767 + - 1768 + - 1769 + - 1770 + - 1771 + - 1772 + - 1773 + - 1774 + - 1775 + - 1776 + - 1777 + - 1778 + - 1779 + - 1780 + - 1781 + - 1782 + - 1783 + - 1784 + - 1785 + - 1786 + - 1787 + - 1788 + - 1789 + - 1790 + - 1791 + cores_per_cpu: 64 + cpus_per_node: 2 + gpus_per_node: 0 + cpu_peak_flops: 2509440000000.0 + gpu_peak_flops: 0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nvme: 30 + power_nic: 20 + power_cdu: 8473.47 + power_switch: 250 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/setonix/part-cpu/power.json b/config/setonix/part-cpu/power.json deleted file mode 100644 index 5128c4c..0000000 --- a/config/setonix/part-cpu/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NVME": 30, - "POWER_NIC": 20, - "POWER_CDU": 8473.47, - "POWER_SWITCH": 250, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/setonix/part-cpu/scheduler.json b/config/setonix/part-cpu/scheduler.json deleted file mode 100644 index 0ea905d..0000000 --- a/config/setonix/part-cpu/scheduler.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "MAX_TIME": 88200, - "TRACE_QUANTA": 20, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/setonix/part-cpu/system.json b/config/setonix/part-cpu/system.json deleted file mode 100644 index 94442c1..0000000 --- a/config/setonix/part-cpu/system.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "NUM_CDUS": 1, - "RACKS_PER_CDU": 7, - "NODES_PER_RACK": 256, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 4, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 
1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791], - "CORES_PER_CPU": 64, - "CPUS_PER_NODE": 2, - "GPUS_PER_NODE": 0, - "CPU_PEAK_FLOPS": 2.50944E12, - "GPU_PEAK_FLOPS": 0, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/setonix/part-gpu.yaml b/config/setonix/part-gpu.yaml new file mode 100644 index 0000000..807a1ce --- /dev/null +++ b/config/setonix/part-gpu.yaml @@ -0,0 +1,114 @@ +system: + num_cdus: 1 + racks_per_cdu: 2 + nodes_per_rack: 128 + chassis_per_rack: 8 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: + - 192 + - 193 + - 194 + - 195 + - 196 + - 197 + - 198 + - 199 + - 200 + - 201 + - 202 + - 203 + - 204 + - 205 + - 206 + - 207 + - 208 + - 209 + - 210 + - 211 + - 212 + - 213 + - 214 + - 215 + - 216 + - 217 + - 218 + - 219 + - 220 + - 221 + - 222 + - 223 + - 224 + - 225 + - 226 + - 227 + - 228 + - 229 + - 230 + - 231 + - 232 + - 233 + - 234 + - 235 + - 236 + - 237 + - 238 + - 239 + - 240 + - 241 + - 242 + - 243 + - 244 + - 245 + - 246 + - 247 + - 248 + - 249 + - 250 + - 251 + - 252 + - 253 + - 254 + - 255 + cores_per_cpu: 64 + cpus_per_node: 1 + gpus_per_node: 4 + cpu_peak_flops: 2048000000000.0 + gpu_peak_flops: 52000000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nvme: 30 + power_nic: 20 + power_cdu: 8473.47 + power_switch: 250 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 192 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/setonix/part-gpu/power.json b/config/setonix/part-gpu/power.json deleted file mode 100644 index 5128c4c..0000000 --- a/config/setonix/part-gpu/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 88, - "POWER_GPU_MAX": 560, - "POWER_CPU_IDLE": 90, - "POWER_CPU_MAX": 280, - "POWER_MEM": 74.26, - "POWER_NVME": 30, - "POWER_NIC": 20, - "POWER_CDU": 8473.47, - "POWER_SWITCH": 250, - "POWER_UPDATE_FREQ": 15, - "RECTIFIER_PEAK_THRESHOLD": 13670, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/setonix/part-gpu/scheduler.json b/config/setonix/part-gpu/scheduler.json deleted file mode 100644 
index 937b71d..0000000 --- a/config/setonix/part-gpu/scheduler.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 900, - "MTBF": 11, - "MAX_TIME": 88200, - "TRACE_QUANTA": 20, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 900, - "MAX_NODES_PER_JOB": 192, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/setonix/part-gpu/system.json b/config/setonix/part-gpu/system.json deleted file mode 100644 index f524cc2..0000000 --- a/config/setonix/part-gpu/system.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "NUM_CDUS": 1, - "RACKS_PER_CDU": 2, - "NODES_PER_RACK": 128, - "RECTIFIERS_PER_RACK": 32, - "CHASSIS_PER_RACK": 8, - "NODES_PER_BLADE": 2, - "SWITCHES_PER_CHASSIS": 4, - "NICS_PER_NODE": 4, - "RECTIFIERS_PER_CHASSIS": 4, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255], - "CORES_PER_CPU": 64, - "CPUS_PER_NODE": 1, - "GPUS_PER_NODE": 4, - "CPU_PEAK_FLOPS": 2048E9, - "GPU_PEAK_FLOPS": 52E12, - "CPU_FP_RATIO": 0.667, - "GPU_FP_RATIO": 0.667 -} diff --git a/config/summit.yaml b/config/summit.yaml new file mode 100644 index 0000000..8dc6fe3 --- /dev/null +++ b/config/summit.yaml @@ -0,0 +1,329 @@ +system: + num_cdus: 257 + racks_per_cdu: 1 + nodes_per_rack: 18 + chassis_per_rack: 1 + nodes_per_blade: 1 + switches_per_chassis: 5 + nics_per_node: 2 + rectifiers_per_chassis: 5 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + gpus_per_node: 6 + cpu_peak_flops: 436200000000.0 + gpu_peak_flops: 7800000000000.0 + cpu_fp_ratio: 0.674 + gpu_fp_ratio: 0.674 +power: + power_gpu_idle: 75 + power_gpu_max: 300 + power_cpu_idle: 47.25 + power_cpu_max: 300 + power_mem: 74.26 + power_nic: 21 + power_nvme: 45 + power_switch: 0 + power_cdu: 0 + power_update_freq: 10 + rectifier_peak_threshold: 0 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + job_arrival_time: 60 + mtbf: 11 + trace_quanta: 10 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 3600 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +cooling: + cooling_efficiency: 0.945 + wet_bulb_temp: 290.0 + zip_code: '37831' + country_code: US + fmu_path: "models/POWER9CSM/fmus/summit.fmu" + fmu_column_mapping: + T_sec_r_C: "Rack Return Temperature (°C)" + T_sec_s_C: "Rack Supply Temperature (°C)" + p_sec_r_psig: "Rack Supply Pressure (psig)" + p_sec_s_psig: "Rack Return Pressure (psig)" + V_flow_sec_GPM: "Rack Flowrate (gpm)" + T_prim_r_C: "Facility Return Temperature (°C)" + T_prim_s_C: "Facility Supply Temperature (°C)" + p_prim_s_psig: "Facility Supply Pressure (psig)" + p_prim_r_psig: "Facility Return Pressure (psig)" + V_flow_prim_GPM: "Facility Flowrate (gpm)" + W_flow_CDUP_kW: "Work Done By CDUP (kW)" + temperature_keys: + - simulator_1_datacenter_1_computeBlock_1_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_2_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_3_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_4_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_5_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_6_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_7_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_8_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_9_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_10_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_11_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_12_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_13_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_14_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_15_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_16_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_17_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_18_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_19_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_20_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_21_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_22_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_23_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_24_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_25_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_26_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_27_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_28_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_29_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_30_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_31_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_32_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_33_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_34_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_35_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_36_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_37_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_38_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_39_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_40_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_41_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_42_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_43_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_44_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_45_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_46_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_47_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_48_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_49_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_50_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_51_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_52_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_53_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_54_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_55_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_56_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_57_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_58_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_59_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_60_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_61_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_62_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_63_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_64_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_65_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_66_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_67_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_68_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_69_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_70_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_71_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_72_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_73_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_74_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_75_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_76_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_77_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_78_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_79_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_80_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_81_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_82_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_83_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_84_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_85_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_86_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_87_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_88_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_89_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_90_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_91_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_92_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_93_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_94_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_95_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_96_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_97_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_98_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_99_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_100_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_101_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_102_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_103_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_104_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_105_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_106_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_107_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_108_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_109_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_110_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_111_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_112_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_113_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_114_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_115_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_116_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_117_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_118_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_119_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_120_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_121_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_122_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_123_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_124_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_125_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_126_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_127_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_128_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_129_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_130_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_131_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_132_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_133_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_134_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_135_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_136_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_137_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_138_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_139_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_140_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_141_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_142_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_143_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_144_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_145_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_146_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_147_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_148_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_149_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_150_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_151_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_152_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_153_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_154_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_155_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_156_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_157_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_158_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_159_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_160_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_161_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_162_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_163_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_164_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_165_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_166_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_167_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_168_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_169_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_170_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_171_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_172_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_173_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_174_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_175_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_176_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_177_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_178_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_179_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_180_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_181_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_182_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_183_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_184_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_185_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_186_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_187_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_188_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_189_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_190_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_191_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_192_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_193_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_194_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_195_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_196_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_197_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_198_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_199_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_200_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_201_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_202_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_203_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_204_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_205_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_206_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_207_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_208_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_209_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_210_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_211_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_212_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_213_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_214_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_215_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_216_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_217_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_218_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_219_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_220_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_221_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_222_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_223_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_224_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_225_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_226_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_227_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_228_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_229_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_230_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_231_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_232_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_233_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_234_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_235_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_236_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_237_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_238_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_239_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_240_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_241_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_242_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_243_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_244_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_245_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_246_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_247_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_248_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_249_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_250_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_251_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_252_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_253_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_254_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_255_cabinet_1_sources_T_Air + - 
simulator_1_datacenter_1_computeBlock_256_cabinet_1_sources_T_Air + - simulator_1_datacenter_1_computeBlock_257_cabinet_1_sources_T_Air + - simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_T_ext + w_htwps_key: "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW" + w_ctwps_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW" + w_cts_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" diff --git a/config/summit/cooling.json b/config/summit/cooling.json deleted file mode 100644 index 3d1d2e4..0000000 --- a/config/summit/cooling.json +++ /dev/null @@ -1,284 +0,0 @@ -{ - "COOLING_EFFICIENCY": 0.945, - "WET_BULB_TEMP": 290.0, - "ZIP_CODE": 37831, - "COUNTRY_CODE": "US", - "FMU_PATH": "models/POWER9CSM/fmus/summit.fmu", - "FMU_COLUMN_MAPPING": { - "T_sec_r_C": "Rack Return Temperature (\u00b0C)", - "T_sec_s_C": "Rack Supply Temperature (\u00b0C)", - "p_sec_r_psig": "Rack Supply Pressure (psig)", - "p_sec_s_psig": "Rack Return Pressure (psig)", - "V_flow_sec_GPM": "Rack Flowrate (gpm)", - "T_prim_r_C": "Facility Return Temperature (\u00b0C)", - "T_prim_s_C": "Facility Supply Temperature (\u00b0C)", - "p_prim_s_psig": "Facility Supply Pressure (psig)", - "p_prim_r_psig": "Facility Return Pressure (psig)", - "V_flow_prim_GPM": "Facility Flowrate (gpm)", - "W_flow_CDUP_kW": "Work Done By CDUP (kW)" - }, - "TEMPERATURE_KEYS": [ - "simulator_1_datacenter_1_computeBlock_1_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_2_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_3_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_4_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_5_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_6_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_7_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_8_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_9_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_10_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_11_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_12_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_13_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_14_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_15_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_16_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_17_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_18_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_19_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_20_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_21_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_22_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_23_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_24_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_25_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_26_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_27_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_28_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_29_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_30_cabinet_1_sources_T_Air", - 
"simulator_1_datacenter_1_computeBlock_31_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_32_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_33_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_34_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_35_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_36_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_37_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_38_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_39_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_40_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_41_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_42_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_43_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_44_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_45_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_46_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_47_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_48_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_49_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_50_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_51_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_52_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_53_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_54_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_55_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_56_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_57_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_58_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_59_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_60_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_61_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_62_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_63_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_64_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_65_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_66_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_67_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_68_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_69_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_70_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_71_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_72_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_73_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_74_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_75_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_76_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_77_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_78_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_79_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_80_cabinet_1_sources_T_Air", - 
"simulator_1_datacenter_1_computeBlock_81_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_82_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_83_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_84_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_85_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_86_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_87_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_88_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_89_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_90_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_91_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_92_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_93_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_94_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_95_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_96_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_97_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_98_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_99_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_100_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_101_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_102_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_103_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_104_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_105_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_106_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_107_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_108_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_109_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_110_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_111_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_112_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_113_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_114_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_115_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_116_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_117_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_118_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_119_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_120_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_121_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_122_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_123_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_124_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_125_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_126_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_127_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_128_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_129_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_130_cabinet_1_sources_T_Air", - 
"simulator_1_datacenter_1_computeBlock_131_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_132_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_133_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_134_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_135_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_136_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_137_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_138_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_139_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_140_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_141_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_142_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_143_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_144_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_145_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_146_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_147_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_148_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_149_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_150_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_151_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_152_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_153_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_154_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_155_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_156_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_157_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_158_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_159_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_160_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_161_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_162_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_163_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_164_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_165_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_166_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_167_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_168_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_169_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_170_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_171_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_172_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_173_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_174_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_175_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_176_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_177_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_178_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_179_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_180_cabinet_1_sources_T_Air", - 
"simulator_1_datacenter_1_computeBlock_181_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_182_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_183_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_184_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_185_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_186_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_187_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_188_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_189_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_190_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_191_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_192_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_193_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_194_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_195_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_196_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_197_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_198_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_199_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_200_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_201_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_202_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_203_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_204_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_205_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_206_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_207_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_208_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_209_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_210_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_211_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_212_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_213_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_214_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_215_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_216_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_217_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_218_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_219_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_220_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_221_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_222_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_223_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_224_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_225_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_226_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_227_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_228_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_229_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_230_cabinet_1_sources_T_Air", - 
"simulator_1_datacenter_1_computeBlock_231_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_232_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_233_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_234_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_235_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_236_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_237_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_238_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_239_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_240_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_241_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_242_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_243_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_244_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_245_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_246_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_247_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_248_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_249_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_250_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_251_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_252_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_253_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_254_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_255_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_256_cabinet_1_sources_T_Air", - "simulator_1_datacenter_1_computeBlock_257_cabinet_1_sources_T_Air", - "simulator_1_centralEnergyPlant_1_coolingTowerLoop_1_sources_T_ext" - ], - "W_HTWPs_KEY": "simulator[1].centralEnergyPlant[1].hotWaterLoop[1].summary.W_flow_HTWP_kW", - "W_CTWPs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CTWP_kW", - "W_CTs_KEY": "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW" - -} diff --git a/config/summit/power.json b/config/summit/power.json deleted file mode 100644 index af6fdaa..0000000 --- a/config/summit/power.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "POWER_GPU_IDLE": 75, - "POWER_GPU_MAX": 300, - "POWER_CPU_IDLE": 47.25, - "POWER_CPU_MAX": 300, - "POWER_MEM": 74.26, - "POWER_NIC": 21, - "POWER_NVME": 45, - "POWER_SWITCH": 0, - "POWER_CDU": 0, - "POWER_UPDATE_FREQ": 10, - "RECTIFIER_PEAK_THRESHOLD": 0, - "SIVOC_LOSS_CONSTANT": 13, - "SIVOC_EFFICIENCY": 0.98, - "RECTIFIER_LOSS_CONSTANT": 17, - "RECTIFIER_EFFICIENCY": 0.96, - "POWER_COST": 0.094 -} diff --git a/config/summit/scheduler.json b/config/summit/scheduler.json deleted file mode 100644 index 263f3ec..0000000 --- a/config/summit/scheduler.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "SEED": 42, - "JOB_ARRIVAL_TIME": 60, - "MTBF": 11, - "TRACE_QUANTA": 10, - "MIN_WALL_TIME": 3600, - "MAX_WALL_TIME": 43200, - "UI_UPDATE_FREQ": 3600, - "MAX_NODES_PER_JOB": 3000, - "JOB_END_PROBS": { - "COMPLETED": 0.63, - "FAILED": 0.13, - "CANCELLED": 0.12, - "TIMEOUT": 0.11, - "NODE_FAIL": 0.01 - } -} diff --git a/config/summit/system.json b/config/summit/system.json deleted file mode 100644 index 6fcd420..0000000 --- a/config/summit/system.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - 
"NUM_CDUS": 257, - "RACKS_PER_CDU": 1, - "NODES_PER_RACK": 18, - "CHASSIS_PER_RACK": 1, - "NODES_PER_BLADE": 1, - "SWITCHES_PER_CHASSIS": 5, - "NICS_PER_NODE": 2, - "RECTIFIERS_PER_CHASSIS": 5, - "NODES_PER_RECTIFIER": 4, - "MISSING_RACKS": [], - "DOWN_NODES": [], - "CPUS_PER_NODE": 2, - "GPUS_PER_NODE": 6, - "CPU_PEAK_FLOPS": 436.2E9, - "GPU_PEAK_FLOPS": 7.8E12, - "CPU_FP_RATIO": 0.674, - "GPU_FP_RATIO": 0.674 -} diff --git a/main.py b/main.py index af1e08b..20f8240 100644 --- a/main.py +++ b/main.py @@ -16,7 +16,7 @@ import math # from raps.helpers import check_python_version # -from raps.config import ConfigManager +from raps.config import get_system_config from raps.constants import OUTPUT_PATH, SEED from raps.cooling import ThermoFluidsModel from raps.ui import LayoutManager @@ -54,7 +54,7 @@ def main(): if args.verbose or args.debug: print(args) - config = ConfigManager(system_name=args.system).get_config() + config = get_system_config(args.system).get_legacy() if args.seed: random.seed(SEED) diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py index bb24a08..fed038e 100644 --- a/multi-part-sim-mpi.py +++ b/multi-part-sim-mpi.py @@ -15,7 +15,7 @@ from raps.power import PowerManager, compute_node_power from raps.flops import FLOPSManager from raps.engine import Engine from raps.ui import LayoutManager -from raps.config import ConfigManager, CONFIG_PATH +from raps.config import get_system_config, CONFIG_PATH from args import args import random import os @@ -42,7 +42,7 @@ def main(): partition_names = comm.bcast(partition_names, root=0) # 3) Load configs for every partition (all ranks do this) - configs = [ConfigManager(system_name=p).get_config() for p in partition_names] + configs = [get_system_config(p).get_legacy() for p in partition_names] args_dicts = [{**vars(args), 'config': cfg} for cfg in configs] # 4) Each rank decides which partition‐indices it owns (round-robin): diff --git a/multi-part-sim.py b/multi-part-sim.py index 024112e..350462e 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -16,7 +16,7 @@ from raps.power import PowerManager, compute_node_power from raps.flops import FLOPSManager from raps.engine import Engine from raps.ui import LayoutManager -from raps.config import ConfigManager, CONFIG_PATH +from raps.config import get_system_config, CONFIG_PATH from raps.args import args import random import os @@ -35,7 +35,7 @@ if '*' in args.partitions[0]: args.system = partition_names[0].split(os.sep)[0] -configs = [ConfigManager(system_name=partition).get_config() for partition in partition_names] +configs = [get_system_config(partition).get_legacy() for partition in partition_names] args_dicts = [ {**vars(args), 'config': config, 'partition': partition_names[i]} for i, config in enumerate(configs) diff --git a/pyproject.toml b/pyproject.toml index 33b73ca..d46a621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,5 +28,7 @@ dependencies = [ "pytest-order", "pytest-xdist", "pyyaml>=6.0.2", + "pydantic>=2.11.7", + "pydantic-settings>=2.10.1", "pre-commit" ] diff --git a/pytest.ini b/pytest.ini index f0e2827..08a528c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,11 +2,11 @@ testpaths = tests #python_paths = . markers = - long: mark a test as long (skipped if not run iwth --runlong) + long: mark a test as long (skipped if not run with --runlong) - main: raps basic main. - telemetry: raps telemetry analysis - workload: raps workload generation + main: raps basic main. 
+ telemetry: raps telemetry analysis + workload: raps workload generation system: mark a test as system (integration) test unit: mark a test as a unit test diff --git a/raps/config.py b/raps/config.py index c616a8b..4f9f709 100644 --- a/raps/config.py +++ b/raps/config.py @@ -1,82 +1,223 @@ -import json -import os -from typing import Dict, Any +import os, functools +from typing import Any, Literal from pathlib import Path +import yaml +from pydantic import BaseModel, computed_field, model_validator -ROOT_DIR = os.path.dirname(os.path.split(__file__)[0]) -CONFIG_PATH = Path(os.environ.get("RAPS_CONFIG", ROOT_DIR + '/config')).resolve() - - -class ConfigManager: - def __init__(self, system_name: str): - self.config: Dict[str, Any] = {} - self.load_system_config(system_name) - self.system_name = system_name - self.derive_values() - - def load_system_config(self, system_name: str) -> None: - base_path = CONFIG_PATH / system_name - if not os.path.isdir(base_path): - raise FileNotFoundError(f"\"{system_name}\" not found in {CONFIG_PATH}.", - f"Valid systems are:{os.listdir(CONFIG_PATH)}") - config_files = ['system.json', 'power.json', 'scheduler.json'] - optional_files = ['cooling.json', 'uq.json', 'network.json'] - - for config_file in config_files + optional_files: - file_path = base_path / config_file - if config_file in optional_files and not file_path.exists(): - continue # Skip loading if the file is optional and doesn't exist - if not file_path.exists(): - raise FileNotFoundError(f"Mandatory configuration file {config_file} not found.") - config_data = self.load_config_file(file_path) - self.config.update(config_data) - - @staticmethod - def load_config_file(file_path: Path) -> dict[str, Any]: - with open(file_path, 'r') as file: - return json.load(file) - - def derive_values(self) -> None: - # Derive SC_SHAPE and TOTAL_NODES - num_cdus = self.config.get('NUM_CDUS', 0) - racks_per_cdu = self.config.get('RACKS_PER_CDU', 0) - nodes_per_rack = self.config.get('NODES_PER_RACK', 0) - chassis_per_rack = self.config.get('CHASSIS_PER_RACK', 0) - nodes_per_blade = self.config.get('NODES_PER_BLADE', 0) - down_nodes = self.config.get('DOWN_NODES', 0) - missing_racks = self.config.get('MISSING_RACKS', 0) - - self.config['NUM_RACKS'] = num_cdus * racks_per_cdu - len(missing_racks) - self.config['SC_SHAPE'] = [num_cdus, racks_per_cdu, nodes_per_rack] - self.config['TOTAL_NODES'] = num_cdus * racks_per_cdu * nodes_per_rack - self.config['BLADES_PER_CHASSIS'] = int(nodes_per_rack / chassis_per_rack / nodes_per_blade) - self.config['system_name'] = self.system_name - - # Generate POWER_DF_HEADER +ROOT_DIR = Path(__file__).parent.parent +CONFIG_PATH = Path(os.environ.get("RAPS_CONFIG", ROOT_DIR / 'config')).resolve() + +# Define Pydantic models for the config to handle parsing and validation + +class SystemSystemConfig(BaseModel): + num_cdus: int + racks_per_cdu: int + nodes_per_rack: int + chassis_per_rack: int + nodes_per_blade: int + switches_per_chassis: int + nics_per_node: int + rectifiers_per_chassis: int + nodes_per_rectifier: int + missing_racks: list[int] = [] + down_nodes: list[int] = [] + cpus_per_node: int + gpus_per_node: int + cpu_peak_flops: float + gpu_peak_flops: float + cpu_fp_ratio: float + gpu_fp_ratio: float + threads_per_core: int|None = None + cores_per_cpu: int|None = None + + @model_validator(mode='after') + def _update_down_nodes(self): + for rack in self.missing_racks: + start_node_id = rack * self.nodes_per_rack + end_node_id = start_node_id + self.nodes_per_rack + 
self.down_nodes.extend(range(start_node_id, end_node_id)) + self.down_nodes = sorted(set(self.down_nodes)) + return self + + @computed_field + @property + def num_racks(self) -> int: + return self.num_cdus * self.racks_per_cdu - len(self.missing_racks) + + @computed_field + @property + def sc_shape(self) -> list[int]: + return [self.num_cdus, self.racks_per_cdu, self.nodes_per_rack] + + @computed_field + @property + def total_nodes(self) -> int: + return self.num_cdus * self.racks_per_cdu * self.nodes_per_rack + + @computed_field + @property + def blades_per_chassis(self) -> int: + return int(self.nodes_per_rack / self.chassis_per_rack / self.nodes_per_blade) + + @computed_field + @property + def power_df_header(self) -> list[str]: power_df_header = ["CDU"] - for i in range(1, racks_per_cdu + 1): + for i in range(1, self.racks_per_cdu + 1): power_df_header.append(f"Rack {i}") power_df_header.append("Sum") - for i in range(1, racks_per_cdu + 1): + for i in range(1, self.racks_per_cdu + 1): power_df_header.append(f"Loss {i}") power_df_header.append("Loss") - self.config['POWER_DF_HEADER'] = power_df_header + return power_df_header + + @computed_field + @property + def available_nodes(self) -> int: + return self.total_nodes - len(self.down_nodes) + +class SystemPowerConfig(BaseModel): + power_gpu_idle: float + power_gpu_max: float + power_cpu_idle: float + power_cpu_max: float + power_mem: float + power_nic: float|None = None + power_nic_idle: float|None = None + power_nic_max: float|None = None + power_nvme: float + power_switch: float + power_cdu: float + power_update_freq: int + rectifier_peak_threshold: float + sivoc_loss_constant: float + sivoc_efficiency: float + rectifier_loss_constant: float + rectifier_efficiency: float + power_cost: float + +class SystemUqConfig(BaseModel): + power_gpu_uncertainty: float + power_cpu_uncertainty: float + power_mem_uncertainty: float + power_nic_uncertainty: float + power_nvme_uncertainty: float + power_cdus_uncertainty: float + power_node_uncertainty: float + power_switch_uncertainty: float + rectifier_power_uncertainty: float + +JobEndStates = Literal["COMPLETED", "FAILED", "CANCELLED", "TIMEOUT", "NODE_FAIL"] + +class SystemSchedulerConfig(BaseModel): + job_arrival_time: int + mtbf: int + trace_quanta: int + min_wall_time: int + max_wall_time: int + ui_update_freq: int + max_nodes_per_job: int + job_end_probs: dict[JobEndStates, float] + multitenant: bool = False + +class SystemCoolingConfig(BaseModel): + cooling_efficiency: float + wet_bulb_temp: float + zip_code: str|None = None + country_code: str|None = None + fmu_path: str + fmu_column_mapping: dict[str, str] + w_htwps_key: str + w_ctwps_key: str + w_cts_key: str + temperature_keys: list[str] + +class SystemNetworkConfig(BaseModel): + topology: Literal["fat-tree", "dragonfly", "torus3d"] + network_max_bw: float + latency: float|None = None + + fattree_k: int|None = None + + dragonfly_d: int|None = None + dragonfly_a: int|None = None + dragonfly_p: int|None = None + + torus_x: int|None = None + torus_y: int|None = None + torus_z: int|None = None + torus_wrap: bool|None = None + torus_link_bw: float|None = None + torus_routing: str|None = None + + hosts_per_router: int|None = None + latency_per_hop: float|None = None + node_coords_csv: str|None = None + +class SystemConfig(BaseModel): + system_name: str + system: SystemSystemConfig + power: SystemPowerConfig + scheduler: SystemSchedulerConfig + uq: SystemUqConfig|None = None + cooling: SystemCoolingConfig|None = None + network: 
SystemNetworkConfig|None = None + + def get_legacy(self) -> dict[str, Any]: + """ + Return the system config as a flattened, uppercased dict. This is for backwards + compatibility with the rest of RAPS code so we can migrate to the new config format + gradually. The dict also has a "config" key that contains the SystemConfig object itself. + """ + renames = { # fields that need to be renamed to something other than just .upper() + "system_name": "system_name", + "w_htwps_key": "W_HTWPs_KEY", + "w_ctwps_key": "W_CTWPs_KEY", + "w_cts_key": "W_CTs_KEY", + "multitenant": "multitenant", + } + dump = self.model_dump(mode = "json", exclude_none = True) - # Convert MISSING_RACKS into list of DOWN_NODES - for rack in missing_racks: - start_node_id = rack * nodes_per_rack - end_node_id = start_node_id + nodes_per_rack - down_nodes.extend(range(start_node_id, end_node_id)) - self.config['DOWN_NODES'] = down_nodes + config_dict: dict[str, Any] = {} + for k, v in dump.items(): # flatten + if isinstance(v, dict): + config_dict.update(v) + else: + config_dict[k] = v + # rename keys + config_dict = {renames.get(k, k.upper()): v for k, v in config_dict.items()} + config_dict['config'] = self + return config_dict - # Default multitenancy to False, unless explicitly set to True - self.config['multitenant'] = bool(self.config.get("multitenant", False)) - self.config['AVAILABLE_NODES'] = self.config['TOTAL_NODES'] - len(down_nodes) +@functools.cache +def list_systems() -> list[str]: + """ Lists all available systems """ + return sorted([ + str(p.relative_to(CONFIG_PATH)).removesuffix(".yaml") + for p in CONFIG_PATH.rglob("*.yaml") + ]) - def get(self, key: str) -> Any: - return self.config.get(key) - def get_config(self) -> Dict[str, Any]: - # Return the complete config dictionary - return self.config +@functools.cache +def get_system_config(system: str) -> SystemConfig: + """ + Returns the system config as a Pydantic object. + system can either be a path to a custom .yaml file, or the name of one of the pre-configured + systems defined in RAPS_CONFIG. + """ + config_path = Path(system.removesuffix(".yaml") + ".yaml") + if config_path.exists() or config_path.is_absolute(): + system_name = config_path.resolve() + else: # assume it's a pre-configured system + system_name = system.removesuffix(".yaml") + config_path = CONFIG_PATH / config_path + if not config_path.is_file(): + raise FileNotFoundError( + f'"{system}" not found. 
Known systems are: {list_systems()}' + ) + config = { + "system_name": system_name, + **yaml.safe_load(config_path.read_text()), + } + return SystemConfig.model_validate(config) diff --git a/raps/power.py b/raps/power.py index 188b150..dd0745b 100644 --- a/raps/power.py +++ b/raps/power.py @@ -55,10 +55,10 @@ def compute_node_power(cpu_util, gpu_util, net_util, config): power_gpu = gpu_util * config['POWER_GPU_MAX'] + \ (config['GPUS_PER_NODE'] - gpu_util) * config['POWER_GPU_IDLE'] - try: + if config.get("POWER_NIC_IDLE") is not None and config.get("POWER_NIC_MAX") is not None: power_nic = config['POWER_NIC_IDLE'] + \ (config['POWER_NIC_MAX'] - config['POWER_NIC_IDLE']) * net_util - except KeyError: + else: if isinstance(net_util, np.ndarray): power_nic = config['POWER_NIC'] * np.ones(net_util.shape) else: diff --git a/raps/telemetry.py b/raps/telemetry.py index a3a393e..e9f67ab 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -45,7 +45,7 @@ import pandas as pd from tqdm import tqdm # from rich.progress import track -from raps.config import ConfigManager +from raps.config import get_system_config from raps.job import Job, job_dict import matplotlib.pyplot as plt from raps.plotting import ( @@ -289,7 +289,7 @@ class Telemetry: def run_telemetry(): - config = ConfigManager(system_name=args.system).get_config() + config = get_system_config(args.system).get_legacy() args_dict['config'] = config td = Telemetry(**args_dict) if args.replay: diff --git a/raps/workload.py b/raps/workload.py index 38233c2..cedb52f 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -876,8 +876,8 @@ def check_workload_args(args): def run_workload(): from raps.args import args, args_dict - from raps.config import ConfigManager - config = ConfigManager(system_name=args.system).get_config() + from raps.config import get_system_config + config = get_system_config(args.system).get_legacy() if args.replay: td = Telemetry(**args_dict) jobs, _, _, _ = td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config) diff --git a/tests/test_system_config.py b/tests/test_system_config.py new file mode 100644 index 0000000..74280df --- /dev/null +++ b/tests/test_system_config.py @@ -0,0 +1,10 @@ +import pytest +from raps.config import list_systems, get_system_config + +@pytest.mark.parametrize("system_name", list_systems()) +def test_configs(system_name): + # Very basic test that all system configs are valid + config = get_system_config(system_name) + assert config.system_name == system_name + assert config.get_legacy()['system_name'] == system_name + assert config.get_legacy()['config'] == config -- GitLab From f2b77f8fe3ecbd3614befe027a5103bec6e16eff Mon Sep 17 00:00:00 2001 From: "Maiterth, Matthias" Date: Fri, 22 Aug 2025 13:29:22 +0000 Subject: [PATCH 244/388] Large progress toward live system simulation.
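Live mode bootstraps a simulation from the current state of a Slurm system instead of recorded telemetry: main.py gains a --live branch backed by Telemetry.load_jobs_times_args_from_live_system(), the Frontier dataloader gains load_live_data() built on pyslurm, job_dict() splits the old wall_time into expected_run_time and current_run_time, and the engine now tracks jobs killed at their time limit alongside completed ones. Live mode also rejects the REPLAY scheduling policy, since a live queue carries no recorded schedule to replay.

The sketch below is illustrative only, not part of the patch; it mirrors the query-and-filter pattern that load_live_data() uses, relying on the same classic pyslurm dict API and field semantics seen there (time_limit in minutes, submit_time as Unix time):

    import time
    import pyslurm

    snapshot_time = int(time.time())
    candidate_jobs = []
    for jid, jdata in pyslurm.job().get().items():
        # Finished jobs can no longer run; requeued TIMEOUT/FAILED jobs stay in.
        if jdata['job_state'] in ('COMPLETED', 'CANCELLED'):
            continue
        if jdata['job_state'] in ('TIMEOUT', 'FAILED') and not jdata['requeue']:
            continue
        candidate_jobs.append({
            'id': jdata['job_id'],
            'nodes_required': jdata['num_nodes'],
            'submit_time': jdata['submit_time'],     # Unix time
            'time_limit': jdata['time_limit'] * 60,  # Slurm reports minutes
            'start_time': jdata['start_time'] if jdata['job_state'] == 'RUNNING' else None,
        })
    print(f"{len(candidate_jobs)} jobs eligible at {snapshot_time}")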
--- main.py | 13 +- raps/args.py | 3 + raps/cooling.py | 4 +- raps/dataloaders/adastraMI250.py | 6 +- raps/dataloaders/bluewaters.py | 12 +- raps/dataloaders/frontier.py | 252 +++++++++++++++++++++++++++++-- raps/dataloaders/fugaku.py | 10 +- raps/dataloaders/gcloudv2.py | 2 +- raps/dataloaders/lassen.py | 2 +- raps/dataloaders/marconi100.py | 24 ++- raps/engine.py | 137 +++++++++++------ raps/job.py | 78 ++++++---- raps/plotting.py | 10 +- raps/resmgr/default.py | 5 +- raps/resmgr/multitenant.py | 5 +- raps/schedulers/default.py | 8 +- raps/schedulers/experimental.py | 12 +- raps/telemetry.py | 105 +++++++++---- raps/ui.py | 68 +++++---- raps/workload.py | 40 ++--- 20 files changed, 588 insertions(+), 208 deletions(-) diff --git a/main.py b/main.py index 20f8240..9655b1b 100644 --- a/main.py +++ b/main.py @@ -83,7 +83,16 @@ def main(): args_dict['config'] = config flops_manager = FLOPSManager(**args_dict) - if args.replay: + if args.live and not args.replay: + assert args.time is not None, "--time must be set, specifying how long we want to predict" + td = Telemetry(**args_dict) + jobs, timestep_start, timestep_end = \ + td.load_jobs_times_args_from_live_system() + if args.output is not None: + td.save_snapshot(jobs=jobs, timestep_start=timestep_start, + timestep_end=timestep_end, args=args, filename=td.dirname) + + elif args.replay: td = Telemetry(**args_dict) jobs, timestep_start, timestep_end, args_from_file = \ @@ -113,7 +122,7 @@ def main(): timestep_end=timestep_end, args=args, filename=td.dirname) if args.fastforward is not None: - timestep_start = args.fastforward + timestep_start = timestep_start + args.fastforward if args.time is not None: timestep_end = timestep_start + args.time diff --git a/raps/args.py b/raps/args.py index 95e979f..9c4142b 100644 --- a/raps/args.py +++ b/raps/args.py @@ -156,6 +156,9 @@ parser.add_argument("--jid", type=str, default="*", parser.add_argument("--scale", type=int, default=0, help=("Scale telemetry to a smaller target system, " "e.g., --scale 192")) +parser.add_argument("--live", action="store_true", + help="Grab data from live system.") + # Synthetic workloads parser = add_workload_to_parser(parser) diff --git a/raps/cooling.py b/raps/cooling.py index 4bdfd87..ba96e43 100644 --- a/raps/cooling.py +++ b/raps/cooling.py @@ -157,7 +157,7 @@ class ThermoFluidsModel: self.weather and self.weather.start is not None and \ self.weather.has_coords: # Convert total seconds to timedelta object - delta = timedelta(seconds=engine.current_time) + delta = timedelta(seconds=engine.current_timestep) target_datetime = self.weather.start + delta # Get temperature from weather data @@ -332,7 +332,7 @@ class ThermoFluidsModel: # FMU inputs are N powers and the wetbulb temp fmu_inputs = self.generate_fmu_inputs(runtime_values, uncertainties=engine.power_manager.uncertainties) - cooling_inputs, cooling_outputs = self.step(engine.current_time, + cooling_inputs, cooling_outputs = self.step(engine.current_timestep, fmu_inputs, engine.config['POWER_UPDATE_FREQ']) return cooling_inputs, cooling_outputs diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index df186b6..f0f576b 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -81,14 +81,12 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): # Map dataframe to job state. 
Add results to jobs list for jidx in tqdm(range(num_jobs - 1), total=num_jobs, desc="Processing Jobs"): - job_id = jobs_df.loc[jidx, 'job_id'] if not jid == '*': if int(jid) == int(job_id): print(f'Extracting {job_id} profile') else: continue - nodes_required = jobs_df.loc[jidx, 'num_nodes_alloc'] name = str(uuid.uuid4())[:6] account = jobs_df.loc[jidx, 'user_id'] @@ -193,10 +191,12 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): time_limit=time_limit, start_time=start_time, end_time=end_time, - wall_time=wall_time, + expected_run_time=wall_time, + current_run_time=0, trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, + trace_quanta=None, trace_missing_values=True ) job = Job(job_info) diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py index 2b4c38a..46fa462 100644 --- a/raps/dataloaders/bluewaters.py +++ b/raps/dataloaders/bluewaters.py @@ -220,10 +220,11 @@ def load_data(local_dataset_path, **kwargs): id=jid, priority=0, submit_time=sub, - time_limit=0, + time_limit=int(rec.get("wall_time")), start_time=st, end_time=et, - wall_time=duration, + expected_run_time=duration, + current_run_time=0, trace_time=sub, trace_start_time=st, trace_end_time=et, @@ -268,7 +269,7 @@ def load_data(local_dataset_path, **kwargs): bin_s = config.get("TRACE_QUANTA") jobs = [] - for r in jobs_raw: + for r in jobs_raw: # Is this intended? We go through the 'raw' jobs_dicts that were created above? st_abs = int(r["start_time"]) et_abs = int(r["end_time"]) nodes = r.get("scheduled_nodes") or [] @@ -300,10 +301,11 @@ def load_data(local_dataset_path, **kwargs): id=jid, priority=0, submit_time=int(r["submit_time"]), - time_limit=0, + time_limit=int(r["time_limit"]), start_time=st_abs, end_time=et_abs, - wall_time=et_abs - st_abs, + expected_run_time=et_abs - st_abs, + current_run_time=0, trace_time=st_abs, trace_start_time=st_abs, trace_end_time=st_abs + samples * bin_s, diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index ed7798a..506dab9 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -9,6 +9,8 @@ # To analyze the data python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR """ +import ast +import time import numpy as np import pandas as pd from tqdm import tqdm @@ -38,6 +40,8 @@ def load_data(files, **kwargs): list The list of parsed jobs. 
""" + if kwargs.get("live") is True: + return load_live_data() assert (len(files) == 2), "Frontier dataloader requires two files: joblive and jobprofile" jobs_path = files[0] @@ -111,13 +115,14 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar With this each job's - submit_time - time_limit - - start_time - - end_time - - wall_time (end_time-start_time, actual runtime in seconds) - - trace_time (lenght of each trace in seconds) - - trace_start_time (time offset in seconds after which the trace starts) - - trace_end_time (time offset in seconds after which the trace ends) - - trace_quanta (job's associated trace quanta, to correctly replay with different trace quanta) + - start_time # Maybe Null + - end_time # Maybe Null + - expected_run_time (end_time - start_time) # Maybe Null + - current_run_time (How long did the job run already, when loading) # Maybe zero + - trace_time (lenght of each trace in seconds) # Maybe Null + - trace_start_time (time offset in seconds after which the trace starts) # Maybe Null + - trace_end_time (time offset in seconds after which the trace ends) # Maybe Null + - trace_quanta (job's associated trace quanta, to correctly replay with different trace quanta) # Maybe Null has to be set for use within the simulation The values trace_start_time are similar to the telemetry_start and @@ -233,21 +238,20 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar continue # Start_time is not smaller than end_time or is not valid # Skip entry. - wall_time = end_time - start_time - if np.isnan(wall_time): - wall_time = 0 + expected_run_time = end_time - start_time + current_run_time = 0 # Check if we the job may may be runninghave wall time of the jobs trace_quanta = config['TRACE_QUANTA'] trace_time = gpu_trace.size * trace_quanta # seconds trace_start_time = 0 trace_end_time = trace_time - if wall_time > trace_time: - missing_trace_time = int(wall_time - trace_time) + if expected_run_time > trace_time: + missing_trace_time = int(expected_run_time - trace_time) trace_missing_values = True if start_time < 0: trace_start_time = missing_trace_time - trace_end_time = wall_time + trace_end_time = expected_run_time elif end_time > telemetry_end: trace_start_time = 0 trace_end_time = trace_time @@ -310,9 +314,13 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar scheduled_nodes=scheduled_nodes, id=job_id, priority=priority, # partition missing - submit_time=submit_time, time_limit=time_limit, - start_time=start_time, end_time=end_time, - wall_time=wall_time, trace_time=trace_time, + submit_time=submit_time, + time_limit=time_limit, + start_time=start_time, + end_time=end_time, + expected_run_time=expected_run_time, + current_run_time=current_run_time, + trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, trace_quanta=trace_quanta, trace_missing_values=trace_missing_values) @@ -321,6 +329,218 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar return jobs, telemetry_start, telemetry_end +def load_live_data(**kwargs): + """ Load Slurm Live data using pyslurm """ + jobs = list() + telemetry_start = int(time.time()) # This is now! get unix time + telemetry_start = 1755721300 + if hasattr(kwargs, 'time'): + time_to_sim = kwargs.get('time') # Should be specified . + assert isinstance(time_to_sim, int) + else: + time_to_sim = 14 * 24 * 60 * 60 # or we simulate 2 weeks. 
+ telemetry_end = telemetry_start + time_to_sim + + total_partitions = 0 + partition_dict = dict() + + import pyslurm # noqa + # Local Tests + # filename = "something/something/pyslurm.dump" + # with open(filename, 'r') as f: + # s = f.read() + # data = ast.literal_eval(s) + # + data = pyslurm.job().get() + + for jidx, jdata in data.items(): + if jdata['job_state'] == "COMPLETED" \ + or jdata['job_state'] == "CANCELLED": + continue + if jdata['job_state'] == "TIMEOUT" \ + or jdata['job_state'] == "FAILED": + if jdata['requeue'] is False: + continue + + # if jidx == XXX: + # print(jdata) + # exit() + # Picking the useful ones from the 110 features: Leaving the rest for potential changes + account = jdata['account'] + # 'accrue_time': String = 'Unknown', + # 'admin_comment': String, + # 'alloc_node': String = 'login08', + # 'alloc_sid': int + # 'array_job_id': None, + # 'array_task_id': None, + # 'array_task_str': None, + # 'het_job_id': None, + # 'het_job_id_set': None, + # 'het_job_offset': None, + # 'array_max_tasks': None, + # 'assoc_id': int, + # 'batch_flag': int, + # 'batch_features': None, + # 'batch_host': None, + # 'billable_tres': float, + # 'bitflags': int, + # 'boards_per_node': int, + # 'burst_buffer': None, + # 'burst_buffer_state': None, + # 'command': String, + # 'comment': None, + # 'contiguous': bool, + # 'core_spec': int, + # 'cores_per_socket': int, + # 'cpus_per_task': int, + # 'cpus_per_tres': None, + # 'cpu_freq_gov': int, + # 'cpu_freq_max': int, + # 'cpu_freq_min': int, + # 'dependency': None, + # 'derived_ec': String, + # 'eligible_time': int, + # 'end_time': int, + # 'exc_nodes': [], + # 'exit_code': String, + # 'features': [], + # 'group_id': int, + job_id = jdata['job_id'] + current_state = jdata['job_state'] + end_state = None + # 'last_sched_eval': String # e.g. '2013-02-31T14:29:09', + # 'licenses': {}, + # 'max_cpus': int, + # 'max_nodes': int, + # 'mem_per_tres': None, + name = jdata['name'] + # 'network': None, + # 'nodes': None, + # 'nice': 0, + # 'ntasks_per_core': int, + # 'ntasks_per_core_str': String + # 'ntasks_per_node': int, + # 'ntasks_per_socket': int, + # 'ntasks_per_socket_str': String, + # 'ntasks_per_board': 0, + # 'num_cpus': int, + nodes_required: int = jdata['num_nodes'] + # 'num_tasks': 49152, + # 'partition': String, # e.g.'batch', + if jdata['partition'] in partition_dict: + pass + else: + partition_dict[jdata['partition']] = total_partitions + total_partitions += 1 + partition = partition_dict[jdata['partition']] + # 'mem_per_cpu': bool, + # 'min_memory_cpu': None, + # 'mem_per_node': bool, + # 'min_memory_node': int, + # 'pn_min_memory': int, + # 'pn_min_cpus': int, + # 'pn_min_tmp_disk': int, + priority = jdata['priority'] + # 'profile': int, + # 'qos': String # e.g. 'normal', + # 'reboot': int, + scheduled_nodes_str_list = jdata['req_nodes'] # Explicitly requested nodes # Mismatch between slurm and raps + scheduled_nodes = [] + for n in scheduled_nodes_str_list: + scheduled_nodes.append(int(n[8:])) + # Do we need to reintroduce a list of explicitly required nodes? This is currently handled by setting the + # scheduled_nodes before the scheduler modifies this list + # 'req_switch': int, + # 'requeue': bool, + # 'resize_time': int, + # 'restart_cnt': int, + # 'resv_name': None, + # 'run_time': int, # ?? 
+ # 'run_time_str': String, + # 'sched_nodes': None, + # 'selinux_context': None, + # 'shared': String, + # 'sockets_per_board': int, + # 'sockets_per_node': int, + if current_state == "RUNNING": + start_time = jdata['start_time'] + end_time = None + current_run_time = jdata['run_time'] + else: + start_time = None + end_time = None + current_run_time = jdata['run_time'] # ?? + if jdata['job_state'] == "TIMEOUT": + if jdata['requeue'] is False: + current_run_time = 0 # ?? + elif jdata['job_state'] == "COMPLETING": + if jdata['requeue'] is False: + current_run_time = 0 # ?? + else: + assert current_run_time == 0, "Check if any other value occurs and should be handled! " \ + f"current_run_time:{current_run_time}" \ + f"\njdata:\n{jdata}" + expected_run_time = None + # 'state_reason': String # e.g. 'JobHeldUser', + # 'std_err': String, + # 'std_in': String, + # 'std_out': String, + submit_time = jdata['submit_time'] # int, Unix Time! + # 'suspend_time': int, + # 'system_comment': None, + # 'time_limit': e.g. 570, # in minutes! + time_limit = jdata['time_limit'] * 60 # needed in seconds + # 'time_limit_str': '0-09:30:00', + # 'time_min': int, + # 'threads_per_core': int, + # 'tres_alloc_str': None, + # 'tres_bind': None, + # 'tres_freq': None, + # 'tres_per_job': None, + # 'tres_per_node': None, + # 'tres_per_socket': None, + # 'tres_per_task': None, + # 'tres_req_str': String, + account = jdata['user_id'] # int in Slurm; may be a string in RAPS, and the conversion works. NOTE: this overwrites the 'account' read above. ... + # 'wait4switch': int, + # 'wckey': None, + # 'work_dir': String + # 'cpus_allocated': dict, + # 'cpus_alloc_layout': dict + cpu_trace = None # To be determined by a model! + gpu_trace = None + trace_time = None + trace_start_time = None + trace_end_time = None + trace_quanta = None + trace_missing_values = None + job_info = job_dict( + nodes_required=nodes_required, + name=name, + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + nrx_trace=None, + ntx_trace=None, + current_state=current_state, + end_state=end_state, + scheduled_nodes=scheduled_nodes, + id=job_id, + priority=priority, # partition missing + partition=partition, + submit_time=submit_time, time_limit=time_limit, + start_time=start_time, end_time=end_time, + expected_run_time=expected_run_time, + current_run_time=current_run_time, + trace_time=trace_time, + trace_start_time=trace_start_time, trace_end_time=trace_end_time, + trace_quanta=trace_quanta, trace_missing_values=trace_missing_values) + job = Job(job_info) + jobs.append(job) + + return jobs, telemetry_start, telemetry_end + + def xname_to_index(xname: str, config: dict): """ Converts an xname string to an index value based on system configuration. 
diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index f28ac6b..734fa61 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -74,7 +74,7 @@ def load_data_from_df(df, **kwargs): telemetry_end = int(diff.total_seconds()) # Loop through the DataFrame rows to extract job information - for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing Jobs"): + for i, row in tqdm(df.iterrows(), total=len(df), desc="Processing Jobs"): nodes_required = row['nnumr'] if 'nnumr' in df.columns else 0 name = row['jnam'] if 'jnam' in df.columns else 'unknown' account = row['usr'] @@ -114,6 +114,12 @@ def load_data_from_df(df, **kwargs): end_time = int(diff.total_seconds()) wall_time = end_time - start_time + if end_time < start_time: + print(f"Job: {i}, skipped end_time < start_time ({end_time} < {start_time})") + if kwargs.get('debug'): + print(row) + continue + # duration = int(row['duration']) if 'duration' in df.columns else 0 # if (wall_time != duration): # if abs(wall_time - duration) <= 1: # offset is often 1 @@ -153,7 +159,7 @@ def load_data_from_df(df, **kwargs): time_limit=time_limit, start_time=start_time, end_time=end_time, - wall_time=wall_time, + expected_run_time=wall_time, trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 54dfc1d..e19b0e8 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -323,7 +323,7 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any # submit_time=row["timestamp"], time_limit=0, submit_time=start, time_limit=0, start_time=start, end_time=end, - wall_time=wall, trace_time=row["timestamp"], + expected_run_time=wall, trace_time=row["timestamp"], trace_start_time=start, trace_end_time=end, trace_quanta=trace_quanta ) # Wrap dict in a real Job so telemetry.save_snapshot() can use __dict__ diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index bff9098..f777f79 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -240,7 +240,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): time_limit=time_limit, start_time=start_time, end_time=end_time, - wall_time=wall_time, + expected_run_time=wall_time, trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index bb345a3..6222c7c 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -207,15 +207,25 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: - job_info = job_dict(nodes_required=nodes_required, name=name, - account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, nrx_trace=[], ntx_trace=[], + job_info = job_dict(nodes_required=nodes_required, + name=name, + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + nrx_trace=[], ntx_trace=[], end_state=end_state, + # current_state=current_state, # PENDING? 
scheduled_nodes=scheduled_nodes, - id=job_id, priority=priority, partition=partition, - submit_time=submit_time, time_limit=time_limit, - start_time=start_time, end_time=end_time, - wall_time=wall_time, trace_time=trace_time, + id=job_id, + priority=priority, + partition=partition, + submit_time=submit_time, + time_limit=time_limit, + start_time=start_time, + end_time=end_time, + expected_run_time=wall_time, + current_run_time=0, + trace_time=trace_time, trace_start_time=trace_start_time, trace_end_time=trace_end_time, trace_quanta=config["TRACE_QUANTA"], diff --git a/raps/engine.py b/raps/engine.py index 572eba1..f79b140 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -32,6 +32,7 @@ class TickData: """ Represents the state output from the simulation each tick """ current_timestep: int completed: list[Job] + killed: list[Job] running: list[Job] queue: list[Job] down_nodes: list[int] @@ -129,6 +130,7 @@ class Engine: self.accounts = None self.job_history_dict = [] self.jobs_completed = 0 + self.jobs_killed = 0 self.total_initial_jobs = total_initial_jobs self.current_timestep = 0 self.cooling_model = cooling_model @@ -170,6 +172,9 @@ class Engine: resource_manager=self.resource_manager, jobs=jobs ) + if kwargs.get('live'): + assert self.scheduler.policy != PolicyType.REPLAY, \ + "Cannot replay from a live system. Choose a scheduling policy!" print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}" f", with policy {self.scheduler.policy} " f"and backfill {self.scheduler.bfpolicy}") @@ -199,8 +204,8 @@ print(f"[DEBUG] add_running_jobs_to_queue: Found {len(eligible_jobs)} eligible jobs.") # Remove those jobs from jobs_to_submit: jobs_to_submit[:] = [job for job in jobs_to_submit if - job.start_time is not None - and job.start_time >= self.current_timestep] + job.start_time is None + or job.start_time >= self.current_timestep] if self.debug: print(f"[DEBUG] add_running_jobs_to_queue: {len(jobs_to_submit)} jobs remaining in jobs_to_submit.") # Convert them to Job instances and build list of eligible jobs. @@ -247,10 +252,12 @@ # 1 Identify Completed Jobs completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_timestep] + # Update completed jobs, their accounts, and free resources. for job in completed_jobs: self.power_manager.set_idle(job.scheduled_nodes) - job.state = JobState.COMPLETED + job.current_state = JobState.COMPLETED + job.end_time = self.current_timestep self.running.remove(job) self.jobs_completed += 1 @@ -261,6 +268,23 @@ # Free the nodes via the resource manager. self.resource_manager.free_nodes_from_job(job) + killed_jobs = [job for job in self.running if + job.start_time + job.time_limit <= self.current_timestep] + + for job in killed_jobs: + self.power_manager.set_idle(job.scheduled_nodes) + job.current_state = JobState.TIMEOUT + job.end_time = self.current_timestep + + self.running.remove(job) + self.jobs_killed += 1 + job_stats = job.statistics() + if self.accounts: + self.accounts.update_account_statistics(job_stats) + self.job_history_dict.append(job_stats.__dict__) + # Free the nodes via the resource manager. 
+ self.resource_manager.free_nodes_from_job(job) + # 2 Check continuous job generation if self.continuous_workload is not None: # Experimental continuous_job_generation(engine=self, timestep=self.current_timestep, jobs=jobs) @@ -285,13 +309,13 @@ # #available_gpu_units = sum(node['available_gpu_units'] for node in self.resource_manager.nodes) self.num_free_nodes = len([node for node in self.resource_manager.nodes if - not node['is_down'] and - node['available_cpu_cores'] == node['total_cpu_cores'] and - node['available_gpu_units'] == node['total_gpu_units']]) + not node['is_down'] + and node['available_cpu_cores'] == node['total_cpu_cores'] + and node['available_gpu_units'] == node['total_gpu_units']]) self.num_active_nodes = len([node for node in self.resource_manager.nodes if - not node['is_down'] and - (node['available_cpu_cores'] < node['total_cpu_cores'] - or node['available_gpu_units'] < node['total_gpu_units'])]) + not node['is_down'] + and (node['available_cpu_cores'] < node['total_cpu_cores'] + or node['available_gpu_units'] < node['total_gpu_units'])]) # Update system utilization history self.resource_manager.update_system_utilization(self.current_timestep, self.running) @@ -304,27 +328,31 @@ self.down_nodes = self.resource_manager.down_nodes # TODO This should only be managed in the resource manager! - return completed_jobs, newly_downed_nodes, need_reschedule + return completed_jobs, killed_jobs, newly_downed_nodes, need_reschedule - def complete_timestep(self, autoshutdown, all_jobs: List, jobs: List): + def complete_timestep(self, *, + actively_considered_jobs: List, + all_jobs: List, + replay: bool, + autoshutdown: bool): # 1 update running time of all running jobs # 2 update the current_timestep of the engine (this serves as reference for most computations) # 3 Check if simulation should shutdown # update Running time for job in self.running: - if job.state == JobState.RUNNING: + if job.current_state == JobState.RUNNING: job.running_time = self.current_timestep - job.start_time # Stop the simulation if no more jobs are running or in the queue or in the job list. if autoshutdown and \ len(self.queue) == 0 and \ len(self.running) == 0 and \ - not self.replay and \ + not replay and \ len(all_jobs) == 0 and \ - len(jobs) == 0: + len(actively_considered_jobs) == 0: if self.debug: - print(f"[DEBUG] Simulaiton Complete: {self.config['system_name']} - " + print(f"Simulation completed early: {self.config['system_name']} - " f"Stopping simulation at time {self.current_timestep}. " f"Simulation ran for {self.current_timestep - self.timestep_start}") simulation_complete = True @@ -334,7 +362,7 @@ return simulation_complete - def tick(self, *, time_delta=1): + def tick(self, *, time_delta=1, replay=False): # Tick runs all simulations of interest at the given time delta interval. # # The simulations which are needed for simulations consistency at each time step @@ -371,14 +399,23 @@ job.running_time = self.current_timestep - job.start_time - if job.state != JobState.RUNNING: - raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.state}") + if job.current_state != JobState.RUNNING: + raise ValueError(f"Job is in running list, but state is not RUNNING: job.current_state == {job.current_state}") else: # if job.state == JobState.RUNNING: # Error checks - if job.running_time > job.wall_time: - raise Exception(f"""Job should have ended already! 
- {job.running_time} > {job.wall_time} - """) + if job.running_time > job.time_limit: + raise Exception(f"Job exceded time limit! " + f"{job.running_time} > {job.time_limit}" + f"\n{job}" + f"\nCurrent timestep:{self.current_timestep-self.timestep_start} (rel)" + ) + if replay and job.running_time > job.expected_run_time: + raise Exception(f"Job should have ended in replay! " + f" {job.running_time} > {job.expected_run_time}" + f"\n{job}" + f"\nCurrent timestep:{self.current_timestep-self.timestep_start} (rel)" + ) + # Aggregate scheduled nodes scheduled_nodes.append(job.scheduled_nodes) @@ -488,6 +525,7 @@ class Engine: tick_data = TickData( current_timestep=self.current_timestep, completed=None, + killed=None, running=self.running, queue=self.queue, down_nodes=self.down_nodes, @@ -508,16 +546,20 @@ class Engine: ) return tick_data - def prepare_system_state(self, all_jobs: List, timestep_start, timestep_end, replay: bool): - # Modifies Jobs object + def prepare_system_state(self, *, all_jobs: List, timestep_start, timestep_end): + # Set engine timesteps + self.timestep_start = timestep_start self.current_timestep = timestep_start + self.timestep_end = timestep_end + # Modifies Jobs object # Keep only jobs that have not yet ended and that have a chance to start - all_jobs[:] = [job for job in all_jobs if (job.end_time is not None - and job.end_time >= timestep_start - and job.submit_time < timestep_end - ) or job.end_time is None] - + all_jobs[:] = [job for job in all_jobs if + job.submit_time < timestep_end + and ((job.end_time is not None + and job.end_time >= timestep_start) + or job.end_time is None) + ] all_jobs.sort(key=lambda j: j.submit_time) self.add_running_jobs_to_queue(all_jobs) @@ -531,7 +573,7 @@ class Engine: for job in self.queue[:]: # operate over a slice copy to be able to remove jobs from queue if placed. self.scheduler.schedule([job], self.running, job.start_time, accounts=self.accounts, sorted=True) self.queue.remove(job) - if replay and len(self.queue) != 0: + if self.replay and len(self.queue) != 0: raise ValueError( f"Something went wrong! Not all jobs could be placed!\nPotential confligt in queue:\n{self.queue}") # Restore the target policy and backfill for the remainder of the simulation. @@ -540,8 +582,6 @@ class Engine: def run_simulation(self, jobs, timestep_start, timestep_end, time_delta=1, autoshutdown=False): """Generator that yields after each simulation tick.""" - self.timestep_start = timestep_start - self.timestep_end = timestep_end if self.scheduler.policy == PolicyType.REPLAY: replay = True @@ -551,12 +591,11 @@ class Engine: if self.debug: print(f"[DEBUG] run_simulation: Initial jobs count: {len(jobs)}") if jobs: - print( - "[DEBUG] run_simulation: First job submit_time: " - f"{jobs[0].submit_time}, start_time: {jobs[0].start_time}") + print("[DEBUG] run_simulation: First job submit_time: " + f"{jobs[0].submit_time}, start_time: {jobs[0].start_time}") - # Place jobs that are currently running, onto the system. - self.prepare_system_state(jobs, timestep_start, timestep_end, replay) + # Set times and place jobs that are currently running, onto the system. 
+ self.prepare_system_state(all_jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end) # Process jobs in batches for better performance of timestep loop all_jobs = jobs.copy() @@ -565,11 +604,10 @@ class Engine: batch_window = max(60 * 60 * 6, 2 * time_delta) # at least 6h sim_state = SimulationState(time_delta) - listener_thread = threading.Thread(target=keyboard_listener, args=(sim_state,), daemon=True) - listener_thread.start() + # listener_thread = threading.Thread(target=keyboard_listener, args=(sim_state,), daemon=True) + # listener_thread.start() - self.current_timestep = timestep_start - while self.current_timestep < timestep_end: # Runs every seconds! + while self.current_timestep < self.timestep_end: # Runs every seconds! if sim_state.is_paused(): time.sleep(0.1) @@ -583,13 +621,18 @@ class Engine: all_jobs[:] = [job for job in all_jobs if job.submit_time > self.current_timestep + batch_window] # 1. Prepare Timestep: - completed_jobs, newly_downed_nodes, need_reschedule = self.prepare_timestep(replay=replay, jobs=jobs) + completed_jobs, killed_jobs, newly_downed_nodes, need_reschedule = \ + self.prepare_timestep(jobs=jobs) # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) # 3. Schedule jobs that are now in the queue. - if completed_jobs != [] or newly_downed_nodes != [] or has_new_additions or need_reschedule: + if completed_jobs != [] \ + or killed_jobs != [] \ + or newly_downed_nodes != [] \ + or has_new_additions \ + or need_reschedule: self.scheduler.schedule(self.queue, self.running, self.current_timestep, accounts=self.accounts, @@ -604,13 +647,17 @@ class Engine: and self.current_timestep % self.config['POWER_UPDATE_FREQ'] == 0) or (current_time_delta != 1 or self.downscale != 1) ): - tick_data = self.tick(time_delta=current_time_delta) + tick_data = self.tick(time_delta=current_time_delta, replay=replay) tick_data.completed = completed_jobs + tick_data.killed = completed_jobs else: tick_data = None # 5. Complete the timestep - simulation_done = self.complete_timestep(autoshutdown, all_jobs, jobs) + simulation_done = self.complete_timestep(actively_considered_jobs=jobs, + all_jobs=all_jobs, + replay=replay, + autoshutdown=autoshutdown) if simulation_done: break yield tick_data diff --git a/raps/job.py b/raps/job.py index 64400d9..ecb7fd3 100644 --- a/raps/job.py +++ b/raps/job.py @@ -1,5 +1,6 @@ from enum import Enum import numpy as np +from types import NoneType """ Note: want to simplify this in the future to use a minimal required set of job attributes, @@ -17,6 +18,7 @@ class JobState(Enum): RUNNING = 'R' PENDING = 'PD' COMPLETED = 'C' + COMPLETING = 'Cing' CANCELLED = 'CA' FAILED = 'F' TIMEOUT = 'TO' @@ -27,7 +29,7 @@ def job_dict(*, name, account, # Allocation - job_state=JobState.PENDING, + current_state=JobState.PENDING, end_state: JobState | None = None, scheduled_nodes=None, id, @@ -48,7 +50,8 @@ def job_dict(*, time_limit: int = 0, start_time: int | None = 0, end_time: int | None = 0, - wall_time: int | None = 0, # Should this be removed? 
+             expected_run_time: int | None = 0,
+             current_run_time: int = 0,
              trace_time: int | None = 0,
              trace_start_time: int | None = 0,
              trace_end_time: int | None = 0,
@@ -62,7 +65,7 @@ def job_dict(*,
         'name': name,
         'account': account,
         # Allocation:
-        'job_state': job_state,
+        'current_state': current_state,
         'end_state': end_state,
         'scheduled_nodes': scheduled_nodes,
         'id': id,
@@ -83,7 +86,8 @@ def job_dict(*,
         'time_limit': time_limit,
         'start_time': start_time,
         'end_time': end_time,
-        'wall_time': wall_time,
+        'expected_run_time': expected_run_time,
+        'current_run_time': current_run_time,
         'trace_time': trace_time,
         'trace_start_time': trace_start_time,
         'trace_end_time': trace_end_time,
@@ -132,7 +136,7 @@ class Job:
     """
     _id_counter = 0

-    def __init__(self, job_dict, state=JobState.PENDING, account=None):
+    def __init__(self, job_dict, current_state=JobState.PENDING, end_state=None, account=None):
         # # current_time unused!
         # Initializations:
         self.power = 0
@@ -143,14 +147,16 @@ class Job:
         self.allocated_cpu_cores = 0
         self.allocated_gpu_units = 0
         self.power_history = []
-        self._state = state
+        self._current_state = current_state
+        self.end_state = end_state  # default None!
         self.account = account
         # Times:
         self.submit_time = None  # Actual submit time
         self.time_limit = None  # Time limit set at submission
         self.start_time = None  # Actual start time when executing or from telemetry
-        self.end_time = None  # Actual end time when executing or from telemetry
-        self.wall_time = None  # end_time - start_time
+        self.end_time = None  # Actual end time; None while unknown, otherwise set when executing or from telemetry
+        self.expected_run_time = None
+        self.current_run_time = 0
         self.trace_time = None  # Time period for which traces are available
         self.trace_start_time = None  # Relative start time of the trace (to running time)
         self.trace_end_time = None  # Relative end time of the trace
@@ -175,13 +181,14 @@ class Job:
            (isinstance(self.scheduled_nodes, np.ndarray) and isinstance(self.scheduled_nodes[0], int)):
             pass  # Type is ok
         else:
-            # Type is not as expected!
raise ValueError( - f"type: self.scheduled_nodes:{type(self.scheduled_nodes)}, with {type(self.scheduled_nodes[0])}") + f"type: self.scheduled_nodes:{type(self.scheduled_nodes)}, " + f"with {type(self.scheduled_nodes[0])}") assert isinstance(self.submit_time, (int, float)) - assert isinstance(self.wall_time, (int, float, np.int64, np.double)) - assert isinstance(self.start_time, (int, float, np.int64, np.double, type(None))) - assert isinstance(self.end_time, (int, float, np.int64, np.double, type(None))) + assert isinstance(self.expected_run_time, (int, float, np.int64, np.double, NoneType)) + assert isinstance(self.current_run_time, (int, float, np.int64, np.double)) + assert isinstance(self.start_time, (int, float, np.int64, np.double, NoneType)) + assert isinstance(self.end_time, (int, float, np.int64, np.double, NoneType)) if self.start_time is not None and self.end_time is not None: assert self.start_time <= self.end_time, f"{self.start_time} <= {self.end_time}" @@ -196,30 +203,32 @@ class Job: f"allocated_gpu_units={self.allocated_gpu_units}, " f"cpu_trace={self.cpu_trace}, gpu_trace={self.gpu_trace}, " f"ntx_trace={self.ntx_trace}, nrx_trace={self.nrx_trace}, " - f"job_state={self.job_state}, end_state={self.end_state}, " + f"end_state={self.end_state}, " + f"current_state={self.current_state}, " f"submit_time={self.submit_time}, time_limit={self.time_limit}, " f"start_time={self.start_time}, end_time={self.end_time}, " - f"wall_time={self.wall_time}, " + f"expected_run_time={self.expected_run_time}, " + f"current_run_time={self.current_run_time}, " f"trace_time={self.trace_time}, " f"trace_start_time={self.trace_start_time}, " f"trace_end_time={self.trace_end_time}, " f"trace_quanta={self.trace_quanta}, " - f"running_time={self.running_time}, state={self._state}, " + f"running_time={self.running_time}, " f"power={self.power}, " f"power_history={self.power_history})") @property - def state(self): + def current_state(self): """Get the current state of the job.""" - return self._state + return self._current_state - @state.setter - def state(self, value): - """Set the state of the job.""" + @current_state.setter + def current_state(self, value): + """Set the current_state of the job.""" if isinstance(value, JobState): - self._state = value + self._current_state = value elif isinstance(value, str) and value in JobState.__members__: - self._state = JobState[value] + self._current_state = JobState[value] else: raise ValueError(f"Invalid state: {value}") @@ -243,7 +252,7 @@ class Job: def apply_dilation(self, factor): """ - Apply a dilation factor to the job’s execution traces and wall time. + Apply a dilation factor to the job’s execution traces and run time. Parameters: - factor (float): the dilation factor; >1 to slow down (lengthen the traces) and <1 to speed up. 
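The `state` -> `current_state` rename above keeps the permissive setter contract: it accepts either a `JobState` member or a member name given as a string. A minimal self-contained sketch of that contract, mirroring the patched property (the trimmed `JobState` here is illustrative, not additional patch content):

    from enum import Enum

    class JobState(Enum):
        RUNNING = 'R'
        PENDING = 'PD'

    class Job:
        def __init__(self):
            self._current_state = JobState.PENDING

        @property
        def current_state(self):
            """Get the current state of the job."""
            return self._current_state

        @current_state.setter
        def current_state(self, value):
            # Accept a JobState member directly, or a member name such as 'RUNNING'.
            if isinstance(value, JobState):
                self._current_state = value
            elif isinstance(value, str) and value in JobState.__members__:
                self._current_state = JobState[value]
            else:
                raise ValueError(f"Invalid state: {value}")

    job = Job()
    job.current_state = 'RUNNING'  # the string form is converted to the enum member
    assert job.current_state is JobState.RUNNING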
@@ -252,8 +261,11 @@ class Job: self.gpu_trace = dilate_trace(self.gpu_trace, factor) self.ntx_trace = dilate_trace(self.ntx_trace, factor) self.nrx_trace = dilate_trace(self.nrx_trace, factor) - self.wall_time = int(np.round(self.wall_time * factor)) - self.end_time = self.start_time + self.wall_time + if self.end_time is not None: + expected_run_time = self.end_time - self.start_time + expected_run_time = int(np.round(expected_run_time * factor)) + assert self.start_time is not None + self.end_time = self.start_time + expected_run_time class JobStatistics: @@ -269,7 +281,7 @@ class JobStatistics: self.submit_time = job.submit_time self.start_time = job.start_time self.end_time = job.end_time - self.state = job._state + self.current_state = job.current_state if isinstance(job.cpu_trace, list) or isinstance(job.cpu_trace, np.ndarray): if len(job.cpu_trace) == 0: self.avg_cpu_usage = 0 @@ -332,8 +344,8 @@ if __name__ == "__main__": # Each sample in the trace represents 15 seconds. trace_quanta = 15 # seconds per sample - wall_time = 600 # total job wall time in seconds (600s = 10 minutes) - num_samples = wall_time // trace_quanta # should be 40 samples + expected_run_time = 600 # total job run time in seconds (600s = 10 minutes) + num_samples = expected_run_time // trace_quanta # should be 40 samples # Generate a random GPU trace (values between 0 and 4 for 4 GPUs total) gpu_trace = [random.uniform(0, 4) for _ in range(num_samples)] @@ -352,7 +364,7 @@ if __name__ == "__main__": gpu_trace=gpu_trace, ntx_trace=ntx_trace, nrx_trace=nrx_trace, - wall_time=wall_time, + expected_run_time=expected_run_time, end_state="", scheduled_nodes=[], time_offset=0, @@ -363,7 +375,7 @@ if __name__ == "__main__": job_instance = Job(jdict, current_time=0) # Print original job properties. - print("Original wall_time:", job_instance.wall_time) + print("Original expected_run_time:", job_instance.expected_run_time) print("Original cpu_trace length:", len(job_instance.cpu_trace)) print("Original gpu_trace length:", len(job_instance.gpu_trace)) @@ -373,11 +385,11 @@ if __name__ == "__main__": # Calculate the expected new lengths. expected_samples = int(np.round(num_samples * dilation_factor)) - expected_wall_time = int(np.round(wall_time * dilation_factor)) + expected_run_time = int(np.round(expected_run_time * dilation_factor)) # Print the dilated job properties. 
     print("\nAfter applying a dilation factor of", dilation_factor)
-    print("New wall_time:", job_instance.wall_time, "(expected:", expected_wall_time, ")")
+    print("New expected_run_time:", job_instance.expected_run_time, "(expected:", expected_run_time, ")")
     print("New cpu_trace length:", len(job_instance.cpu_trace), "(expected:", expected_samples, ")")
     print("New gpu_trace length:", len(job_instance.gpu_trace), "(expected:", expected_samples, ")")
diff --git a/raps/plotting.py b/raps/plotting.py
index 9fee188..61a8f96 100644
--- a/raps/plotting.py
+++ b/raps/plotting.py
@@ -333,7 +333,7 @@ def plot_jobs_gantt(*, ax=None, jobs, bars_are_node_sized):
         ax = plt.figure(figsize=(10, 4))
     # Submit_time and Wall_time
     submit_t = [x.submit_time for x in jobs]
-    duration = [x.wall_time for x in jobs]
+    duration = [x.current_run_time if x.end_time else x.time_limit for x in jobs]
     nodes_required = [x.nodes_required for x in jobs]

     colors = spaced_colors(len(jobs))
@@ -350,7 +350,8 @@ def plot_jobs_gantt(*, ax=None, jobs, bars_are_node_sized):
     # ax_b labels:
     ax.set_xlabel("time [hh:mm]")
     minx_s = min([x.submit_time for x in jobs])
-    maxx_s = np.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs]))
+    maxx_s = np.ceil(max([x.current_run_time if x.end_time else x.time_limit for
+                          x in jobs]) + max([x.submit_time for x in jobs]))
     x_label_mins = [int(n) for n in np.arange(minx_s // 60, maxx_s // 60)]
     x_label_ticks = [n * 60 for n in x_label_mins[0::60]]
     x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for
@@ -366,10 +367,12 @@ def plot_nodes_gantt(*, ax=None, jobs):
     if ax is None:
         ax = plt.figure(figsize=(10, 4))
     # Submit_time and Wall_time
-    duration = [x.wall_time for x in jobs]
+    duration = [x.current_run_time if x.end_time else x.time_limit for x in jobs]
     # nodes_required = [x['nodes_required'] for x in jobs]
     start_t = [x.start_time for x in jobs]
     nodeIDs = [x.scheduled_nodes for x in jobs]
+    if not any(nodeIDs):
+        raise IndexError(f"No nodeIDs: {nodeIDs}, jobs have no scheduled_nodes.")
     colors = spaced_colors(len(jobs))

     for i in track(range(len(jobs)), description="Collecting information to plot"):
diff --git a/raps/resmgr/default.py b/raps/resmgr/default.py
index 2bb1345..c7791f5 100644
--- a/raps/resmgr/default.py
+++ b/raps/resmgr/default.py
@@ -54,8 +54,9 @@ class ExclusiveNodeResourceManager:

         # Mark job running
         job.start_time = current_time
-        job.end_time = current_time + job.wall_time
-        job.state = JobState.RUNNING
+        if job.expected_run_time:
+            job.end_time = current_time + job.expected_run_time  # This may be an assumption!
+ job.current_state = JobState.RUNNING def free_nodes_from_job(self, job): """Frees the full nodes previously allocated to a job.""" diff --git a/raps/resmgr/multitenant.py b/raps/resmgr/multitenant.py index 5d5f27d..bb24e4e 100644 --- a/raps/resmgr/multitenant.py +++ b/raps/resmgr/multitenant.py @@ -83,8 +83,9 @@ class MultiTenantResourceManager: job.allocated_cpu_cores = job.cpu_cores_required job.allocated_gpu_units = job.gpu_units_required job.start_time = current_time - job.end_time = current_time + job.wall_time - job.state = JobState.RUNNING + if job.expected_run_time: + job.end_time = current_time + job.expected_run_time # this may be an assumption (See default.py) + job.current_state = JobState.RUNNING def free_nodes_from_job(self, job): """Releases cores/GPUs from a completed job.""" diff --git a/raps/schedulers/default.py b/raps/schedulers/default.py index fad33a1..2a1fd21 100644 --- a/raps/schedulers/default.py +++ b/raps/schedulers/default.py @@ -43,6 +43,7 @@ class Scheduler: if job.start_time > current_time: continue # Replay: Job didn't start yet. Next! else: + # assert job.start_time == current_time, f"{job.start_time} == {current_time}" pass else: pass @@ -101,7 +102,8 @@ class Scheduler: queue.remove(job) if self.debug: scheduled_nodes = summarize_ranges(job.scheduled_nodes) - print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}") + print(f"t={current_time}: Scheduled job {job.id} with time limit " + f"{job.time_limit} on nodes {scheduled_nodes}") def check_available_nodes(self, job): nodes_available = False @@ -146,7 +148,7 @@ class Scheduler: else: nodes_required = first_job.nodes_required - sorted_running = sorted(running, key=lambda job: job.end_time) + sorted_running = sorted(running, key=lambda job: job.time_limit) # Identify when we have enough nodes therefore the start time of the first_job in line shadow_time_end = 0 @@ -156,7 +158,7 @@ class Scheduler: break else: shadow_nodes_avail += job.nodes_required - shadow_time_end = job.end_time + shadow_time_end = job.start_time + job.time_limit time_limit = shadow_time_end - current_time # We now have the time_limit after which no backfilled job should end diff --git a/raps/schedulers/experimental.py b/raps/schedulers/experimental.py index 046873a..b77b0ce 100644 --- a/raps/schedulers/experimental.py +++ b/raps/schedulers/experimental.py @@ -104,7 +104,8 @@ class Scheduler: queue.remove(job) if self.debug: scheduled_nodes = summarize_ranges(job.scheduled_nodes) - print(f"t={current_time}: Scheduled job {job.id} with wall time {job.wall_time} on nodes {scheduled_nodes}") + print(f"t={current_time}: Scheduled job {job.id} with time limit " + f"{job.time_limit} on nodes {scheduled_nodes}") def check_available_nodes(self, job): nodes_available = False @@ -159,7 +160,7 @@ class Scheduler: break else: shadow_nodes_avail += job.nodes_required - shadow_time_end = job.end_time + shadow_time_end = job.time_limit time_limit = shadow_time_end - current_time # We now have the time_limit after which no backfilled job should end @@ -202,6 +203,7 @@ class Scheduler: # Everything with negative Fugaku Points get sorted according to normal priority priority_triple_list = [] for job in queue: + assert accounts and accounts.account_dict fugaku_priority = accounts.account_dict[job.account].fugaku_points if fugaku_priority is None: fugaku_priority = 0 @@ -233,6 +235,7 @@ class Scheduler: return queue priority_tuple_list = [] for job in queue: + assert accounts and accounts.account_dict 
             power = accounts.account_dict[job.account].avg_power
             if power is None:
                 power = 0
@@ -259,6 +262,7 @@ class Scheduler:
             return queue
         priority_tuple_list = []
         for job in queue:
+            assert accounts and accounts.account_dict
             power = accounts.account_dict[job.account].avg_power
             if power is None:
                 power = 0
@@ -278,6 +282,7 @@ class Scheduler:
             return queue
         priority_tuple_list = []
         for job in queue:
+            assert accounts and accounts.account_dict
             power = accounts.account_dict[job.account].avg_power
             if power is None:
                 power = 0
@@ -297,6 +302,7 @@ class Scheduler:
             return queue
         priority_tuple_list = []
         for job in queue:
+            assert accounts and accounts.account_dict
             energy = accounts.account_dict[job.account].energy_allocated
             time = accounts.account_dict[job.account].time_allocated
             if energy is None:
@@ -319,6 +325,7 @@ class Scheduler:
             return queue
         priority_tuple_list = []
         for job in queue:
+            assert accounts and accounts.account_dict
             energy = accounts.account_dict[job.account].energy_allocated
             time = accounts.account_dict[job.account].time_allocated
             if energy is None:
@@ -341,6 +348,7 @@ class Scheduler:
             return queue
         priority_tuple_list = []
         for job in queue:
+            assert accounts and accounts.account_dict
             power = accounts.account_dict[job.account].avg_power
             time = accounts.account_dict[job.account].time_allocated
             if power is None:
diff --git a/raps/telemetry.py b/raps/telemetry.py
index e9f67ab..08ce3d9 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -11,8 +11,10 @@ import sys
 import random
 import argparse
 # import itertools
-import json
+# import json
 import os.path
+from typing import Optional
+from types import ModuleType


 if __name__ == "__main__":
@@ -36,6 +38,8 @@
                         f"or use the original submit times ({choices[0]})")
     parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
     parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.')
+    parser.add_argument("--live", action="store_true", help="Grab data from live system.")
+
     args = parser.parse_args()
     args_dict = vars(args)
@@ -59,6 +63,7 @@ from raps.utils import next_arrival_byconfargs, create_casename, convert_to_seco

 class Telemetry:
     """A class for handling telemetry data, including reading/parsing job data, and loading/saving snapshots."""
+    dataloader: Optional[ModuleType]

     def __init__(self, **kwargs):
         self.kwargs = kwargs
@@ -98,15 +103,15 @@ class Telemetry:
         list_of_job_dicts = data['jobs'].tolist()
         for job_info in list_of_job_dicts:
             jobs.append(Job(job_info))
-        if hasattr(data, 'timestep_start'):
+        if 'timestep_start' in data:
             timestep_start = int(data['timestep_start'])
         else:
             timestep_start = 0
-        if hasattr(data, 'timestep_end'):
+        if 'timestep_end' in data:
             timestep_end = int(data['timestep_end'])
         else:
             timestep_end = np.inf
-        if hasattr(data, 'args'):
+        if 'args' in data:
             args_from_file = data['args'].tolist()
         else:
             args_from_file = None
@@ -124,31 +131,35 @@ class Telemetry:
             job_info = job_dict(nodes_required=line.get('num_nodes').item(),
                                 name=line.get('name').item(),
                                 account=line.get('account').item(),
-                                cpu_trace=None,
-                                gpu_trace=None,
-                                ntx_trace=None,
-                                nrx_trace=None,
-                                # end_state=line.get('end_state').item(),
-                                end_state=None,
-                                scheduled_nodes=json.loads(line.get('scheduled_nodes').item()),
+                                current_state=line.get('current_state').item(),
+                                end_state=line.get('end_state').item(),
+                                scheduled_nodes=line.get('scheduled_nodes').item(),
                                 id=line.get('id').item(),
-                                # priority=line.get('priority').item(),
-                                priority=None,
-                                # partition=line.get('partition').item(),
-                                partition=None,
+                                priority=line.get('priority').item(),
+                                partition=line.get('partition').item(),
+                                cpu_cores_required=line.get('cpu_cores_required').item(),
+                                gpu_units_required=line.get('gpu_units_required').item(),
+                                allocated_cpu_cores=line.get('allocated_cpu_cores').item(),
+                                allocated_gpu_units=line.get('allocated_gpu_units').item(),
+
+                                cpu_trace=line.get('cpu_trace'),
+                                gpu_trace=line.get('gpu_trace'),
+                                ntx_trace=line.get('ntx_trace'),
+                                nrx_trace=line.get('nrx_trace'),
                                 submit_time=line.get('submit_time').item(),
+                                time_limit=line.get('time_limit').item(),
                                 start_time=line.get('start_time').item(),
                                 end_time=line.get('end_time').item(),
-                                # wall_time=line.get('wall_time').item(),
-                                wall_time=line.get('end_time').item() - line.get('start_time').item(),
-                                # trace_time=line.get('trace_time').item(),
-                                trace_time=None,
+                                expected_run_time=line.get('expected_run_time').item(),
+                                current_run_time=line.get('current_run_time').item(),
+                                trace_time=line.get('trace_time'),
                                 # trace_start_time=line.get('trace_start_time').item(),
-                                trace_start_time=None,
+                                trace_start_time=line.get('trace_start_time'),
                                 # trace_end_time=line.get('trace_end_time').item(),
-                                trace_end_time=None,
-                                # trace_missing_values=line.get('trace_missing_values').item(),
-                                trace_missing_values=None
+                                trace_end_time=line.get('trace_end_time'),
+                                trace_quanta=line.get('trace_quanta').item(),
+                                trace_missing_values=line.get('trace_missing_values'),
+                                downscale=line.get('downscale'),
                                 )
             job = Job(job_info)
             jobs.append(job)
@@ -161,10 +172,17 @@ class Telemetry:

     def load_data(self, files):
         """Load telemetry data using custom data loaders."""
+        assert self.dataloader
         return self.dataloader.load_data(files, **self.kwargs)

+    def load_live_data(self):
+        """Load telemetry data using custom data loaders."""
+        assert self.dataloader
+        return self.dataloader.load_live_data(**self.kwargs)
+
     def load_data_from_df(self, *args, **kwargs):
         """Load telemetry data using custom data loaders."""
+        assert self.dataloader
         return self.dataloader.load_data_from_df(*args, **kwargs)

     def load_data_from_csv(self, file, *args, **kwargs):
@@ -201,16 +219,24 @@ class Telemetry:

     def node_index_to_name(self, index: int):
         """ Convert node index into a name"""
+        assert self.dataloader
         return self.dataloader.node_index_to_name(index, config=self.config)

     def cdu_index_to_name(self, index: int):
         """ Convert cdu index into a name"""
+        assert self.dataloader
         return self.dataloader.cdu_index_to_name(index, config=self.config)

     def cdu_pos(self, index: int) -> tuple[int, int]:
         """ Return (row, col) tuple for a cdu index """
+        assert self.dataloader
         return self.dataloader.cdu_pos(index, config=self.config)

+    def load_jobs_times_args_from_live_system(self):
+        jobs, timestep_start, timestep_end = self.load_live_data()
+        # data_args = None
+        return jobs, timestep_start, timestep_end
+
     def load_jobs_times_args_from_files(self, *, files, args, config, downscale=1):
         """ Load all files as combined jobs """
         # Read telemetry data (either npz file or via custom data loader)
@@ -292,7 +318,15 @@ def run_telemetry():
     config = get_system_config(args.system).get_legacy()
     args_dict['config'] = config
     td = Telemetry(**args_dict)
-    if args.replay:
+
+    if args.live and not args.replay:
+        jobs, timestep_start, timestep_end = \
+            td.load_jobs_times_args_from_live_system()
+        if args.output:
+            td.save_snapshot(jobs=jobs, timestep_start=timestep_start,
+                             timestep_end=timestep_end, args=args, filename=td.dirname)
+
+    elif args.replay:
         jobs, timestep_start, timestep_end, _ = \
             td.load_jobs_times_args_from_files(files=args.replay,
                                                args=args,
@@ -304,17 +339,19 @@ def run_telemetry():

     timesteps = timestep_end - timestep_start

-    dt_list = []
-    wt_list = []
-    nr_list = []
+    dt_list = []   # inter-arrival time
+    tl_list = []   # time limit
+    ert_list = []  # expected run time
+    nr_list = []   # nodes required
     submit_times = []
     end_times = []
     last = 0
     for job in jobs:
-        wt_list.append(job.wall_time)
+        tl_list.append(job.time_limit)
+        ert_list.append(job.expected_run_time)
         nr_list.append(job.nodes_required)
         submit_times.append(job.submit_time)
-        end_times.append(job.submit_time + job.wall_time)
+        end_times.append(job.submit_time + job.time_limit)
         if job.submit_time > 0:
             dt = job.submit_time - last
             dt_list.append(dt)
@@ -323,14 +360,18 @@ def run_telemetry():
             print(job)
     dt_list = [item for item in dt_list if item is not None]
     nr_list = [item for item in nr_list if item is not None]
-    wt_list = [item for item in wt_list if item is not None]
+    tl_list = [item for item in tl_list if item is not None]
+    ert_list = [item for item in ert_list if item is not None]

     print(f'Number of jobs: {len(jobs)}')
     print(f'Simulation will run for {timesteps} seconds')
     if dt_list:
         print(f'Average job arrival time is: {np.mean(dt_list):.2f}s')
-    if wt_list:
-        print(f'Average wall time is: {np.mean(wt_list):.2f}s')
+    if tl_list:
+        print(f'Average time limit is: {np.mean(tl_list):.2f}s')
+    if ert_list:
+        print(f'Average expected runtime is: {np.mean(ert_list):.2f}s')
+
     if nr_list:
         print(f'Nodes required (avg): {np.mean(nr_list):.2f}')
         print(f'Nodes required (max): {np.max(nr_list)}')
diff --git a/raps/ui.py b/raps/ui.py
index 376f168..d9c3bbe 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -2,6 +2,7 @@ import sys
 import os
 import pandas as pd
 import numpy as np
+from datetime import datetime
 from rich.align import Align
 from rich.console import Console
 from rich.layout import Layout
@@ -135,7 +136,7 @@ class LayoutManager:
         # Build the column headers
         # columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST"]
-        columns = ["JOBID", "WALL TIME", "NAME", "ACCOUNT", "ST", "NODES"]
+        columns = ["JOBID", "TIME LIMIT", "NAME", "ACCOUNT", "ST", "NODES"]
         if show_slowdown:
             columns.append("SLOW DOWN")
         else:
@@ -144,7 +145,7 @@ class LayoutManager:
         else:
             columns.append("SEGMENT")  # NODE SEGMENTS
-        columns.append("TIME")
+        columns.append("WALL TIME")

         # Create table with bold magenta headers
         table = Table(title="Job Queue", header_style="bold magenta", expand=True)
@@ -195,11 +196,11 @@ class LayoutManager:

             row = [
                 str(job.id).zfill(5),
-                convert_seconds_to_hhmm(job.wall_time // self.engine.downscale),
+                convert_seconds_to_hhmm(job.time_limit // self.engine.downscale),
                 # str(job.wall_time),
                 str(job.name),
                 str(job.account),
-                job.state.value,
+                job.current_state.value,
                 str(job.nodes_required),
                 nodes_display,
                 running_time_str
@@ -223,7 +224,8 @@ class LayoutManager:
                       down_nodes,
                       avg_net_util,
                       slowdown,
-                      time_delta):
+                      time_delta,
+                      timestep_start=0):
         """
         Updates the status information table with the provided system status data.
@@ -243,25 +245,43 @@ class LayoutManager:
             List of nodes that are down.
         """
         # Define columns with header styles
-        columns = [
-            "Time", "Jobs Running", "Jobs Queued",
-            "Active Nodes", "Free Nodes", "Down Nodes", "Speed"]
+        columns = []
+        time_header = "Time"
+        if timestep_start != 0:  # append time simulated
+            time_header += " (+Sim)"
+        columns.append(time_header)
+        columns.append("Jobs Running")
+        columns.append("Jobs Queued")
+        columns.append("Active Nodes")
+        columns.append("Free Nodes")
+        columns.append("Down Nodes")
+        columns.append("Speed")
+
         if self.simulate_network:
             columns.extend(("Net Util (%)", "Slowdown per job"))

         table = Table(header_style="bold magenta", expand=True)
         for col in columns:
             table.add_column(col, justify="center")

+        row = []
         # Add data row with white values
-        row = [
-            convert_seconds_to_hhmmss(time // self.engine.downscale),
-            str(nrun),
-            str(nqueue),
-            str(active_nodes),
-            str(free_nodes),
-            str(len(down_nodes)),
-            f"{time_delta}x"
-        ]
+        time_in_s = time // self.engine.downscale
+        if (time_in_s < 946684800):  # values below the Unix timestamp for 2000-01-01 are relative simulation time
+            time_str = convert_seconds_to_hhmm(time_in_s)
+        else:
+            # If the simulation time in seconds is larger than the Unix
+            # timestamp for Jan 2000, treat it as a Unix timestamp.
+            time_str = datetime.fromtimestamp(time_in_s).strftime("%Y-%m-%d %H:%M")
+        if timestep_start != 0:  # append time simulated
+            time_str += f"\nSim: {convert_seconds_to_hhmm(time_in_s - timestep_start)}"
+
+        row.append(time_str)
+        row.append(str(nrun))
+        row.append(str(nqueue))
+        row.append(str(active_nodes))
+        row.append(str(free_nodes))
+        row.append(str(len(down_nodes)))
+        row.append(f"{time_delta}x")
         if self.simulate_network:
             row.append(f"{avg_net_util * 100:.0f}%")
             row.append(f"{slowdown:.1f}x")
@@ -501,7 +521,7 @@ class LayoutManager:
         self.progress.update(self.progress_task, description=f"{timestamp}", advance=timestamp, transient=True)
         self.layout["progress"].update(self.progress.get_renderable())

-    def update_full_layout(self, data: TickData, time_delta=1):
+    def update_full_layout(self, data: TickData, time_delta=1, timestep_start=0):
         if self.debug:
             return
         uncertainties = self.engine.power_manager.uncertainties
@@ -514,13 +534,6 @@ class LayoutManager:
             )
             self.update_pressflow_array(data.fmu_outputs)

-        self.update_scheduled_jobs(data.running + data.queue)
-        self.update_status(
-            data.current_timestep, len(data.running), len(data.queue), data.num_active_nodes,
-            data.num_free_nodes, data.down_nodes, data.avg_net_util, data.slowdown_per_job,
-            data.time_delta
-        )
-
         self.update_scheduled_jobs(data.running + data.queue)

         self.update_status(
@@ -532,7 +545,8 @@ class LayoutManager:
             data.down_nodes,
             data.avg_net_util,
             data.slowdown_per_job,
-            data.time_delta
+            data.time_delta,
+            timestep_start=timestep_start
         )

         self.update_power_array(
@@ -555,7 +569,7 @@ class LayoutManager:
                 time_delta, autoshutdown=True)):

             if data and (not self.debug and not self.noui):
-                self.update_full_layout(data, time_delta)
+                self.update_full_layout(data, time_delta, timestep_start=timestep_start)
                 # self.update_progress_bar(i-last_i)
                 # last_i=i
         if not self.debug and not self.noui:
diff --git a/raps/workload.py b/raps/workload.py
index cedb52f..1e6b562 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -70,12 +70,12 @@ class Workload:
     def compute_traces(self,
                        cpu_util: float,
                        gpu_util: float,
-                       wall_time: int,
+                       expected_run_time: int,
                        trace_quanta: int
                        ) -> tuple[np.ndarray, np.ndarray]:
         """ Compute CPU and GPU traces based on mean CPU & GPU utilizations and wall time.
""" - cpu_trace = cpu_util * np.ones(int(wall_time) // trace_quanta) - gpu_trace = gpu_util * np.ones(int(wall_time) // trace_quanta) + cpu_trace = cpu_util * np.ones(int(expected_run_time) // trace_quanta) + gpu_trace = gpu_util * np.ones(int(expected_run_time) // trace_quanta) return (cpu_trace, gpu_trace) def job_arrival_distribution_draw_poisson(self, args, config): @@ -227,7 +227,7 @@ class Workload: time_limit=time_limit, start_time=start_time, end_time=end_time, - wall_time=wall_time, trace_time=wall_time, + expected_run_time=wall_time, trace_time=wall_time, trace_start_time=0, trace_end_time=wall_time, cpu_cores_required=cpu_cores_required, gpu_units_required=gpu_units_required, @@ -260,7 +260,7 @@ class Workload: time_limit=random.randint(43200, 43200), start_time=0, end_time=et, - wall_time=et)) + expected_run_time=et)) else: new_job = Job(job_dict(nodes_required=1, name="LLM", @@ -275,7 +275,7 @@ class Workload: time_limit=43200, start_time=0, end_time=7200, - wall_time=random.randint(60, 7200))) + expected_run_time=random.randint(60, 7200))) jobs.append(new_job) return jobs @@ -407,7 +407,7 @@ class Workload: time_limit=time_limit, start_time=time_to_next_job, end_time=time_to_next_job + wall_time, - wall_time=wall_time, trace_time=wall_time, + expected_run_time=wall_time, trace_time=wall_time, trace_start_time=0, trace_end_time=wall_time, trace_quanta=config['TRACE_QUANTA'] * downscale, downscale=downscale @@ -454,7 +454,7 @@ class Workload: time_limit=job_time + 1, start_time=0, end_time=job_time, - wall_time=job_time, + expected_run_time=job_time, trace_time=job_time, trace_start_time=0, trace_end_time=job_time, @@ -496,7 +496,7 @@ class Workload: submit_time=0, start_time=0, end_time=job_time, - wall_time=job_time, + expected_run_time=job_time, trace_time=job_time, trace_start_time=0, trace_end_time=job_time, @@ -541,7 +541,7 @@ class Workload: time_limit=job_time + 1, start_time=0, end_time=job_time, - wall_time=job_time, + expected_run_time=job_time, trace_time=job_time, trace_start_time=0, trace_end_time=job_time, @@ -572,7 +572,7 @@ class Workload: time_limit=job_time + 1, start_time=10800, end_time=14200, - wall_time=job_time, + expected_run_time=job_time, trace_time=job_time, trace_start_time=0, trace_end_time=job_time, @@ -602,7 +602,7 @@ class Workload: time_limit=job_time + 1, start_time=14200, end_time=17800, - wall_time=job_time, + expected_run_time=job_time, trace_time=job_time, trace_start_time=0, trace_end_time=job_time, @@ -631,7 +631,7 @@ class Workload: time_limit=job_time + 1, start_time=17800, end_time=21400, - wall_time=job_time, + expected_run_time=job_time, trace_time=job_time, trace_start_time=0, trace_end_time=job_time, @@ -652,7 +652,7 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): split = dist_split y = [y.nodes_required for y in jobs] - x = [x.wall_time for x in jobs] + x = [x.expected_run_time for x in jobs] x2 = [x.time_limit for x in jobs] fig_m = plt.figure() gs = fig_m.add_gridspec(30, 1) @@ -751,7 +751,7 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): axs[1][1].tick_params(axis="y", labelleft=False) # Submit_time and Wall_time - duration = [x.wall_time for x in jobs] + duration = [x.expected_run_time for x in jobs] nodes_required = [x.nodes_required for x in jobs] submit_t = [x.submit_time for x in jobs] @@ -787,7 +787,7 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): # ax_b labels: ax_b.set_xlabel("time [hh:mm]") minx_s = 0 - maxx_s = 
math.ceil(max([x.wall_time for x in jobs]) + max([x.submit_time for x in jobs])) + maxx_s = math.ceil(max([x.expected_run_time for x in jobs]) + max([x.submit_time for x in jobs])) x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] x_label_ticks = [n * 60 for n in x_label_mins[0::60]] x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for @@ -887,7 +887,7 @@ def run_workload(): plot_job_hist(jobs, config=config, dist_split=args.multimodal, gantt_nodes=args.gantt_nodes) if args.output: timestep_start = min([x.submit_time for x in jobs]) - timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.wall_time for x in jobs])) + timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs])) filename = create_file_indexed('wl', create=False, ending="npz").split(".npz")[0] # savez_compressed add npz itself, but create_file_indexed needs to check for .npz to find existing files np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) @@ -956,7 +956,7 @@ def run_workload(): time_limit=wall_time, start_time=0, end_time=wall_time, - wall_time=wall_time, + expected_run_time=wall_time, trace_time=wall_time, trace_start_time=0, trace_end_time=wall_time, @@ -992,7 +992,7 @@ def run_workload(): time_limit=wall_time, start_time=0, end_time=wall_time, - wall_time=wall_time, + expected_run_time=wall_time, trace_time=wall_time, trace_start_time=0, trace_end_time=wall_time, @@ -1026,7 +1026,7 @@ def run_workload(): time_limit=wall_time, start_time=offset, end_time=offset + wall_time, - wall_time=wall_time, + expected_run_time=wall_time, trace_time=wall_time, trace_start_time=0, trace_end_time=wall_time, -- GitLab From 34ea2f794c61f0c61cef85ca4af8628e4a883759 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Fri, 22 Aug 2025 09:47:45 -0400 Subject: [PATCH 245/388] Fixed bug if no fastforward or time was specified --- raps/telemetry.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 08ce3d9..e78a40a 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -256,13 +256,14 @@ class Telemetry: jobs_from_file, timestep_start_from_file, timestep_end_from_file, args_from_file = self.load_snapshot( file) if args_from_file is not None: - print("File was generated with:" - f"\n--system {args_from_file.system} " - f"-ff {args_from_file.fastforward} " - f"-t {args_from_file.time}\n" - f"All Args:\n{args_from_file}" - "To use these set them from the commandline!" - ) + print(f"File was generated with:" + f"\n--system {args_from_file.system} ") + if hasattr(args_from_file, 'fastforward'): + print(f"-ff {args_from_file.fastforward} ") + if hasattr(args_from_file, 'time'): + print(f"-t {args_from_file.time}") + print(f"All Args:\n{args_from_file}" + "\nTo use these set them from the commandline!") else: print("No generation arguments extracted from input file!") # Args are usually extracted to tell the users how to reporduce results. 
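For reference, a minimal sketch of the snapshot round trip these reproduction hints depend on: the `np.savez_compressed` call mirrors `run_workload()` in raps/workload.py, and the key-membership probes mirror `load_snapshot()` above. The `"wl0"` filename and the empty job list are illustrative only:

    import numpy as np

    # Write a snapshot the way run_workload() does (jobs plus time window and args).
    np.savez_compressed("wl0", jobs=[], timestep_start=0, timestep_end=3600, args=None)

    # Read it back, probing for optional keys the way load_snapshot() does.
    data = np.load("wl0.npz", allow_pickle=True)
    timestep_start = int(data["timestep_start"]) if "timestep_start" in data else 0
    timestep_end = int(data["timestep_end"]) if "timestep_end" in data else np.inf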
-- GitLab From 235c1fba95fb8ceb0f7792377359f347418cbbbd Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Mon, 25 Aug 2025 17:34:11 +0000 Subject: [PATCH 246/388] Refactor args/simulation config --- .flake8 | 2 +- README.md | 16 +- main.py | 4 +- multi-part-sim-mpi.py | 28 +- multi-part-sim.py | 26 +- pyproject.toml | 2 +- raps/args.py | 310 ---------------- raps/dataloaders/adastraMI250.py | 2 +- raps/dataloaders/frontier.py | 5 +- raps/dataloaders/lassen.py | 8 +- raps/dataloaders/marconi100.py | 2 +- raps/downtime.py | 2 +- raps/raps_config.py | 35 ++ raps/sim_config.py | 331 ++++++++++++++++++ raps/{config.py => system_config.py} | 158 ++++++--- raps/telemetry.py | 12 +- raps/utils.py | 192 +++++++--- raps/workload.py | 80 +---- scripts/marconi100-day51.sh | 8 +- scripts/meta_run.sh | 2 +- tests/systems/test_main_network_run.py | 2 +- .../systems/test_main_network_withdata_run.py | 2 +- tests/systems/test_main_time_delta_run.py | 7 +- .../test_main_time_delta_sub_second_run.py | 9 +- tests/systems/test_main_time_ff_delta_run.py | 2 +- tests/systems/test_main_withdata_run.py | 2 +- .../systems/test_multi_part_sim_basic_run.py | 1 - .../test_multi_part_sim_network_run.py | 3 +- tests/systems/test_workload_synthetic.py | 1 - tests/test_main.py | 1 - tests/test_system_config.py | 10 - tests/unit/test_system_config.py | 26 ++ tests/unit/test_utils.py | 42 +++ 33 files changed, 752 insertions(+), 581 deletions(-) delete mode 100644 raps/args.py create mode 100644 raps/raps_config.py create mode 100644 raps/sim_config.py rename raps/{config.py => system_config.py} (54%) delete mode 100644 tests/test_system_config.py create mode 100644 tests/unit/test_system_config.py create mode 100644 tests/unit/test_utils.py diff --git a/.flake8 b/.flake8 index bd48511..ce4ab0a 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] -exclude = .git, __pycache__, venv*, simulation_results, third_party +exclude = .git, __pycache__, venv*, simulation_results, third_party, models max-line-length = 120 diff --git a/README.md b/README.md index f46e23a..665a64a 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Instructions for setup and usage are given below. An online documentation of Exa ## Setup environment -Note: Requires python3.11 or greater. +Note: Requires python3.12 or greater. pip install -e . @@ -30,7 +30,7 @@ Note: Requires python3.11 or greater. # Frontier DATEDIR="date=2024-01-18" DPATH=~/data/frontier-sample-2024-01-18 - python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR + python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR ## Open Telemetry dataset @@ -46,7 +46,7 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from For Google cluster trace v2 - python main.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -ff 600 + python main.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --ff 600 # analyze dataset python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v @@ -83,7 +83,7 @@ For Lumi Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. 
To run a network simulation, use the following command: - python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -ff 365d -t 12h --arrival poisson -net + python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --ff 365d -t 12h --arrival poisson --net ## Snapshot of extracted workload data @@ -140,10 +140,10 @@ This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename There are three ways to modify replaying of telemetry data: 1. `--arrival`. Changing the arrival time distribution - replay cases will default to `--arrival prescribed`, where the jobs will be submitted exactly as they were submitted on the physical machine. This can be changed to `--arrival poisson` to change when the jobs arrive, which is especially useful in cases where there may be gaps in time, e.g., when the system goes down for several days, or the system is is underutilized. -python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --arrival poisson +python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --arrival poisson 2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler. -python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h +python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h 3. `--scale`. Changing the scale of each job in the telemetry data. The `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition), and randomly select the number of nodes for each job from one to max nodes. This flag is useful when replaying telemetry from a larger system onto a smaller system. @@ -151,11 +151,11 @@ python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --pol ## Job-level power output example for replay of single job - python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR --jid 1234567 -o + python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --jid 1234567 -o ## Compute stats on telemetry data, e.g., average job arrival time - python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR + python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR ## Build and run Docker container diff --git a/main.py b/main.py index 9655b1b..c3ba946 100644 --- a/main.py +++ b/main.py @@ -16,7 +16,7 @@ import math # from raps.helpers import check_python_version # -from raps.config import get_system_config +from raps.system_config import get_system_config from raps.constants import OUTPUT_PATH, SEED from raps.cooling import ThermoFluidsModel from raps.ui import LayoutManager @@ -45,7 +45,7 @@ from raps.stats import ( print_formatted_report ) -from raps.args import args, args_dict +from raps.sim_config import args, args_dict check_python_version() diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py index fed038e..eabb19b 100644 --- a/multi-part-sim-mpi.py +++ b/multi-part-sim-mpi.py @@ -8,18 +8,16 @@ stats for heterogeneous systems (e.g., LUMI, Setonix, Adastra). 
from tqdm import tqdm from mpi4py import MPI -from raps.utils import convert_to_seconds, next_arrival +from raps.utils import next_arrival from raps.workload import Workload from raps.telemetry import Telemetry from raps.power import PowerManager, compute_node_power from raps.flops import FLOPSManager from raps.engine import Engine from raps.ui import LayoutManager -from raps.config import get_system_config, CONFIG_PATH -from args import args +from raps.system_config import get_partition_configs +from raps.sim_config import args import random -import os -import glob from raps.helpers import check_python_version check_python_version() @@ -29,20 +27,10 @@ def main(): rank = comm.Get_rank() size = comm.Get_size() - # 1) Expand “partitions” (on rank 0) if the user used a glob: - if rank == 0: - partition_names = args.partitions - if '*' in partition_names[0]: - paths = glob.glob(os.path.join(CONFIG_PATH, partition_names[0])) - partition_names = [os.path.join(*p.split(os.sep)[-2:]) for p in paths] - else: - partition_names = None - - # 2) Broadcast the final list of partition_names to everyone - partition_names = comm.bcast(partition_names, root=0) - # 3) Load configs for every partition (all ranks do this) - configs = [get_system_config(p).get_legacy() for p in partition_names] + multi_config = get_partition_configs(args.partitions) + partition_names = multi_config.partition_names + configs = [c.get_legacy() for c in multi_config.partitions] args_dicts = [{**vars(args), 'config': cfg} for cfg in configs] # 4) Each rank decides which partition‐indices it owns (round-robin): @@ -122,12 +110,12 @@ def main(): # 9) Compute timestep_start / timestep_end (all ranks agree): if args.fastforward: - fastforward = convert_to_seconds(args.fastforward) + fastforward = args.fastforward else: fastforward = 0 if args.time: - timesteps = convert_to_seconds(args.time) + timesteps = args.time else: timesteps = 88200 # default 24 hours diff --git a/multi-part-sim.py b/multi-part-sim.py index 350462e..587dffb 100644 --- a/multi-part-sim.py +++ b/multi-part-sim.py @@ -9,33 +9,27 @@ statistics for systems such as MIT Supercloud, Setonix, Adastra, and LUMI. 
from tqdm import tqdm from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats -from raps.utils import convert_to_seconds, next_arrival +from raps.utils import next_arrival from raps.workload import Workload from raps.telemetry import Telemetry from raps.power import PowerManager, compute_node_power from raps.flops import FLOPSManager from raps.engine import Engine from raps.ui import LayoutManager -from raps.config import get_system_config, CONFIG_PATH -from raps.args import args +from raps.system_config import get_partition_configs +from raps.sim_config import args import random import os -import glob from raps.helpers import check_python_version check_python_version() # Load configurations for each partition -partition_names = args.partitions +multi_config = get_partition_configs(args.partitions) +partition_names = multi_config.partition_names +configs = [c.get_legacy() for c in multi_config.partitions] +args.system = multi_config.system_name -print(args.partitions) -if '*' in args.partitions[0]: - paths = glob.glob(os.path.join(CONFIG_PATH, args.partitions[0].replace("'", ""))) - partition_names = [os.path.join(*p.split(os.sep)[-2:]) for p in paths] - - args.system = partition_names[0].split(os.sep)[0] - -configs = [get_system_config(partition).get_legacy() for partition in partition_names] args_dicts = [ {**vars(args), 'config': config, 'partition': partition_names[i]} for i, config in enumerate(configs) @@ -123,11 +117,11 @@ for i, (config, ad) in enumerate(zip(configs, args_dicts)): # Set simulation timesteps if args.fastforward: - fastfoward = convert_to_seconds(args.fastforward) + fastfoward = args.fastforward else: fastforward = 0 if args.time: - timesteps = convert_to_seconds(args.time) + timesteps = args.time else: timesteps = 88200 # Default to 24 hours @@ -135,7 +129,7 @@ timestep_start = fastforward timestep_end = timestep_start + timesteps if args.time_delta: - time_delta = convert_to_seconds(args.time_delta) + time_delta = args.time_delta else: time_delta = config['TRACE_QUANTA'] diff --git a/pyproject.toml b/pyproject.toml index d46a621..b7fbb99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "raps" version = "0.0.1" -requires-python = ">=3.11" +requires-python = ">=3.12" description = "RAPS" readme = "README.md" # license = {file = "LICENSE.txt"} diff --git a/raps/args.py b/raps/args.py deleted file mode 100644 index 9c4142b..0000000 --- a/raps/args.py +++ /dev/null @@ -1,310 +0,0 @@ -import argparse -import os -import sys -import yaml -from raps.schedulers.default import PolicyType, BackfillType - -from raps.workload import add_workload_to_parser, check_workload_args -from raps.utils import convert_to_seconds - - -def load_config(path): - if path and os.path.exists(path): - with open(path, "r") as f: - return yaml.safe_load(f) or {} - return {} - - -def _expand_path(p): - if isinstance(p, str): - # expand ~ and $VARS - return os.path.expanduser(os.path.expandvars(p)) - return p - - -def apply_config_to_args(cfg, args): - # Merge supported sections or top-level keys - merged = {} - for k, v in (cfg or {}).items(): - if isinstance(v, dict) and k in { - "shared", "simulate", "telemetry", "scheduler", "output" - }: - merged.update(v) - else: - # Enter the commandline argument, but _underscores as the -dashes - # are replaced when reading from the commandline, but not in the yaml. 
- merged[k.replace('-', '_')] = v - - # Apply to argparse namespace - for k, v in merged.items(): - setattr(args, k, v) - - # Coerce certain keys to lists if YAML provided strings - list_keys = { - "cluster_var", "output_vars", "input_vars", "partitions", "plot" - } - for key in list_keys: - if hasattr(args, key): - val = getattr(args, key) - if isinstance(val, str): - setattr(args, key, [val]) - - # Expand paths (tilde + env vars) - for key in ("path", "output_dir", "plot_dir", "config_file"): - if hasattr(args, key): - setattr(args, key, _expand_path(getattr(args, key))) - - # Normalize enums if provided as strings in YAML - if getattr(args, "policy", None): - try: - # Accept exact values or case-insensitive - val = str(args.policy) - opts = {p.value.lower(): p.value for p in PolicyType} - if val.lower() in opts: - args.policy = opts[val.lower()] - except Exception: - pass - - if getattr(args, "backfill", None): - try: - val = str(args.backfill) - opts = {b.value.lower(): b.value for b in BackfillType} - if val.lower() in opts: - args.backfill = opts[val.lower()] - except Exception: - pass - - -parser = argparse.ArgumentParser( - description="Resource Allocator & Power Simulator (RAPS)", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, -) -parser.add_argument( - "config_file", nargs="?", default=None, - help="YAML config file; overrides defaults/flags." -) - -# System configurations -parser.add_argument("--system", type=str, default="frontier", - help="System config to use") -parser.add_argument( - "-x", "--partitions", nargs="+", default=None, - help="List of machine configurations, e.g., -x setonix-cpu setonix-gpu" -) -parser.add_argument("-c", "--cooling", action="store_true", - help="Include FMU cooling model") -parser.add_argument("-net", "--simulate-network", default=False, - action="store_true", help="Include Network model") -parser.add_argument("--noui", default=False, action="store_true", - help="Run without UI") - -# Simulation runtime options -parser.add_argument("-ff", "--fastforward", type=str, default=None, - help="Fast-forward by time amount (uses same units as -t)") -parser.add_argument("-t", "--time", type=str, default=None, - help="Length of time to simulate, e.g., 123, 27m, 3h, 7d") -parser.add_argument("--time-delta", type=str, default="1s", - help="Step size, e.g., 15s, 1m, 1h, 1ms (default: 1s)") -parser.add_argument("-d", "--debug", action="store_true", - help="Enable debug mode and disable rich layout") -parser.add_argument("-n", "--numjobs", type=int, default=100, - help="Number of jobs to schedule") -parser.add_argument("-v", "--verbose", action="store_true", - help="Enable verbose output") -parser.add_argument("--start", type=str, default="2021-05-21T13:00", - help="ISO8601 start of simulation") -parser.add_argument("--end", type=str, default="2021-05-21T14:00", - help="ISO8601 end of simulation") -parser.add_argument("--seed", action="store_true", - help="Set RNG seed for deterministic simulation") -parser.add_argument( - "-u", "--uncertainties", action="store_true", - help=("Use float-with-uncertainties (much slower).") -) - -# UI -ui_layout_choices = ["layout1", "layout2"] -parser.add_argument("--layout", type=str, choices=ui_layout_choices, - default=ui_layout_choices[0], help="UI layout") - -# Output -parser.add_argument('-o', '--output', type=str, nargs="?", - const="", # Used if -o is given without a value - default=None, # Used if -o is not provided at all - help=("Output power, cooling, and loss models for later " - "analysis. 
Argumment specifies name.") - ) -plot_choices = ["power", "loss", "pue", "temp", "util"] -parser.add_argument("-p", "--plot", nargs="+", choices=plot_choices, - help="Plots to generate") -img_choices = ["png", "svg", "jpg", "pdf", "eps"] -parser.add_argument("--imtype", type=str, choices=img_choices, - default=img_choices[0], help="Plot image type") - -# Telemetry -parser.add_argument( - "-f", "--replay", nargs="+", type=str, - help=("Either: path/to/joblive path/to/jobprofile OR filename.npz " - "(overrides --workload)") -) -parser.add_argument("-e", "--encrypt", action="store_true", - help="Encrypt sensitive data in telemetry") -parser.add_argument("--validate", action="store_true", - help="Use node power instead of CPU/GPU utilizations") -parser.add_argument("--jid", type=str, default="*", - help="Replay job id") -parser.add_argument("--scale", type=int, default=0, - help=("Scale telemetry to a smaller target system, " - "e.g., --scale 192")) -parser.add_argument("--live", action="store_true", - help="Grab data from live system.") - - -# Synthetic workloads -parser = add_workload_to_parser(parser) - -# Scheduling -sched_choices = ["default", "scheduleflow", "nrel", "anl", "flux", - "experimental", "multitenant"] -parser.add_argument("--scheduler", type=str, choices=sched_choices, - default=sched_choices[0], help="Scheduler name") -parser.add_argument("--policy", type=str, default=None, - help=f"Schedule policy: {[p.value for p in PolicyType]}") -parser.add_argument("--backfill", type=str, default=None, - help=f"Backfill policy: {[b.value for b in BackfillType]}") - -# Arrival -arr_choices = ["prescribed", "poisson"] -parser.add_argument("--arrival", default=arr_choices[0], type=str, - choices=arr_choices, - help=("Modify arrival distribution (poisson) or use " - "original submit times (prescribed)")) -parser.add_argument("--job-arrival-time", type=int, - help=("Poisson arrival (seconds). 
Overrides " - "config/*/scheduler.json")) -parser.add_argument("--job-arrival-rate", type=float, - help="Modify Poisson rate (default 1)") - -# Accounts -parser.add_argument("--accounts", action="store_true", - help="Track accounts") -parser.add_argument("--accounts-json", type=str, - help="Accounts JSON from previous run") - -# Downtime -parser.add_argument("--downtime-first", type=str, default=None, - help="First downtime, e.g., after 123, 27m, 3h, 7d") -parser.add_argument("--downtime-interval", type=str, default=None, - help="Interval between downtimes, e.g., every 123, 27m, 3h, 7d") -parser.add_argument("--downtime-length", type=str, default=None, - help="Downtime length, e.g., 123, 27m, 3h, 7d") - -# Continous Job Generation -parser.add_argument("--continuous-job-generation", action="store_true", - help="Activate continuous job generation.") -parser.add_argument("--maxqueue", type=int, default=50, - help="Specify the max queue length for continuous job generation.") - - -def post_process_args(args): - if args.time_delta: - tdelta_raw, tdelta_down = convert_to_seconds(args.time_delta) - else: - tdelta_raw, tdelta_down = None, 1 - - if args.time: - time_raw, time_down = convert_to_seconds(args.time) - else: - time_raw, time_down = None, 1 - - if args.fastforward: - ff_raw, ff_down = convert_to_seconds(args.fastforward) - else: - ff_raw, ff_down = None, 1 - - if args.downtime_first: - dtf_raw, dtf_down = convert_to_seconds(args.downtime_first) - if args.downtime_interval: - dti_raw, dti_down = convert_to_seconds(args.downtime_interval) - if args.downtime_length: - dtl_raw, dtl_down = convert_to_seconds(args.downtime_length) - - max_down = max(tdelta_down, time_down, ff_down) - args.downscale = max_down - - if args.time_delta: - args.time_delta = int((tdelta_raw / tdelta_down) * max_down) - if args.time: - args.time = int((time_raw / time_down) * max_down) - if args.fastforward: - args.fastforward = int((ff_raw / ff_down) * max_down) - - if args.downtime_first: - args.downtime_first = int((dtf_raw / dtf_down) * max_down) - if args.downtime_interval: - args.downtime_interval = int((dti_raw / dti_down) * max_down) - if args.downtime_length: - args.downtime_length = int((dtl_raw / dtl_down) * max_down) - - return args - - -# ---- Parse + YAML merge ---- -args = parser.parse_args() - -# Config file existence check -if args.config_file and not os.path.isfile(args.config_file): - print(f"Error: '{args.config_file}' not found.", file=sys.stderr) - sys.exit(1) - -cfg = load_config(args.config_file) - -apply_config_to_args(cfg, args) - -# Optional: format fileprefix after config merge (if provided by workload parser) -if hasattr(args, "fileprefix") and isinstance(args.fileprefix, str): - try: - args.fileprefix = args.fileprefix.format(**vars(args)) - except KeyError as e: - print(f"Warning: missing placeholder {e} in fileprefix; skipping.") - -# Expand paths inside list fields (e.g., replay) -if getattr(args, "replay", None): - if isinstance(args.replay, str): - args.replay = [args.replay] - args.replay = [_expand_path(p) for p in args.replay] - -# Prefer replay if both replay and workload got set -if getattr(args, "replay", None) and getattr(args, "workload", None): - print("Info: --replay provided; ignoring --workload.", file=sys.stderr) - print("Info: --replay provided; ignoring --workload.", file=sys.stderr) - args.workload = None - -# Enforce valid policy/backfill values (after normalization in apply_config_to_args) -if getattr(args, "policy", None): - _valid_policies = {p.value for p in 
PolicyType} - if args.policy not in _valid_policies: - sys.exit(f"Error: Unknown policy '{args.policy}'. " - f"Valid: {sorted(_valid_policies)}") -if getattr(args, "backfill", None): - _valid_backfills = {b.value for b in BackfillType} - if args.backfill not in _valid_backfills: - sys.exit(f"Error: Unknown backfill '{args.backfill}'. " - f"Valid: {sorted(_valid_backfills)}") - -# Multi-partition guard for single-part driver (check merged args incl. CLI) -if os.path.basename(sys.argv[0]) == "main.py": - _parts = args.partitions or [] - if isinstance(_parts, str): - _parts = [_parts] - if len(_parts) > 1: - sys.exit("Error: Use multi-part-sim.py for multi-partition runs.") - -# Validate workload args before time conversions -check_workload_args(args) - -# Convert time-like args and compute downscale -args = post_process_args(args) - -# Expose dict form -args_dict = vars(args) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index f0f576b..90201c8 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -12,7 +12,7 @@ python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --policy priority --backfill easy # to fast-forward 60 days and replay for 1 day - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 -ff 60d -t 1d + python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --ff 60d -t 1d # to analyze dataset python -m raps.telemetry -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 -v diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 506dab9..8491617 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -4,12 +4,11 @@ # To simulate DATEDIR="date=2024-01-18" DPATH=/path/to/data - python main.py -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR + python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR # To analyze the data - python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR $DPATH/jobprofile/$DATEDIR + python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR """ -import ast import time import numpy as np import pandas as pd diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index f777f79..c9aae0d 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -23,7 +23,7 @@ Usage Instructions: python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson # to fast-forward 365 days and replay for 1 day. This region day has 2250 jobs with 1650 jobs executed. 
- python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen -ff 365d -t 1d + python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --ff 365d -t 1d # For the network replay this command gives suiteable snapshots: python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson # noqa @@ -38,7 +38,7 @@ from tqdm import tqdm from datetime import timedelta from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, convert_to_seconds +from ..utils import power_to_utilization, next_arrival_byconfkwargs, parse_td def load_data(path, **kwargs): @@ -80,7 +80,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): time_to_simulate = 31536000 # a year time_to_simulate_timedelta = timedelta(seconds=time_to_simulate) # timedelta else: - time_to_simulate_timedelta = timedelta(seconds=convert_to_seconds(time_to_simulate)) # timedelta + time_to_simulate_timedelta = parse_td(time_to_simulate) # timedelta telemetry_start_timestamp = allocation_df['begin_timestamp'].min() telemetry_start_time = 0 @@ -190,7 +190,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): ib_tx_per_node = total_ib_tx / n # average bytes per node ib_rx_per_node = total_ib_rx / n # average bytes per node - # net_tx, net_rx = [],[] # generate_network_sequences generates errors (e.g. -ff 800d -t 1d ) + # net_tx, net_rx = [],[] # generate_network_sequences generates errors (e.g. --ff 800d -t 1d ) # net_tx, net_rx = generate_network_sequences(ib_tx, ib_rx, samples, lambda_poisson=0.3) net_tx, net_rx = throughput_traces(ib_tx_per_node, ib_rx_per_node, samples) diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 6222c7c..fef8ec0 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -16,7 +16,7 @@ python main.py -f /path/to/job_table.parquet --system marconi100 --policy priority --backfill firstfit # to fast-forward 60 days and replay for 1 day - python main.py -f /path/to/job_table.parquet --system marconi100 -ff 60d -t 1d + python main.py -f /path/to/job_table.parquet --system marconi100 --ff 60d -t 1d # to analyze dataset python -m raps.telemetry -f /path/to/job_table.parquet --system marconi100 -v diff --git a/raps/downtime.py b/raps/downtime.py index 7c5bf1f..97c9139 100644 --- a/raps/downtime.py +++ b/raps/downtime.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING from raps.job import JobState -from raps.args import args +from raps.sim_config import args, sim_config import numpy as np diff --git a/raps/raps_config.py b/raps/raps_config.py new file mode 100644 index 0000000..6eddca8 --- /dev/null +++ b/raps/raps_config.py @@ -0,0 +1,35 @@ +from pathlib import Path +from raps.utils import ExpandedPath +from pydantic_settings import BaseSettings, SettingsConfigDict, YamlConfigSettingsSource +ROOT_DIR = Path(__file__).parent.parent + + +class RapsConfig(BaseSettings): + """ + General settings for raps. Pydantic will automatically populate this model from env vars or a + .env file. + """ + # TODO I think we should move more of general/ui related settings from SimConfig into here. + # We'll be using SimConfig in the simulation server and those settings aren't applicable there, + # so it makes sense to keep SimConfig scoped to the logical operation of the sim. 
+ + system_config_dir: ExpandedPath = ROOT_DIR / 'config' + """ Directory containing system configuration files """ + + model_config = SettingsConfigDict( + yaml_file="raps_config.yaml", + env_prefix='raps_', + env_nested_delimiter='__', + nested_model_default_partial_update=True, + ) + + # Customize setting sources, we'll use yaml config file instead of the default .env + @classmethod + def settings_customise_sources( + cls, settings_cls, + init_settings, env_settings, dotenv_settings, file_secret_settings, + ): + return (init_settings, env_settings, YamlConfigSettingsSource(settings_cls),) + + +raps_config = RapsConfig() diff --git a/raps/sim_config.py b/raps/sim_config.py new file mode 100644 index 0000000..127cec3 --- /dev/null +++ b/raps/sim_config.py @@ -0,0 +1,331 @@ +import argparse +import sys +import yaml +from datetime import timedelta +from pathlib import Path +from typing import Literal +from raps.schedulers.default import PolicyType, BackfillType + +from raps.utils import ( + parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, + pydantic_add_args, yaml_dump, parse_td, +) + +from pydantic import BaseModel, model_validator, computed_field +from pydantic_settings import SettingsConfigDict + +Distribution = Literal['uniform', 'weibull', 'normal'] + + +class SimConfig(BaseModel): + system: str | None = None + """ System config to use """ + partitions: list[str] = [] + """ List of multiple system configurations for a multi-partition run. Can contain wildcards """ + + cooling: bool = False + """ Include the FMU cooling model """ + simulate_network: bool = False + """ Include network model """ + + # Simulation runtime options + fastforward: int | None = None + """ + Fast-forward by time amount (unit specified by `time_unit`, default seconds). + Can pass a string like 15s, 1m, 1h + """ + time: int | None = None + """ + Length of time to simulate (unit specified by `time_unit`, default seconds). + Can pass a string like 123, 27m, 3h, 7d + """ + time_delta: int = 1 + """ + Step size (unit specified by `time_unit`, default seconds). + Can pass a string like 15s, 1m, 1h, 1ms + """ + time_unit: timedelta + """ + Units all time delta ints are measured in (default seconds) + """ + + @computed_field + @property + def downscale(self) -> int: + return int(timedelta(seconds=1) / self.time_unit) + + start: str = "2021-05-21T13:00" + """ ISO8601 start of simulation """ + end: str = "2021-05-21T14:00" + """ ISO8601 end of simulation """ + + numjobs: int = 100 + """ Number of jobs to schedule """ + + uncertainties: bool = False + """ Use float-with-uncertainties (much slower) """ + + seed: bool = False + """ Set RNG seed for deterministic simulation """ + output: ExpandedPath | None = None + """ Output power, cooling, and loss models for later analysis. Argument specifies name. 
""" + + debug: bool = False + """ Enable debug mode and disable rich layout """ + noui: bool = False + """ Run without UI """ + verbose: bool = False + """ Enable verbose output """ + layout: Literal["layout1", "layout2"] = "layout1" + """ UI layout """ + plot: list[Literal["power", "loss", "pue", "temp", "util"]] | None = None + """ Plots to generate """ + + imtype: Literal["png", "svg", "jpg", "pdf", "eps"] = "png" + """ Plot image type """ + + replay: list[ExpandedPath] | None = None + """ Either: path/to/joblive path/to/jobprofile OR filename.npz """ + + encrypt: bool = False + """ Encrypt sensitive data in telemetry """ + + power_scope: Literal['node', 'chip'] = "chip" + """ node mode will use node power instead of CPU/GPU utilizations """ + + jid: str = "*" + """ Replay job id """ + + scale: int = 0 + """ Scale telemetry to a smaller target system, --scale 192 """ + + live: bool = False + """ Grab data from live system. """ + + # Workload arguments (TODO split into separate model) + workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant'] | None = None + + """ Type of synthetic workload """ + multimodal: list[float] = [1.0] + """ + Percentage to draw from each distribution (list of floats). e.g. '0.2 0.8' percentages apply + in order to the list of the --distribution argument list. + """ + # Jobsize + jobsize_distribution: list[Distribution] | None = None + """ Distribution type """ + jobsize_normal_mean: float | None = None + """ Mean (mu) for Normal distribution """ + jobsize_normal_stddev: float | None = None + """ Standard deviation (sigma) for Normal distribution """ + jobsize_weibull_shape: float | None = None + """ Jobsize shape of weibull """ + jobsize_weibull_scale: float | None = None + """ Jobsize scale of weibull """ + jobsize_is_of_degree: int | None = None + """ Draw jobsizes from distribution of degree N (squared,cubed). """ + jobsize_is_power_of: int | None = None + """ Draw jobsizes from distribution of power of N (2->2^x,3->3^x). 
""" + + # Walltime + walltime_distribution: list[Distribution] | None = None + """ Distribution type """ + walltime_normal_mean: float | None = None + """ Walltime mean (mu) for Normal distribution """ + walltime_normal_stddev: float | None = None + """ Walltime standard deviation (sigma) for Normal distribution """ + walltime_weibull_shape: float | None = None + """ Walltime shape of weibull """ + walltime_weibull_scale: float | None = None + """ Walltime scale of weibull """ + # Utilizations (TODO should probably make a reusable "Distribution" submodel) + cpuutil_distribution: list[Distribution] = ['uniform'] + """ Distribution type """ + cpuutil_normal_mean: float | None = None + """ Walltime mean (mu) for Normal distribution """ + cpuutil_normal_stddev: float | None = None + """ Walltime standard deviation (sigma) for Normal distribution """ + cpuutil_weibull_shape: float | None = None + """ Walltime shape of weibull """ + cpuutil_weibull_scale: float | None = None + """ Walltime scale of weibull """ + gpuutil_distribution: list[Distribution] = ['uniform'] + """ Distribution type """ + gpuutil_normal_mean: float | None = None + """ Walltime mean (mu) for Normal distribution """ + gpuutil_normal_stddev: float | None = None + """ Walltime standard deviation (sigma) for Normal distribution """ + gpuutil_weibull_shape: float | None = None + """ Walltime shape of weibull """ + gpuutil_weibull_scale: float | None = None + """ Walltime scale of weibull """ + gantt_nodes: bool = False + """ Print Gannt with nodes required as line thickness (default false) """ + + # Synthetic workloads + scheduler: Literal[ + "default", "scheduleflow", "nrel", "anl", "flux", "experimental", "multitenant", + ] = "default" + """ Scheduler name """ + policy: PolicyType | None = None + """ Schedule policy """ + backfill: BackfillType | None = None + """ Backfill policy """ + + # Arrival + arrival: Literal["prescribed", "poisson"] = "prescribed" + """ Modify arrival distribution (poisson) or use original submit times (prescribed) """ + job_arrival_time: int | None = None + """ Poisson arrival (seconds). Overrides system config scheduler.job_arrival_time """ + job_arrival_rate: float | None = None # TODO define default here + """ Modify Poisson rate (default 1) """ + + # Accounts + accounts: bool = False + accounts_json: ExpandedPath | None = None + """ Path to accounts JSON file from previous run """ + + # Downtime + downtime_first: int | None = None + """ + First downtime (unit specified by `time_unit`, default seconds). + Can pass a string like 27m, 3h, 7d + """ + downtime_interval: str | None = None + """ + Interval between downtimes (unit specified by `time_unit`, default seconds). + Can pass a string like 123, 27m, 3h, 7d + """ + downtime_length: str | None = None + """ + Downtime length (unit specified by `time_unit`, default seconds). 
+
+    # Continuous Job Generation
+    continuous_job_generation: bool = False
+    """ Activate continuous job generation """
+    maxqueue: int = 50
+    """ Specify the max queue length for continuous job generation """
+
+    @model_validator(mode="before")
+    def _parse_times(cls, data):
+        time_fields = [
+            "time_delta", "time", "fastforward",
+            "downtime_first", "downtime_interval", "downtime_length",
+        ]
+
+        if data.get('time_unit') is not None:
+            time_unit = parse_time_unit(data['time_unit'])
+            input_time_unit = time_unit
+        else:
+            time_unit = min(
+                [infer_time_unit(data[f]) for f in time_fields if data.get(f)],
+                default=timedelta(seconds=1)
+            )
+            # When "inferring" the time unit, interpret raw numbers as seconds.
+            # E.g. `-t 10 --time-delta 1ds` is treated as `-t 10s --time-delta 1ds`
+            input_time_unit = timedelta(seconds=1)
+
+        data['time_unit'] = time_unit
+        for field in time_fields:
+            if data.get(field) is not None:
+                td = parse_td(data[field], input_time_unit)
+                data[field] = convert_to_time_unit(td, time_unit)
+
+        return data
+
+    @model_validator(mode="after")
+    def _validate(self):
+        if self.system and self.partitions:
+            raise ValueError("system and partitions are mutually exclusive")
+        elif not self.system and not self.partitions:
+            self.system = "frontier"
+
+        if not self.replay and not self.workload:
+            self.workload = "random"
+
+        if self.jobsize_is_power_of is not None and self.jobsize_is_of_degree is not None:
+            raise ValueError("jobsize_is_power_of and jobsize_is_of_degree are mutually exclusive")
+
+        return self
+
+    def get_legacy_args(self):
+        """
+        Return as an argparse.Namespace object for backwards compatibility
+        """
+        return argparse.Namespace(**self.get_legacy_args_dict())
+
+    def get_legacy_args_dict(self):
+        """
+        Return as a dict object. This is for backwards compatibility with the rest of RAPS code so
+        we can migrate to the new config gradually. The dict also has a "sim_config" key that
+        contains the SimConfig object itself.
+        """
+        args_dict = self.model_dump(mode="json")
+        # validate has been renamed to power_scope
+        args_dict['validate'] = args_dict["power_scope"] == "node"
+
+        # Convert Path objects to str
+        if args_dict['output']:
+            args_dict['output'] = str(args_dict['output'])
+        if args_dict['replay']:
+            args_dict['replay'] = [str(p) for p in args_dict['replay']]
+        if args_dict['accounts_json']:
+            args_dict['accounts_json'] = str(args_dict['accounts_json'])
+
+        args_dict['sim_config'] = self
+        return args_dict
+
+
+def parse_args(cli_args=None) -> SimConfig:
+    parser = argparse.ArgumentParser(
+        description="Resource Allocator & Power Simulator (RAPS)",
+        allow_abbrev=False,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "config_file", nargs="?", default=None,
+        help=(
+            'YAML sim config file, can be used to configure an experiment instead of using CLI '
+            + 'flags. Pass "-" to read from stdin.'
+ ) + ) + + model_validate_args = pydantic_add_args(parser, SimConfig, model_config=SettingsConfigDict( + cli_implicit_flags=True, + cli_kebab_case=True, + cli_shortcuts={ + "partitions": "x", + "cooling": "c", + "simulate-network": "net", + "fastforward": "ff", + "time": "t", + "debug": "d", + "numjobs": "n", + "verbose": "v", + "output": "o", + "uncertainties": "u", + "plot": "p", + "replay": "f", + "workload": "w", + }, + )) + + args = parser.parse_args(cli_args) + if args.config_file == "-": + config_file_data = yaml.safe_load(sys.stdin.read()) + elif args.config_file: + config_file_data = yaml.safe_load(Path(args.config_file).read_text()) + else: + config_file_data = {} + + return model_validate_args(args, config_file_data) + + +sim_config = parse_args() +args = sim_config.get_legacy_args() +args_dict = sim_config.get_legacy_args_dict() + +if __name__ == "__main__": + print(yaml_dump(sim_config.model_dump(mode="json"))) diff --git a/raps/config.py b/raps/system_config.py similarity index 54% rename from raps/config.py rename to raps/system_config.py index 4f9f709..e458c68 100644 --- a/raps/config.py +++ b/raps/system_config.py @@ -1,14 +1,15 @@ -import os, functools +import functools +import glob +import fnmatch from typing import Any, Literal from pathlib import Path import yaml -from pydantic import BaseModel, computed_field, model_validator - -ROOT_DIR = Path(__file__).parent.parent -CONFIG_PATH = Path(os.environ.get("RAPS_CONFIG", ROOT_DIR / 'config')).resolve() +from pydantic import BaseModel, computed_field, model_validator, field_validator +from raps.raps_config import raps_config # Define Pydantic models for the config to handle parsing and validation + class SystemSystemConfig(BaseModel): num_cdus: int racks_per_cdu: int @@ -27,8 +28,8 @@ class SystemSystemConfig(BaseModel): gpu_peak_flops: float cpu_fp_ratio: float gpu_fp_ratio: float - threads_per_core: int|None = None - cores_per_cpu: int|None = None + threads_per_core: int | None = None + cores_per_cpu: int | None = None @model_validator(mode='after') def _update_down_nodes(self): @@ -76,15 +77,16 @@ class SystemSystemConfig(BaseModel): def available_nodes(self) -> int: return self.total_nodes - len(self.down_nodes) + class SystemPowerConfig(BaseModel): power_gpu_idle: float power_gpu_max: float power_cpu_idle: float power_cpu_max: float power_mem: float - power_nic: float|None = None - power_nic_idle: float|None = None - power_nic_max: float|None = None + power_nic: float | None = None + power_nic_idle: float | None = None + power_nic_max: float | None = None power_nvme: float power_switch: float power_cdu: float @@ -96,6 +98,7 @@ class SystemPowerConfig(BaseModel): rectifier_efficiency: float power_cost: float + class SystemUqConfig(BaseModel): power_gpu_uncertainty: float power_cpu_uncertainty: float @@ -107,8 +110,10 @@ class SystemUqConfig(BaseModel): power_switch_uncertainty: float rectifier_power_uncertainty: float + JobEndStates = Literal["COMPLETED", "FAILED", "CANCELLED", "TIMEOUT", "NODE_FAIL"] + class SystemSchedulerConfig(BaseModel): job_arrival_time: int mtbf: int @@ -120,11 +125,12 @@ class SystemSchedulerConfig(BaseModel): job_end_probs: dict[JobEndStates, float] multitenant: bool = False + class SystemCoolingConfig(BaseModel): cooling_efficiency: float wet_bulb_temp: float - zip_code: str|None = None - country_code: str|None = None + zip_code: str | None = None + country_code: str | None = None fmu_path: str fmu_column_mapping: dict[str, str] w_htwps_key: str @@ -132,70 +138,91 @@ class 
SystemCoolingConfig(BaseModel):
     w_cts_key: str
     temperature_keys: list[str]
 
+
 class SystemNetworkConfig(BaseModel):
     topology: Literal["fat-tree", "dragonfly", "torus3d"]
     network_max_bw: float
-    latency: float|None = None
+    latency: float | None = None
+
+    fattree_k: int | None = None
 
-    fattree_k: int|None = None
+    dragonfly_d: int | None = None
+    dragonfly_a: int | None = None
+    dragonfly_p: int | None = None
 
-    dragonfly_d: int|None = None
-    dragonfly_a: int|None = None
-    dragonfly_p: int|None = None
+    torus_x: int | None = None
+    torus_y: int | None = None
+    torus_z: int | None = None
+    torus_wrap: bool | None = None
+    torus_link_bw: float | None = None
+    torus_routing: str | None = None
 
-    torus_x: int|None = None
-    torus_y: int|None = None
-    torus_z: int|None = None
-    torus_wrap: bool|None = None
-    torus_link_bw: float|None = None
-    torus_routing: str|None = None
+    hosts_per_router: int | None = None
+    latency_per_hop: float | None = None
+    node_coords_csv: str | None = None
 
-    hosts_per_router: int|None = None
-    latency_per_hop: float|None = None
-    node_coords_csv: str|None = None
 
 class SystemConfig(BaseModel):
     system_name: str
+    """ Name of the system, defaults to the yaml file name """
+
     system: SystemSystemConfig
     power: SystemPowerConfig
     scheduler: SystemSchedulerConfig
-    uq: SystemUqConfig|None = None
-    cooling: SystemCoolingConfig|None = None
-    network: SystemNetworkConfig|None = None
+    uq: SystemUqConfig | None = None
+    cooling: SystemCoolingConfig | None = None
+    network: SystemNetworkConfig | None = None
 
     def get_legacy(self) -> dict[str, Any]:
         """
         Return the system config as a flattened, uppercased dict. This is for backwards
         compatibility with the rest of RAPS code so we can migrate to the new config format
-        gradually. The dict also as a "config" key that contains the SystemConfig object itself.
+        gradually. The dict also has a "system_config" key that contains the SystemConfig object
+        itself.
""" - renames = { # fields that need to be renamed to something other than just .upper() + renames = { # fields that need to be renamed to something other than just .upper() "system_name": "system_name", "w_htwps_key": "W_HTWPs_KEY", "w_ctwps_key": "W_CTWPs_KEY", "w_cts_key": "W_CTs_KEY", "multitenant": "multitenant", } - dump = self.model_dump(mode = "json", exclude_none = True) + dump = self.model_dump(mode="json", exclude_none=True) config_dict: dict[str, Any] = {} - for k, v in dump.items(): # flatten + for k, v in dump.items(): # flatten if isinstance(v, dict): config_dict.update(v) else: config_dict[k] = v # rename keys config_dict = {renames.get(k, k.upper()): v for k, v in config_dict.items()} - config_dict['config'] = self + config_dict['system_config'] = self return config_dict +class MultiPartitionSystemConfig(BaseModel): + system_name: str + partitions: list[SystemConfig] + + @field_validator("partitions") + def _validate_partitions(cls, partitions: list[SystemConfig]): + partition_names = [c.system_name for c in partitions] + if len(set(partition_names)) != len(partition_names): + raise ValueError(f"Duplicate system names: {','.join(partition_names)}") + return partitions + + @property + def partition_names(self): + return [c.system_name for c in self.partitions] + + @functools.cache def list_systems() -> list[str]: """ Lists all available systems """ return sorted([ - str(p.relative_to(CONFIG_PATH)).removesuffix(".yaml") - for p in CONFIG_PATH.rglob("*.yaml") + str(p.relative_to(raps_config.system_config_dir)).removesuffix(".yaml") + for p in raps_config.system_config_dir.rglob("*.yaml") ]) @@ -204,20 +231,57 @@ def get_system_config(system: str) -> SystemConfig: """ Returns the system config as a Pydantic object. system can either be a path to a custom .yaml file, or the name of one of the pre-configured - systems defined in RAPS_CONFIG. + systems defined in RAPS_SYSTEM_CONFIG_DIR. """ - config_path = Path(system.removesuffix(".yaml") + ".yaml") - if config_path.exists() or config_path.is_absolute(): - system_name = config_path.resolve() - else: # assume it's a pre-configured system - system_name = system.removesuffix(".yaml") - config_path = CONFIG_PATH / config_path + if system in list_systems(): + config_path = raps_config.system_config_dir / f"{system}.yaml" + system_name = system + else: + config_path = Path(system).resolve() + system_name = config_path.stem + if not config_path.is_file(): - raise FileNotFoundError( - f'"{system}" not found. Known systems are: {list_systems()}' - ) + raise FileNotFoundError(f'"{system}" not found. Valid systems are: {list_systems()}') config = { - "system_name": system_name, + "system_name": system_name, # You can override system_name in the yaml as well **yaml.safe_load(config_path.read_text()), } return SystemConfig.model_validate(config) + + +def get_partition_configs(partitions: list[str]) -> MultiPartitionSystemConfig: + """ + Resolves multiple partition config files. Can pass globs, or directories to include all yaml + files under the directory. 
+ """ + systems = list_systems() + multi_partition_systems = set(s.split("/")[0] for s in systems if "/" in s) + combined_system_name = [] + + parsed_configs: list[SystemConfig] = [] + for pat in partitions: + if pat in multi_partition_systems: + matched_systems = fnmatch.filter(systems, f"{pat}/*") + combined_system_name.append(pat) + elif fnmatch.filter(systems, pat): + matched_systems = fnmatch.filter(systems, pat) + combined_system_name.extend(s.split("/")[0] for s in matched_systems) + elif Path(pat).is_dir(): + matched_systems = sorted(Path(pat).glob("*.yaml")) + combined_system_name.append(Path(pat).name) + else: + matched_systems = sorted(glob.glob(pat)) + combined_system_name.extend(Path(s).stem for s in matched_systems) + + if not matched_systems: + raise FileNotFoundError(f'No config files match "{pat}"') + parsed_configs.extend(get_system_config(s) for s in sorted(matched_systems)) + + if len(parsed_configs) == 1: + combined_system_name = parsed_configs[0].system_name + else: + combined_system_name = "+".join(dict.fromkeys(combined_system_name)) # dedup, keep order + return MultiPartitionSystemConfig( + system_name=combined_system_name, + partitions=parsed_configs, + ) diff --git a/raps/telemetry.py b/raps/telemetry.py index e78a40a..c6815cd 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -18,7 +18,7 @@ from types import ModuleType if __name__ == "__main__": - # from raps.args import args,args_dict + # from raps.sim_config import args, args_dict parser = argparse.ArgumentParser(description='Telemetry data validator') parser.add_argument('--jid', type=str, default='*', help='Replay job id') parser.add_argument('-f', '--replay', nargs='+', type=str, @@ -49,7 +49,7 @@ import pandas as pd from tqdm import tqdm # from rich.progress import track -from raps.config import get_system_config +from raps.system_config import get_system_config from raps.job import Job, job_dict import matplotlib.pyplot as plt from raps.plotting import ( @@ -57,8 +57,8 @@ from raps.plotting import ( plot_nodes_gantt, plot_network_histogram ) -from raps.utils import next_arrival_byconfargs, create_casename, convert_to_seconds -# from raps.args import args, args_dict +from raps.utils import next_arrival_byconfargs, create_casename, convert_to_time_unit +# from raps.sim_config import args, args_dict class Telemetry: @@ -259,7 +259,7 @@ class Telemetry: print(f"File was generated with:" f"\n--system {args_from_file.system} ") if hasattr(args_from_file, 'fastforward'): - print(f"-ff {args_from_file.fastforward} ") + print(f"--ff {args_from_file.fastforward} ") if hasattr(args_from_file, 'time'): print(f"-t {args_from_file.time}") print(f"All Args:\n{args_from_file}" @@ -308,7 +308,7 @@ class Telemetry: timestep_end=timestep_end, args=args, filename=self.dirname) if args.time: - timestep_end = timestep_start + convert_to_seconds(args.time) + timestep_end = timestep_start + convert_to_time_unit(args.time) elif not timestep_end: timestep_end = int(max(job.wall_time + job.start_time for job in jobs)) + 1 diff --git a/raps/utils.py b/raps/utils.py index bbac74d..f54cc71 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -8,17 +8,22 @@ generating random numbers, summarizing and expanding ranges, determining job sta from datetime import timedelta from enum import Enum - import os import hashlib import math +import re import numpy as np import pandas as pd import random import sys import uuid import json - +import argparse +from pathlib import Path +from typing import Annotated as A, TypeVar, Callable +from 
pydantic import BaseModel, TypeAdapter, AfterValidator
+from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource
+import yaml
 from raps.job import Job
@@ -455,52 +460,70 @@ def next_arrival(lambda_rate, reset=False, start_time=0):
     return next_arrival.next_time
 
 
-def convert_to_seconds(time_str):
-    if isinstance(time_str, (int, float)):
-        return time_str  # this happens....
-    # Define the conversion factors
-    time_factors = {
-        'd': 86400,  # 1 day = 86400 seconds
-        'h': 3600,   # 1 hour = 3600 seconds
-        'm': 60,     # 1 minute = 60 seconds
-        's': 1,      # 1 second = 1 second
-        '': 1        # empty string = 1 second
-    }
-    downscale_factors = {
-        'ms': 1000,
-        'cs': 100,
-        'ds': 10
-    }
-
-    # Check if the input string ends with a unit or is purely numeric
-    # and extract the numeric part and the time unit
-    if time_str[-1].isdigit():
-        unit = ''
-        num_str = time_str[:]
-    else:
-        if time_str[-2].isdigit():
-            unit = time_str[-1]
-            num_str = time_str[:-1]
-        else:
-            unit = time_str[-2:]
-            num_str = time_str[:-2]
-
-    index = num_str.find(".")  # convert int or float string
-    if index != -1:
-        num = float(num_str)
-        raise ValueError(f"Float not supported at this time: {num}{unit}")
-
-    else:
-        num = int(num_str)
-
-    # Convert to seconds using the conversion factors
-    if unit in time_factors:
-        return num * time_factors[unit], 1
-    elif unit in downscale_factors:
-        downscale = downscale_factors[unit]
-        return num, downscale
-    else:
-        raise ValueError(f"Unknown time unit: {unit}")
+TIME_UNITS = {
+    'd': timedelta(days=1),
+    'h': timedelta(hours=1),
+    'm': timedelta(minutes=1),
+    's': timedelta(seconds=1),
+    'ds': timedelta(milliseconds=100),
+    'cs': timedelta(milliseconds=10),
+    'ms': timedelta(milliseconds=1),
+}
+
+
+def parse_time_unit(unit) -> timedelta:
+    parsed_unit = unit
+    if TypeAdapter(timedelta).validator.isinstance_python(unit):
+        parsed_unit = TypeAdapter(timedelta).validate_python(unit)
+    elif isinstance(unit, str):
+        parsed_unit = TIME_UNITS.get(unit)
+    if not isinstance(parsed_unit, timedelta):
+        raise ValueError(f"Invalid time unit {unit}")
+    if parsed_unit not in TIME_UNITS.values() or parsed_unit > TIME_UNITS['s']:
+        raise ValueError("Only time units of s, ds, cs, and ms are supported")
+    return parsed_unit
+
+
+def parse_td(td, unit: str | timedelta = 's') -> timedelta:
+    """ Parse into a timedelta. Pass `unit` to control how raw numbers are interpreted (default: seconds) """
+    unit = parse_time_unit(unit)
+    if TypeAdapter(int).validator.isinstance_python(td):
+        return unit * TypeAdapter(int).validate_python(td)
+    if TypeAdapter(timedelta).validator.isinstance_python(td):
+        return TypeAdapter(timedelta).validate_python(td)
+    if isinstance(td, str):
+        re_match = re.fullmatch(r"(\d+)\s*(\w+)", td.strip())
+        if re_match and re_match[2] in TIME_UNITS:
+            num_str, unit_str = re_match.groups()
+            return int(num_str) * TIME_UNITS[unit_str]
+    raise ValueError(f"Invalid timedelta: {td}")
+
+
+def convert_to_time_unit(td, unit: str | timedelta = 's'):
+    """
+    Converts to an integer number of the given time unit.
+    Throws if the given time is not evenly divisible by the unit.
+    """
+    num = parse_td(td, unit) / parse_time_unit(unit)
+    if (num != 0 and num < 1) or not num.is_integer():
+        raise ValueError(f"{td} is not divisible by time unit {unit}")
+    return int(num)
+
+
+def infer_time_unit(td) -> timedelta:
+    """ Infers the time unit the user meant for the input string """
+    parsed_td = parse_td(td)
+    time_unit = None
+    if isinstance(td, str):  # infer unit from string, e.g. 1s or 200ms
+        re_match = re.fullmatch(r"(\d+)\s*(\w+)", td.strip())
+        if re_match and re_match[2] in TIME_UNITS:
+            time_unit = TIME_UNITS[re_match[2]]
+    if not time_unit:
+        for unit in sorted(TIME_UNITS.values(), reverse=True):
+            if (parsed_td % unit).total_seconds() == 0:
+                time_unit = unit
+                break
+    return min(TIME_UNITS['s'], time_unit or TIME_UNITS['s'])
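+
+# Illustrative sketch (not part of the original patch): how these helpers
+# compose, consistent with the unit tests added in tests/unit/test_utils.py:
+#   parse_td("1m")                   -> timedelta(minutes=1)
+#   parse_td(10, unit='ms')          -> timedelta(milliseconds=10)
+#   convert_to_time_unit("1m", 's')  -> 60
+#   convert_to_time_unit("1m", 'ms') -> 60000
+#   infer_time_unit("200ms")         -> TIME_UNITS['ms']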
 
 
 def encrypt(name):
@@ -604,3 +627,76 @@ class ValueComparableEnum(Enum):
     def __hash__(self):  # required if you override __eq__
         return hash(self.value)
+
+
+ExpandedPath = A[Path, AfterValidator(lambda v: Path(v).expanduser().resolve())]
+""" Type that expands ~ in a path string and resolves it to an absolute path """
+
+
+T = TypeVar("T", bound=BaseModel)
+
+
+def pydantic_add_args(
+    parser: argparse.ArgumentParser, model_cls: type[T],
+    model_config: SettingsConfigDict | None = None,
+) -> Callable[[argparse.Namespace, dict | None], T]:
+    """
+    Add arguments to the parser from the model. Returns a function that can be used to parse the
+    model from the argparse args.
+
+    Normally you'd configure Pydantic to automatically create a BaseSettings object from
+    sys.argv and/or env variables. But we want a bit more control over the cli parser, and to use
+    the SimConfig model as a regular non-settings model in the simulation server. So here we do
+    some hacks to apply the args manually.
+    """
+    model_config_dict = SettingsConfigDict({
+        **(model_config or {}),
+        "cli_parse_args": False,  # Don't automatically parse args
+    })
+
+    class SettingsModel(model_cls, BaseSettings):
+        @classmethod
+        def settings_customise_sources(cls, settings_cls,
+            init_settings, env_settings, dotenv_settings, file_secret_settings,
+        ):
+            return (init_settings,)  # Don't load from env vars or anything else
+
+        model_config = model_config_dict
+
+    cli_settings_source = CliSettingsSource(SettingsModel, root_parser=parser)
+
+    def model_validate_args(args: argparse.Namespace, data: dict | None = None):
+        model = CliApp.run(SettingsModel,
+            cli_args=args,
+            cli_settings_source=cli_settings_source,
+            **(data or {}),
+        )
+        # Recreate model so we don't return the SettingsModel subclass
+        return model_cls.model_validate(model.model_dump())
+    return model_validate_args
+
+
+def yaml_dump(data):
+    """ Dumps yaml with pretty formatting """
+    class IndentDumper(yaml.Dumper):
+        def represent_data(self, data):
+            # Quote all strings with special characters to avoid confusion
+            if (
+                isinstance(data, str) and
+                (not re.fullmatch(r"[\w-]+", data) or data.isdigit()) and
+                "\n" not in data
+            ):
+                return self.represent_scalar('tag:yaml.org,2002:str', data, style='"')
+            return super(IndentDumper, self).represent_data(data)
+
+        def increase_indent(self, flow=False, indentless=False):
+            # Indent lists
+            return super(IndentDumper, self).increase_indent(flow, False)
+
+    return yaml.dump(
+        data,
+        Dumper=IndentDumper,
+        sort_keys=False,
+        indent=2,
+        allow_unicode=True,
+    )
diff --git a/raps/workload.py b/raps/workload.py
index 1e6b562..151e2c3 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -711,7 +711,6 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False):
         axs[0][1].yaxis.tick_right()
     else:
         axs[0][1].set_yticks([])
-        pass
     axs[0][1].hist(cpu_util, bins=100, orientation='vertical', zorder=1, density=True, color='tab:cyan')
     axs[0][1].axvline(np.mean(cpu_util), color='r', linewidth=1, zorder=3)
     axs[0][1].set(xlim=[0, config['CPUS_PER_NODE']])
@@ -800,83 +799,9 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False):
     plt.show()
 
 
-def 
add_workload_to_parser(parser): - - choices = ['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant'] - parser.add_argument('-w', '--workload', type=str, choices=choices, - default=choices[0], help='Type of synthetic workload') - - parser.add_argument("--multimodal", default=[1.0], type=float, nargs="+", - help="Percentage to draw from each distribution " - "(list of floats)e.g. '0.2 0.8' percentages apply" - " in order to the list of the --distribution argument list.") - - # Jobsize: - parser.add_argument("--jobsize-distribution", type=str, nargs="+", - choices=['uniform', 'weibull', 'normal'], default=None, help='Distribution type') - - parser.add_argument("--jobsize-normal-mean", type=float, required=False, help="Mean (mu) for Normal distribution") - parser.add_argument("--jobsize-normal-stddev", type=float, required=False, - help="Standard deviation (sigma) for Normal distribution") - - parser.add_argument("--jobsize-weibull-shape", type=float, required=False, help="Jobsize shape of weibull") - parser.add_argument("--jobsize-weibull-scale", type=float, required=False, help="Jobsize scale of weibull") - - parser.add_argument("--jobsize-is-of-degree", default=None, type=int, required=False, - help="Draw jobsizes from distribution of degree N (squared,cubed).") - parser.add_argument("--jobsize-is-power-of", default=None, type=int, required=False, - help="Draw jobsizes from distribution of power of N (2->2^x,3->3^x).") - - # Walltime: - parser.add_argument("--walltime-distribution", type=str, nargs="+", - choices=['uniform', 'weibull', 'normal'], default=None, help='Distribution type') - - parser.add_argument("--walltime-normal-mean", type=float, required=False, - help="Walltime mean (mu) for Normal distribution") - parser.add_argument("--walltime-normal-stddev", type=float, required=False, - help="Walltime standard deviation (sigma) for Normal distribution") - - parser.add_argument("--walltime-weibull-shape", type=float, required=False, help="Walltime shape of weibull") - parser.add_argument("--walltime-weibull-scale", type=float, required=False, help="Walltime scale of weibull") - - # Utilizations - parser.add_argument("--cpuutil-distribution", type=str, nargs="+", - choices=['uniform', 'weibull', 'normal'], default=['uniform'], help='Distribution type') - - parser.add_argument("--cpuutil-normal-mean", type=float, required=False, - help="Walltime mean (mu) for Normal distribution") - parser.add_argument("--cpuutil-normal-stddev", type=float, required=False, - help="Walltime standard deviation (sigma) for Normal distribution") - - parser.add_argument("--cpuutil-weibull-shape", type=float, required=False, help="Walltime shape of weibull") - parser.add_argument("--cpuutil-weibull-scale", type=float, required=False, help="Walltime scale of weibull") - - parser.add_argument("--gpuutil-distribution", type=str, nargs="+", - choices=['uniform', 'weibull', 'normal'], default=['uniform'], help='Distribution type') - - parser.add_argument("--gpuutil-normal-mean", type=float, required=False, - help="Walltime mean (mu) for Normal distribution") - parser.add_argument("--gpuutil-normal-stddev", type=float, required=False, - help="Walltime standard deviation (sigma) for Normal distribution") - - parser.add_argument("--gpuutil-weibull-shape", type=float, required=False, help="Walltime shape of weibull") - parser.add_argument("--gpuutil-weibull-scale", type=float, required=False, help="Walltime scale of weibull") - - parser.add_argument("--gantt-nodes", default=False, action='store_true', 
required=False, - help="Print Gannt with nodes required as line thickness (default false)") - - return parser - - -def check_workload_args(args): - if (args.jobsize_is_power_of is not None and args.jobsize_is_of_degree is not None): - print("Choose either --jobsize-is-power-of or --jobsize-is-of-degree! Not both.") - exit(1) - - def run_workload(): - from raps.args import args, args_dict - from raps.config import get_system_config + from raps.sim_config import args, args_dict + from raps.system_config import get_system_config config = get_system_config(args.system).get_legacy() if args.replay: td = Telemetry(**args_dict) @@ -1045,7 +970,6 @@ def continuous_job_generation(*, engine, timestep, jobs): if len(engine.queue) <= engine.continuous_workload.args.maxqueue: new_jobs = engine.continuous_workload.generate_jobs() jobs.extend(new_jobs) - pass if __name__ == "__main__": diff --git a/scripts/marconi100-day51.sh b/scripts/marconi100-day51.sh index ae801a5..01da9a2 100644 --- a/scripts/marconi100-day51.sh +++ b/scripts/marconi100-day51.sh @@ -1,4 +1,4 @@ -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy replay -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy fcfs -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy fcfs --backfill easy -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 -ff 4381000 -t 61000 -o --policy priority --backfill firstfit +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy replay +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy fcfs +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy fcfs --backfill easy +python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy priority --backfill firstfit diff --git a/scripts/meta_run.sh b/scripts/meta_run.sh index 41f4831..0c60596 100755 --- a/scripts/meta_run.sh +++ b/scripts/meta_run.sh @@ -17,7 +17,7 @@ while [ $current_sec -le $end_sec ]; do DATEDIRS="date=$DATEDIR" # Construct the command with the formatted date - command="python main.py -d -o --plot power loss -f $DPATH/slurm/joblive/$DATEDIRS $DPATH/jobprofile/jobprofile/$DATEDIRS >& $DATEDIRS.out &" + command="python main.py -d -o --plot power loss -f $DPATH/slurm/joblive/$DATEDIRS,$DPATH/jobprofile/jobprofile/$DATEDIRS >& $DATEDIRS.out &" sleep 10 # Execute the command diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index f40cc8f..8b80d5d 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -24,7 +24,7 @@ def test_main_network_run(system, system_config, random_id): "python", "main.py", "--time", "1m", "--system", system, - "-net", + "--net", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index c478e21..82c30de 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -31,7 +31,7 @@ def test_main_run(system, system_config, system_file, random_id): "--time", "1m", "--system", system, "-f", *file_list, - 
"-net", + "--net", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py index f86caa7..9cb87a2 100644 --- a/tests/systems/test_main_time_delta_run.py +++ b/tests/systems/test_main_time_delta_run.py @@ -3,8 +3,7 @@ import subprocess import gc import pytest from tests.util import PROJECT_ROOT -from raps.utils import convert_seconds_to_hhmmss -from raps.utils import convert_to_seconds +from raps.utils import convert_to_time_unit, convert_seconds_to_hhmmss pytestmark = [ @@ -38,8 +37,8 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - time, downscale = convert_to_seconds(time_arg) - assert f"Time Simulated: {convert_seconds_to_hhmmss(time // downscale)}" in result.stdout + time = convert_to_time_unit(time_arg) + assert f"Time Simulated: {convert_seconds_to_hhmmss(time)}" in result.stdout subprocess.run( f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", diff --git a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index 5542549..459d295 100644 --- a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -3,8 +3,7 @@ import subprocess import gc import pytest from tests.util import PROJECT_ROOT -from raps.utils import convert_seconds_to_hhmmss -from raps.utils import convert_to_seconds +from raps.utils import convert_seconds_to_hhmmss, parse_td pytestmark = [ @@ -39,10 +38,8 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - time, downscale = convert_to_seconds(time_arg) - td, td_ds = convert_to_seconds(tdelta_arg) - #assert f"Time Simulated: {convert_seconds_to_hhmmss(int((time / td_ds) * downscale))}" in result.stdout - assert f"Time Simulated: {convert_seconds_to_hhmmss(time / downscale)}" in result.stdout + time = parse_td(time_arg).seconds + assert f"Time Simulated: {convert_seconds_to_hhmmss(time)}" in result.stdout subprocess.run( f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py index 3e46dda..d3ef963 100644 --- a/tests/systems/test_main_time_ff_delta_run.py +++ b/tests/systems/test_main_time_ff_delta_run.py @@ -30,7 +30,7 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, result = subprocess.run([ "python", "main.py", "-t", time_arg, - "-ff", ff_arg, + "--ff", ff_arg, "--time-delta", tdelta_arg, "--system", system, #--"-f", system_file, diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index f2f5f7c..928d149 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -28,7 +28,7 @@ def test_main_withdata_run(system, system_config, system_file, random_id): "python", "main.py", "--time", "1m", "--system", system, - "-f", *file_list, + "-f", ','.join(file_list), "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on 
{system}: {result.stderr}" diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py index 24a671e..9b9db13 100644 --- a/tests/systems/test_multi_part_sim_basic_run.py +++ b/tests/systems/test_multi_part_sim_basic_run.py @@ -20,7 +20,6 @@ def test_multi_part_sim_run(system, system_config): result = subprocess.run([ "python", "multi-part-sim.py", "--time", "1h", - "--system", system, "-x", f"{system}/*", #"--noui" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py index 643b97a..1871725 100644 --- a/tests/systems/test_multi_part_sim_network_run.py +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -23,9 +23,8 @@ def test_multi_part_sim_run(system, system_config, random_id): result = subprocess.run([ "python", "multi-part-sim.py", "--time", "1h", - "--system", system, "-x", f"{system}/*", - "-net", + "--net", #"--noui" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_workload_synthetic.py b/tests/systems/test_workload_synthetic.py index 2e4c4a1..dd5f8cf 100644 --- a/tests/systems/test_workload_synthetic.py +++ b/tests/systems/test_workload_synthetic.py @@ -1,7 +1,6 @@ import subprocess import gc import pytest -import shlex pytestmark = [ diff --git a/tests/test_main.py b/tests/test_main.py index 4b98263..76f48a3 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,4 +1,3 @@ -from tests.smoke import main import subprocess import os diff --git a/tests/test_system_config.py b/tests/test_system_config.py deleted file mode 100644 index 74280df..0000000 --- a/tests/test_system_config.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest -from raps.config import list_systems, get_system_config - -@pytest.mark.parametrize("system_name", list_systems()) -def test_configs(system_name): - # Very basic test that all system configs are valid - config = get_system_config(system_name) - assert config.system_name == system_name - assert config.get_legacy()['system_name'] == system_name - assert config.get_legacy()['config'] == config diff --git a/tests/unit/test_system_config.py b/tests/unit/test_system_config.py new file mode 100644 index 0000000..e32b99f --- /dev/null +++ b/tests/unit/test_system_config.py @@ -0,0 +1,26 @@ +import pytest +from raps.raps_config import raps_config +from raps.system_config import list_systems, get_system_config, get_partition_configs + + +@pytest.mark.parametrize("system_name", list_systems()) +def test_configs(system_name): + # Very basic test that all system configs are valid + config = get_system_config(system_name) + assert config.system_name == system_name + assert config.get_legacy()['system_name'] == system_name + assert config.get_legacy()['system_config'] == config + + +@pytest.mark.parametrize("input,expected_name,expected_configs", [ + (["lumi"], "lumi", ["lumi/lumi-c", "lumi/lumi-g"]), + (["lumi/*"], "lumi", ["lumi/lumi-c", "lumi/lumi-g"]), + (["frontier", "summit"], "frontier+summit", ["frontier", "summit"]), + # test passing arbitrary paths + ([str(raps_config.system_config_dir / "lumi")], "lumi", ["lumi-c", "lumi-g"]), + ([str(raps_config.system_config_dir / "lumi/lumi-*")], "lumi-c+lumi-g", ["lumi-c", "lumi-g"]), +]) +def test_get_partition_configs(input, expected_name, expected_configs): + result = get_partition_configs(input) + assert result.system_name == 
expected_name + assert result.partition_names == expected_configs diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 0000000..edf06fe --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,42 @@ +import pytest +from datetime import timedelta +from raps.utils import parse_td, convert_to_time_unit, infer_time_unit, TIME_UNITS + + +@pytest.mark.parametrize("input,expected", [ + ("1", timedelta(seconds=1)), + ("1m", timedelta(minutes=1)), + (timedelta(minutes=1), timedelta(minutes=1)), + (2, timedelta(seconds=2)), + ("PT2S", timedelta(seconds=2)), +]) +def test_parse_td(input, expected): + assert parse_td(input) == expected + + +def test_parse_td_error(): + with pytest.raises(ValueError): + parse_td("1x") + + +@pytest.mark.parametrize("input,unit,expected", [ + ("1s", 's', 1), + ("1m", 's', 60), + (0, 'ms', 0), + (timedelta(seconds=6), 'ms', 6000), +]) +def test_convert_to_time_unit(input, unit, expected): + assert convert_to_time_unit(input, unit) == expected + + +@pytest.mark.parametrize("input,expected", [ + ("1s", 's'), + ("1000ms", 'ms'), + (0, 's'), + (timedelta(seconds=6), 's'), + (timedelta(days=6), 's'), + (timedelta(milliseconds=6), 'ms'), + (timedelta(milliseconds=60), 'cs'), +]) +def test_infer_time_unit(input, expected): + assert infer_time_unit(input) == TIME_UNITS[expected] -- GitLab From bf3b6d2344f041db97ccf7ecbde0903a212232c7 Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Tue, 26 Aug 2025 13:49:23 +0000 Subject: [PATCH 247/388] Fix tests --- raps/dataloaders/mit_supercloud/loader.py | 2 +- raps/telemetry.py | 23 +++++++++---------- tests/systems/test_main_basic_run.py | 2 +- tests/systems/test_main_cooling_run.py | 2 +- .../test_main_cooling_uncertainty_run.py | 2 +- .../systems/test_main_network_withdata_run.py | 2 +- tests/systems/test_main_noui_run.py | 2 +- .../test_main_time_delta_sub_second_run.py | 2 +- tests/systems/test_main_time_ff_delta_run.py | 2 +- tests/systems/test_main_withdata_run.py | 2 +- .../systems/test_multi_part_sim_basic_run.py | 2 +- .../test_multi_part_sim_network_run.py | 2 +- .../test_multi_part_sim_withdata_run.py | 2 +- tests/systems/test_telemetry_withdata_run.py | 2 +- 14 files changed, 24 insertions(+), 25 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index cc0c1dc..6057210 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -593,7 +593,7 @@ def load_data(local_dataset_path, **kwargs): time_limit=rec.get("time_limit", 0), start_time=t0 - start_ts, end_time=t1 - start_ts, - wall_time=max(0, t1-t0), + expected_run_time=max(0, t1-t0), trace_time=len(cpu_tr)*quanta, trace_start_time=0, trace_end_time=len(cpu_tr)*quanta, diff --git a/raps/telemetry.py b/raps/telemetry.py index c6815cd..f485daa 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -70,10 +70,18 @@ class Telemetry: self.system = kwargs.get('system') self.config = kwargs.get('config') outname = kwargs.get('output') - if outname is None or outname == "": - self.dirname = create_casename() - else: + if outname: self.dirname = outname + elif kwargs.get("replay"): + # Try to extract date from given name to use as case directory + matched_date = re.search(r"\d{4}-\d{2}-\d{2}", kwargs['replay'][0]) + if matched_date: + self.dirname = f"sim={matched_date.group(0)}" + else: + self.dirname = create_casename() + else: + self.dirname = create_casename() + try: self.dataloader = 
importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__) except ImportError as e: @@ -287,15 +295,6 @@ class Telemetry: break if trigger_custom_dataloader: # custom data loader - # Try to extract date from given name to use as case directory - matched_date = re.search(r"\d{4}-\d{2}-\d{2}", args.replay[0]) - if matched_date: - extracted_date = matched_date.group(0) - self.dirname = "sim=" + extracted_date - else: - extracted_date = f"Date not found, dirname is: {self.dirname}" - print(extracted_date) - print(*args.replay) try: jobs, timestep_start_from_data, timestep_end_from_data = self.load_data(args.replay) diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index 8993949..8e31952 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_run(system, system_config,random_id): +def test_main_basic_run(system, system_config,random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") diff --git a/tests/systems/test_main_cooling_run.py b/tests/systems/test_main_cooling_run.py index 79c10b7..1411d8c 100644 --- a/tests/systems/test_main_cooling_run.py +++ b/tests/systems/test_main_cooling_run.py @@ -12,7 +12,7 @@ pytestmark = [ ] -def test_main_run(system, system_config, random_id): +def test_main_cooling_run(system, system_config, random_id): if not system_config.get("cooling", False): pytest.skip(f"{system} does not support cooling.") diff --git a/tests/systems/test_main_cooling_uncertainty_run.py b/tests/systems/test_main_cooling_uncertainty_run.py index 2515325..2491d7a 100644 --- a/tests/systems/test_main_cooling_uncertainty_run.py +++ b/tests/systems/test_main_cooling_uncertainty_run.py @@ -12,7 +12,7 @@ pytestmark = [ ] -def test_main_run(request, system, system_config, random_id): +def test_main_cooling_uncertainty_run(request, system, system_config, random_id): print(f"Markexpr: {request.config.option.markexpr}") if not system_config.get("uncertainty", False) or not system_config.get("cooling", False): pytest.skip(f"{system} does not support cooling or uncertainty.") diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index 82c30de..31db05e 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -14,7 +14,7 @@ pytestmark = [ ] -def test_main_run(system, system_config, system_file, random_id): +def test_main_network_withdata_run(system, system_config, system_file, random_id): if not system_config.get("net", False): pytest.skip(f"{system} does not support basic net run.") diff --git a/tests/systems/test_main_noui_run.py b/tests/systems/test_main_noui_run.py index 50ca5b0..5b12b55 100644 --- a/tests/systems/test_main_noui_run.py +++ b/tests/systems/test_main_noui_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_run(system, system_config, random_id): +def test_main_noui_run(system, system_config, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") diff --git a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index 459d295..9276011 100644 --- a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -23,7 +23,7 @@ pytestmark = [ ("100ms", "1ms"), ("100ms", "1s"), ], 
ids=["1ds","3ds","1cs","1ms","1cs-for-10ds","1ms-for-10cs","1ms-for-100ms","1s-for-100ms"]) -def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random_id): +def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py index d3ef963..a136615 100644 --- a/tests/systems/test_main_time_ff_delta_run.py +++ b/tests/systems/test_main_time_ff_delta_run.py @@ -21,7 +21,7 @@ pytestmark = [ ("10h", "3h", "1h"), pytest.param("3d", "1d", "1d", marks=pytest.mark.long, id="1d (long)"), ], ids=["1","1s","10s","1m","1h","3h","1d"]) -def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, +def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, ff_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 928d149..299d34c 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -28,7 +28,7 @@ def test_main_withdata_run(system, system_config, system_file, random_id): "python", "main.py", "--time", "1m", "--system", system, - "-f", ','.join(file_list), + "-f", ','.join(str(p) for p in file_list), "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py index 9b9db13..e8e64e9 100644 --- a/tests/systems/test_multi_part_sim_basic_run.py +++ b/tests/systems/test_multi_part_sim_basic_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_multi_part_sim_run(system, system_config): +def test_multi_part_sim_basic_run(system, system_config): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run.") diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py index 1871725..3f53e99 100644 --- a/tests/systems/test_multi_part_sim_network_run.py +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_multi_part_sim_run(system, system_config, random_id): +def test_multi_part_sim_network_run(system, system_config, random_id): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run.") diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index f93e07e..f862aca 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -12,7 +12,7 @@ pytestmark = [ ] -def test_multi_part_sim_run(system, system_config, system_file): +def test_multi_part_sim_withdata_run(system, system_config, system_file): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run even without data.") if not system_config.get("withdata", False): diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py index cac6104..415fbfe 100644 --- a/tests/systems/test_telemetry_withdata_run.py +++ 
b/tests/systems/test_telemetry_withdata_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_main_withdata_run(system, system_config, system_file, random_id): +def test_telemetry_main_withdata_run(system, system_config, system_file, random_id): if not system_config.get("telemetry", False): pytest.skip(f"{system} does not support telemetry run.") if not system_config.get("withdata", False): -- GitLab From 6cf9686075e58ea52943059a64ecf32c28d1ea51 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 26 Aug 2025 17:58:21 -0400 Subject: [PATCH 248/388] Initial implementation of rl framework --- raps/envs/raps_env.py | 149 ++++++++++++++++++++++++++++++++++++++++++ train_rl.py | 17 +++++ 2 files changed, 166 insertions(+) create mode 100644 raps/envs/raps_env.py create mode 100644 train_rl.py diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py new file mode 100644 index 0000000..45314dc --- /dev/null +++ b/raps/envs/raps_env.py @@ -0,0 +1,149 @@ +import gym +import numpy as np +from gym import spaces + +from raps.engine import Engine +from raps.power import PowerManager, compute_node_power +from raps.flops import FLOPSManager +from raps.telemetry import Telemetry +from raps.workload import Workload +from raps.ui import LayoutManager + + +class RAPSEnv(gym.Env): + """ + Minimal Gym-compatible wrapper around RAPS Engine + for RL job scheduling experiments. + """ + + metadata = {"render.modes": ["human"]} + + def __init__(self, **kwargs): + super().__init__() + # Store everything in self.args + self.args_dict = kwargs # dict + self.cli_args = kwargs.get("args") # Namespace + self.config = kwargs.get("config") + if self.cli_args is None: + raise ValueError("RAPSEnv requires 'args' (argparse.Namespace) in kwargs") + if self.config is None: + raise ValueError("RAPSEnv requires 'config' in kwargs") + + # --- managers (minimal versions) --- + self.power_manager = PowerManager(compute_node_power, **self.config) + self.flops_manager = FLOPSManager(**self.args_dict) + self.telemetry = Telemetry(**self.args_dict) + + # --- workload (synthetic for now) --- + wl = Workload(self.cli_args, self.config) + jobs = wl.generate_jobs() + #print("***", jobs) + + timestep_start = 0 + #timestep_end = int(max(job.wall_time for job in jobs)) + timestep_end = 100 + + # --- minimal engine instantiation --- + #self.engine = Engine( + # power_manager=self.power_manager, + # flops_manager=self.flops_manager, + # telemetry=self.telemetry, + # jobs=jobs, + # timestep_start=timestep_start, + # timestep_end=timestep_end, + # time_delta=self.args.get("time_delta"), + # continuous_workload=None, + # args=self.args, + # config=self.config + #) + + self.engine = Engine( + power_manager=self.power_manager, + flops_manager=self.flops_manager, + jobs=jobs, + **self.args_dict + ) + + self.layout_manager = LayoutManager( + self.args_dict.get("layout"), engine=self.engine, + debug=self.args_dict.get("debug", False), + total_timesteps=self.args_dict.get("time", 1000), + args_dict=self.args_dict, + **self.config + ) + + # --- RL spaces --- + max_jobs = 100 + job_features = 4 # [nodes, runtime, priority, wait_time] + self.observation_space = spaces.Box( + low=0, high=1, shape=(max_jobs, job_features), dtype=np.float32 + ) + self.action_space = spaces.Discrete(max_jobs) + + def reset(self, **kwargs): + """Reset environment (new workload + engine).""" + wl = Workload(self.cli_args, self.config) + jobs = wl.generate_jobs() + + self.engine.jobs = jobs + self.engine.timestep_start = 0 + #self.engine.timestep_end = int(max(job.wall_time for 
job in jobs)) + self.engine.timestep_end = 100 + self.engine.current_timestep = 0 + + return self._get_state() + + def step(self, action): + """ + Apply scheduling action. + For now: action = index of job in queue to attempt scheduling. + """ + # TODO: hook into Engine to apply scheduling + # Placeholder: random reward for scaffolding + reward = np.random.rand() + done = self.engine.current_timestep >= self.engine.timestep_end + + obs = self._get_state() + info = { + "utilization": self.telemetry.utilization(), + "power": self.telemetry.power(), + "queue_length": self.telemetry.queue_length(), + } + + self.engine.current_timestep += 1 + return obs, reward, done, info + + def _get_state(self): + """Construct simple state representation from engine's job queue.""" + # Example: take waiting jobs + job_queue = [j for j in self.engine.jobs if not j.started] + + max_jobs, job_features = self.observation_space.shape + state = np.zeros((max_jobs, job_features), dtype=np.float32) + + for i, job in enumerate(job_queue[:max_jobs]): + # fill with features of interest; adapt to what Job exposes + features = [ + getattr(job, "nodes_required", 0), + getattr(job, "wall_time", 0), + getattr(job, "priority", 0), + getattr(job, "wait_time", 0), + ] + state[i, :len(features)] = features + + return state + + #def _get_state(self): + # """Very simple state vector: truncate/pad job queue.""" + # jobs = self.telemetry.get_job_queue_features() + # max_jobs, job_features = self.observation_space.shape + # state = np.zeros((max_jobs, job_features), dtype=np.float32) +# +# for i, job in enumerate(jobs[:max_jobs]): +# state[i, : len(job)] = job +# return state + + def render(self, mode="human"): + print("Timestep:", self.engine.current_timestep, + "Utilization:", self.telemetry.utilization(), + "Power:", self.telemetry.power()) diff --git a/train_rl.py b/train_rl.py new file mode 100644 index 0000000..4a997b6 --- /dev/null +++ b/train_rl.py @@ -0,0 +1,17 @@ +import gym +from stable_baselines3 import PPO +from raps.envs.raps_env import RAPSEnv +from raps.system_config import get_system_config +from raps.sim_config import args, args_dict + +config = get_system_config(args.system).get_legacy() +args_dict['config'] = config +args_dict['args'] = args + +env = RAPSEnv(**args_dict) + +model = PPO("MlpPolicy", env, verbose=1) +model.learn(total_timesteps=10000) + +# Save trained model +model.save("ppo_raps") -- GitLab From d7caed6109b94ebfd72c5da2ec6e560d37ea0437 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 26 Aug 2025 18:05:05 -0400 Subject: [PATCH 249/388] Got initial version working --- raps/envs/raps_env.py | 61 ++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index 45314dc..ec65c4c 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -21,8 +21,8 @@ class RAPSEnv(gym.Env): def __init__(self, **kwargs): super().__init__() # Store everything in self.args - self.args_dict = kwargs # dict - self.cli_args = kwargs.get("args") # Namespace + self.args_dict = kwargs # dict + self.cli_args = kwargs.get("args") # Namespace self.config = kwargs.get("config") if self.cli_args is None: raise ValueError("RAPSEnv requires 'args' (argparse.Namespace) in kwargs") @@ -37,25 +37,6 @@ class RAPSEnv(gym.Env): # --- workload (synthetic for now) --- wl = Workload(self.cli_args, self.config) jobs = wl.generate_jobs() - #print("***", jobs) - - timestep_start = 0 - #timestep_end = int(max(job.wall_time for job in jobs)) - 
timestep_end = 100 - - # --- minimal engine instantiation --- - #self.engine = Engine( - # power_manager=self.power_manager, - # flops_manager=self.flops_manager, - # telemetry=self.telemetry, - # jobs=jobs, - # timestep_start=timestep_start, - # timestep_end=timestep_end, - # time_delta=self.args.get("time_delta"), - # continuous_workload=None, - # args=self.args, - # config=self.config - #) self.engine = Engine( power_manager=self.power_manager, @@ -87,7 +68,7 @@ class RAPSEnv(gym.Env): self.engine.jobs = jobs self.engine.timestep_start = 0 - #self.engine.timestep_end = int(max(job.wall_time for job in jobs)) + # self.engine.timestep_end = int(max(job.wall_time for job in jobs)) self.engine.timestep_end = 100 self.engine.current_timestep = 0 @@ -98,16 +79,23 @@ class RAPSEnv(gym.Env): Apply scheduling action. For now: action = index of job in queue to attempt scheduling. """ - # TODO: hook into Engine to apply scheduling - # Placeholder: random reward for scaffolding + # TODO: integrate action with real scheduling logic reward = np.random.rand() done = self.engine.current_timestep >= self.engine.timestep_end obs = self._get_state() + + # Compute info manually + running_nodes = sum(getattr(j, "nodes_required", 0) + for j in self.engine.jobs + if getattr(j, "start_time", None) is not None) + total_nodes = self.config.get("SC_NODES", 1) + utilization = running_nodes / total_nodes + info = { - "utilization": self.telemetry.utilization(), - "power": self.telemetry.power(), - "queue_length": self.telemetry.queue_length(), + "utilization": utilization, + "power": getattr(self.power_manager, "total_power", 0.0), + "queue_length": len([j for j in self.engine.jobs if getattr(j, "start_time", None) is None]), } self.engine.current_timestep += 1 @@ -115,34 +103,23 @@ class RAPSEnv(gym.Env): def _get_state(self): """Construct simple state representation from engine's job queue.""" - # Example: take waiting jobs - job_queue = [j for j in self.engine.jobs if not j.started] + # Example: take waiting jobs (haven’t started yet) + job_queue = [j for j in self.engine.jobs if getattr(j, "start_time", None) is None] max_jobs, job_features = self.observation_space.shape state = np.zeros((max_jobs, job_features), dtype=np.float32) for i, job in enumerate(job_queue[:max_jobs]): - # fill with features of interest; adapt to what Job exposes features = [ getattr(job, "nodes_required", 0), getattr(job, "wall_time", 0), getattr(job, "priority", 0), - getattr(job, "wait_time", 0), + getattr(job, "wait_time", 0), # may need to compute from current_timestep - qdt ] - state[i, :len(features)] = features + state[i, : len(features)] = features return state - #def _get_state(self): - # """Very simple state vector: truncate/pad job queue.""" - # jobs = self.telemetry.get_job_queue_features() - # max_jobs, job_features = self.observation_space.shape - # state = np.zeros((max_jobs, job_features), dtype=np.float32) -# -# for i, job in enumerate(jobs[:max_jobs]): -# state[i, : len(job)] = job -# return state - def render(self, mode="human"): print("Timestep:", self.engine.current_timestep, "Utilization:", self.telemetry.utilization(), -- GitLab From e44917571c5d2eb76bda435689f4086ff98b125e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 26 Aug 2025 19:50:16 -0400 Subject: [PATCH 250/388] Add real reward function - previously was using random reward --- raps/envs/raps_env.py | 67 +++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/raps/envs/raps_env.py 
b/raps/envs/raps_env.py index ec65c4c..8f7dbe8 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -74,31 +74,60 @@ class RAPSEnv(gym.Env): return self._get_state() + def _compute_reward(self, tick_data, alpha=1.0, beta=0.001, gamma=0.1): + completed = getattr(tick_data, "completed", None) + jobs_completed = len(completed) if completed else 0 + power = getattr(tick_data, "power", 0.0) or 0.0 + queue_len = len(self.engine.queue) + + reward = alpha * jobs_completed - beta * power - gamma * queue_len + + if self.args_dict.get("debug", False): + print(f"[t={self.engine.current_timestep}] jobs_completed={jobs_completed}, " + f"power={power}, queue_len={queue_len}, reward={reward}") + + return reward + def step(self, action): - """ - Apply scheduling action. - For now: action = index of job in queue to attempt scheduling. - """ - # TODO: integrate action with real scheduling logic - reward = np.random.rand() - done = self.engine.current_timestep >= self.engine.timestep_end + # 1. Jobs waiting in the queue + job_queue = list(self.engine.queue) + chosen_job = None - obs = self._get_state() + if job_queue and action < len(job_queue): + chosen_job = job_queue[action] + + # 2. Let RAPS handle all scheduling logic + self.engine.scheduler.place_job_and_manage_queues( + chosen_job, + self.engine.queue, + self.engine.running, + self.engine.current_timestep, + ) - # Compute info manually - running_nodes = sum(getattr(j, "nodes_required", 0) - for j in self.engine.jobs - if getattr(j, "start_time", None) is not None) - total_nodes = self.config.get("SC_NODES", 1) - utilization = running_nodes / total_nodes + # 3. Advance simulation by one tick + # Update bookkeeping so tick() doesn't crash + if not hasattr(self.engine, "num_active_nodes"): + self.engine.num_active_nodes = 0 + if not hasattr(self.engine, "num_free_nodes"): + self.engine.num_free_nodes = self.config["AVAILABLE_NODES"] + + self.engine.num_active_nodes = sum(len(j.scheduled_nodes) for j in self.engine.running) + self.engine.num_free_nodes = self.config["AVAILABLE_NODES"] - self.engine.num_active_nodes + + tick_data = self.engine.tick() + + # 4. Compute reward (throughput vs. power) + reward = self._compute_reward(tick_data) + + # 5. 
Build next observation + obs = self._get_state() + done = self.engine.current_timestep >= self.engine.timestep_end info = { - "utilization": utilization, - "power": getattr(self.power_manager, "total_power", 0.0), - "queue_length": len([j for j in self.engine.jobs if getattr(j, "start_time", None) is None]), + "scheduled_job": getattr(chosen_job, "id", None), + "power": getattr(tick_data, "power", None), + "completed": getattr(tick_data, "completed", []), } - - self.engine.current_timestep += 1 return obs, reward, done, info def _get_state(self): -- GitLab From 45f13b461225f27e24bcb45c3f24cc320f057d2a Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 26 Aug 2025 20:00:24 -0400 Subject: [PATCH 251/388] Expose more settings to PPO --- train_rl.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/train_rl.py b/train_rl.py index 4a997b6..9cefbd4 100644 --- a/train_rl.py +++ b/train_rl.py @@ -1,4 +1,3 @@ -import gym from stable_baselines3 import PPO from raps.envs.raps_env import RAPSEnv from raps.system_config import get_system_config @@ -10,7 +9,18 @@ args_dict['args'] = args env = RAPSEnv(**args_dict) -model = PPO("MlpPolicy", env, verbose=1) +model = PPO( + "MlpPolicy", + env, + n_steps=512, # shorter rollouts (quicker feedback loop) + batch_size=128, # must divide n_steps evenly + n_epochs=10, # # of minibatch passes per update + gamma=0.99, # discount (keeps long-term credit) + learning_rate=3e-4, # default Adam lr, can try 1e-4 if unstable + ent_coef=0.01, # encourage exploration + verbose=1, +) + model.learn(total_timesteps=10000) # Save trained model -- GitLab From 90188c2d832331ae8ca5c701b673ed1820ddeb5e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 26 Aug 2025 20:53:25 -0400 Subject: [PATCH 252/388] Fix issues with time not advancing and power not computed correctly --- raps/envs/raps_env.py | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index 8f7dbe8..398009c 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -53,6 +53,13 @@ class RAPSEnv(gym.Env): **self.config ) + self.generator = self.layout_manager.run_stepwise( + jobs, + timestep_start=0, + timestep_end=self.config.get("SIM_END", 1000), + time_delta=self.args_dict.get("time_delta", 1), + ) + # --- RL spaces --- max_jobs = 100 job_features = 4 # [nodes, runtime, priority, wait_time] @@ -77,7 +84,7 @@ class RAPSEnv(gym.Env): def _compute_reward(self, tick_data, alpha=1.0, beta=0.001, gamma=0.1): completed = getattr(tick_data, "completed", None) jobs_completed = len(completed) if completed else 0 - power = getattr(tick_data, "power", 0.0) or 0.0 + power = self.power_manager.history[-1][1] queue_len = len(self.engine.queue) reward = alpha * jobs_completed - beta * power - gamma * queue_len @@ -89,43 +96,29 @@ class RAPSEnv(gym.Env): return reward def step(self, action): - # 1. Jobs waiting in the queue job_queue = list(self.engine.queue) chosen_job = None if job_queue and action < len(job_queue): chosen_job = job_queue[action] - - # 2. Let RAPS handle all scheduling logic self.engine.scheduler.place_job_and_manage_queues( - chosen_job, - self.engine.queue, - self.engine.running, - self.engine.current_timestep, + chosen_job, self.engine.queue, self.engine.running, self.engine.current_timestep ) - # 3. 
Advance simulation by one tick - # Update bookkeeping so tick() doesn't crash - if not hasattr(self.engine, "num_active_nodes"): - self.engine.num_active_nodes = 0 - if not hasattr(self.engine, "num_free_nodes"): - self.engine.num_free_nodes = self.config["AVAILABLE_NODES"] + # Advance simulation by one step via generator + try: + tick_data = next(self.generator) + except StopIteration: + # Simulation finished + return self._get_state(), 0.0, True, {} - self.engine.num_active_nodes = sum(len(j.scheduled_nodes) for j in self.engine.running) - self.engine.num_free_nodes = self.config["AVAILABLE_NODES"] - self.engine.num_active_nodes - - tick_data = self.engine.tick() - - # 4. Compute reward (throughput vs. power) reward = self._compute_reward(tick_data) - - # 5. Build next observation obs = self._get_state() - done = self.engine.current_timestep >= self.engine.timestep_end + done = self.engine.current_timestep >= min(self.engine.timestep_end, 1000) info = { "scheduled_job": getattr(chosen_job, "id", None), - "power": getattr(tick_data, "power", None), + "power": getattr(tick_data, "power", 0.0), "completed": getattr(tick_data, "completed", []), } return obs, reward, done, info -- GitLab From 414c0f22a661c786f1b87e330b8d39b489ea054e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 26 Aug 2025 21:39:07 -0400 Subject: [PATCH 253/388] Add new RL scheduler schedulers/rl.py --- raps/envs/raps_env.py | 31 ++++++++++++++++++++++++------- raps/resmgr/default.py | 3 ++- raps/schedulers/rl.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 raps/schedulers/rl.py diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index 398009c..97fa77e 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -8,6 +8,9 @@ from raps.flops import FLOPSManager from raps.telemetry import Telemetry from raps.workload import Workload from raps.ui import LayoutManager +from raps.schedulers.rl import Scheduler +# from raps.resmgr.default import MultiTenantResourceManager as ResourceManager +from raps.resmgr.default import ExclusiveNodeResourceManager as ResourceManager class RAPSEnv(gym.Env): @@ -45,6 +48,21 @@ class RAPSEnv(gym.Env): **self.args_dict ) + resmgr = ResourceManager( + total_nodes=self.config["TOTAL_NODES"], + down_nodes=self.config.get("DOWN_NODES", []), + config=self.config + ) + + # Plug in RL scheduler + self.scheduler = Scheduler( + config=self.config, + policy="fcfs", # or None if you want no heuristic fallback + resource_manager=resmgr, + env=self + ) + self.engine.scheduler = self.scheduler + self.layout_manager = LayoutManager( self.args_dict.get("layout"), engine=self.engine, debug=self.args_dict.get("debug", False), @@ -96,15 +114,8 @@ class RAPSEnv(gym.Env): return reward def step(self, action): - job_queue = list(self.engine.queue) chosen_job = None - if job_queue and action < len(job_queue): - chosen_job = job_queue[action] - self.engine.scheduler.place_job_and_manage_queues( - chosen_job, self.engine.queue, self.engine.running, self.engine.current_timestep - ) - # Advance simulation by one step via generator try: tick_data = next(self.generator) @@ -112,7 +123,13 @@ class RAPSEnv(gym.Env): # Simulation finished return self._get_state(), 0.0, True, {} + # Store action for scheduler to pick up + self.scheduler.pending_action = action + + # Advance one step (scheduler.schedule() is called inside generator) + tick_data = next(self.generator) reward = self._compute_reward(tick_data) + obs = self._get_state() done = 
self.engine.current_timestep >= min(self.engine.timestep_end, 1000) diff --git a/raps/resmgr/default.py b/raps/resmgr/default.py index c7791f5..1429a5f 100644 --- a/raps/resmgr/default.py +++ b/raps/resmgr/default.py @@ -42,7 +42,8 @@ class ExclusiveNodeResourceManager: """Assigns full nodes to a job (replay or count-based).""" # Ensure enough free nodes if len(self.available_nodes) < job.nodes_required: - raise ValueError(f"Not enough available nodes to schedule job {job.id}") + raise ValueError(f"Not enough available nodes to schedule job {job.id}", + f"{len(self.available_nodes)} < {job.nodes_required}") if policy == PolicyType.REPLAY and job.scheduled_nodes: # Telemetry replay: use the exact nodes diff --git a/raps/schedulers/rl.py b/raps/schedulers/rl.py new file mode 100644 index 0000000..2272e9d --- /dev/null +++ b/raps/schedulers/rl.py @@ -0,0 +1,35 @@ +from raps.schedulers.default import Scheduler as DefaultScheduler + + +class Scheduler(DefaultScheduler): + """ + Scheduler driven by RL agent actions. + RAPSEnv.step(action) sets env.pending_action, + then RLScheduler.schedule() reads it and acts. + """ + + def __init__(self, config, policy, resource_manager, env=None, *args, **kwargs): + super().__init__(config=config, policy=policy, resource_manager=resource_manager, *args, **kwargs) + self.env = env + self.pending_action = None + + def schedule(self, queue, running, current_time, **kwargs): + if not queue or self.pending_action is None: + return + + action = self.pending_action + if action >= len(queue): + return + + job = queue[action] + + # Check feasibility + if job.nodes_required <= len(self.resource_manager.available_nodes): + self.place_job_and_manage_queues(job, queue, running, current_time) + else: + # Invalid action → skip or log + if self.config.args.get("debug", False): + print(f"[t={current_time}] RL chose invalid job {job.id} (needs {job.nodes_required})") + + # Reset action after use + self.pending_action = None -- GitLab From ba563312a9f37c85304e4d6a99395e326eccae9b Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 27 Aug 2025 15:39:37 -0400 Subject: [PATCH 254/388] Modify mit_supercloud/loader.py to support single partition simulations --- raps/dataloaders/mit_supercloud/loader.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 6057210..abe0092 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -300,6 +300,18 @@ def load_data(local_dataset_path, **kwargs): cpu_only = (part == "part-cpu") mixed = (part == "part-gpu") + # handle single-partition configs (e.g., mit_supercloud.yaml) + if not cpu_only and not mixed: + config = kwargs.get("config") + gpus_per_node = config.get("gpus_per_node") + + if gpus_per_node == 0: + cpu_only = True + part = "part-cpu" + else: + mixed = True + part = "part-gpu" + # create nodelist mapping if cpu_only: with open(os.path.join(NL_PATH, "cpu_nodes.txt")) as f: -- GitLab From 00420f05e2f63b18ac74095e3ec3280c87e4a31b Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 27 Aug 2025 16:06:00 -0400 Subject: [PATCH 255/388] Fix bug in mit_supercloud/loader.py --- raps/dataloaders/mit_supercloud/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index abe0092..dfa9e44 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py 
@@ -602,7 +602,7 @@ def load_data(local_dataset_path, **kwargs): scheduled_nodes=rec.get("scheduled_nodes"), priority=rec.get("priority", 0), submit_time=submit_time, - time_limit=rec.get("time_limit", 0), + time_limit=rec.get("timelimit", 0), start_time=t0 - start_ts, end_time=t1 - start_ts, expected_run_time=max(0, t1-t0), -- GitLab From 456688064518ed0ab30cd34dc8647ad404fb1528 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 27 Aug 2025 16:11:56 -0400 Subject: [PATCH 256/388] More fixes to mit_supercloud. Fix experiments/mit.yaml and README.md to be consistent with recent changes --- README.md | 9 +++++---- experiments/mit.yaml | 3 +-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 665a64a..831ef63 100644 --- a/README.md +++ b/README.md @@ -62,16 +62,17 @@ For MIT Supercloud python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:00 --end 2021-05-21T14:00 # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files - python multi-part-sim.py -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud \ - --start 2021-05-21T13:00 --end 2021-05-21T14:00 + python multi-part-sim.py -x mit_supercloud -f $DPATH --start 2021-05-21T13:00 --end 2021-05-21T14:00 + # or simply + python multi-part-sim.py experiments/mit.yaml # Note: if no start, end dates provided will default to run 24 hours between # 2021-05-21T00:00 to 2021-05-22T00:00 set by defaults in raps/dataloaders/mit_supercloud/utils.py # Re-run simulation using npz files (much faster load) - python multi-part-sim.py -x mit_supercloud/* -f part-*.npz --system mit_supercloud + python multi-part-sim.py -x mit_supercloud -f part-*.npz # Synthetic tests for verification studies: - python multi-part-sim.py -x 'mit_supercloud/*' -w multitenant + python multi-part-sim.py -x mit_supercloud -w multitenant For Lumi diff --git a/experiments/mit.yaml b/experiments/mit.yaml index bc718e4..83892f9 100644 --- a/experiments/mit.yaml +++ b/experiments/mit.yaml @@ -1,6 +1,5 @@ -system: mit_supercloud partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] replay: - - ~/data/mit/202201 + - /opt/data/mit_supercloud start: 2021-05-21T13:00 end: 2021-05-21T14:00 -- GitLab From d6ddb07acaf721877d494dcb54ac9429ad68476b Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 27 Aug 2025 16:51:24 -0400 Subject: [PATCH 257/388] Get RL working with mit_supercloud telemetry --- raps/envs/raps_env.py | 83 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index 97fa77e..31d7aa1 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -37,14 +37,13 @@ class RAPSEnv(gym.Env): self.flops_manager = FLOPSManager(**self.args_dict) self.telemetry = Telemetry(**self.args_dict) - # --- workload (synthetic for now) --- - wl = Workload(self.cli_args, self.config) - jobs = wl.generate_jobs() + # --- Build initial jobs & time bounds --- + self.jobs, self.timestep_start, self.timestep_end = self._build_jobs() self.engine = Engine( power_manager=self.power_manager, flops_manager=self.flops_manager, - jobs=jobs, + jobs=self.jobs, **self.args_dict ) @@ -71,11 +70,14 @@ class RAPSEnv(gym.Env): **self.config ) + self.timestep_start = 0 + self.timestep_end = self.config.get("SIM_END", 1000) + self.generator = self.layout_manager.run_stepwise( - jobs, - timestep_start=0, - timestep_end=self.config.get("SIM_END", 1000), - time_delta=self.args_dict.get("time_delta", 1), + self.jobs, + 
timestep_start=self.timestep_start, + timestep_end=self.timestep_end, + time_delta=self.args_dict.get("time_delta"), ) # --- RL spaces --- @@ -86,16 +88,63 @@ class RAPSEnv(gym.Env): ) self.action_space = spaces.Discrete(max_jobs) + def _build_jobs(self): + """ + Build a job list either from synthetic workload (--workload) + or from telemetry replay (--replay). + Returns: jobs, timestep_start, timestep_end + """ + # --- Case 1: Telemetry replay --- + if self.cli_args and getattr(self.cli_args, "replay"): + result = self.telemetry.load_jobs_times_args_from_files( + files=self.cli_args.replay, + args=self.cli_args, + config=self.config, + ) + + # Handle 3-tuple vs 4-tuple return + if len(result) == 3: + jobs, start_time, end_time = result + elif len(result) == 4: + jobs, start_time, end_time, _ = result + else: + raise ValueError(f"Unexpected telemetry return format: {len(result)} values") + + # Flatten partitioned jobs if necessary + if jobs and isinstance(jobs[0], list): + jobs = [job for sublist in jobs for job in sublist] + + return jobs, start_time, end_time + + # --- Case 2: Synthetic workload generation --- + elif self.cli_args and getattr(self.cli_args, "workload"): + wl = Workload(self.cli_args, self.config) + jobs = wl.generate_jobs() + + # For synthetic jobs, compute timestep_end from submit + run_time + timestep_start = 0 + timestep_end = max( + (getattr(job, "end_time", None) or getattr(job, "expected_run_time", 0) + job.submit_time) + for job in jobs + ) + return jobs, timestep_start, timestep_end + + # --- Error: neither replay nor workload specified --- + else: + raise ValueError("RAPSEnv requires either --workload or --replay to build jobs.") + def reset(self, **kwargs): - """Reset environment (new workload + engine).""" - wl = Workload(self.cli_args, self.config) - jobs = wl.generate_jobs() - - self.engine.jobs = jobs - self.engine.timestep_start = 0 - # self.engine.timestep_end = int(max(job.wall_time for job in jobs)) - self.engine.timestep_end = 100 - self.engine.current_timestep = 0 + self.engine.jobs = self.jobs + self.engine.timestep_start = self.timestep_start + self.engine.timestep_end = self.timestep_end + self.engine.current_timestep = self.timestep_start + + self.generator = self.layout_manager.run_stepwise( + self.jobs, + timestep_start=self.timestep_start, + timestep_end=self.timestep_end, + time_delta=self.args_dict.get("time_delta", 1), + ) return self._get_state() -- GitLab From 3ce571eabc1dc6dcaa8fa10bcf143afbb7c38ef3 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 27 Aug 2025 23:16:28 -0400 Subject: [PATCH 258/388] Output engine stats after train_rl.py training run --- raps/engine.py | 36 +++++++++++++++++++----------------- raps/envs/raps_env.py | 9 +++++++++ raps/stats.py | 5 ++++- train_rl.py | 8 +++++++- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index f79b140..b4aa713 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -363,23 +363,25 @@ class Engine: return simulation_complete def tick(self, *, time_delta=1, replay=False): - # Tick runs all simulations of interest at the given time delta interval. - # - # The simulations which are needed for simulations consistency at each time step - # (inside: the main simulation loop of run_simulation) are not part of tick. 
-        #
-        # Tick contains:
-        # For each running job:
-        #     - CPU utilization
-        #     - GPU utilization
-        #     - Network utilization
-        #
-        # From these the systems (across all nodes)
-        #     - System Utilization
-        #     - Power
-        #     - Cooling
-        #     - System Performance
-        # is simulated.
+        """
+        Tick runs all simulations of interest at the given time delta interval.
+
+        The simulations which are needed for simulation consistency at each time step
+        (inside the main simulation loop of run_simulation) are not part of tick.
+
+        Tick contains:
+        For each running job:
+            - CPU utilization
+            - GPU utilization
+            - Network utilization
+
+        From these, the system-wide quantities (across all nodes)
+            - System Utilization
+            - Power
+            - Cooling
+            - System Performance
+        are simulated.
+        """
 
         scheduled_nodes = []
         cpu_utils = []
diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py
index 31d7aa1..fc13ab3 100644
--- a/raps/envs/raps_env.py
+++ b/raps/envs/raps_env.py
@@ -11,6 +11,7 @@ from raps.ui import LayoutManager
 from raps.schedulers.rl import Scheduler
 # from raps.resmgr.default import MultiTenantResourceManager as ResourceManager
 from raps.resmgr.default import ExclusiveNodeResourceManager as ResourceManager
+from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats
@@ -212,3 +213,11 @@ class RAPSEnv(gym.Env):
     def render(self, mode="human"):
         print("Timestep:", self.engine.current_timestep,
               "Utilization:", self.telemetry.utilization(),
               "Power:", self.telemetry.power())
+
+    def get_stats(self):
+        return {
+            "engine_stats": get_engine_stats(self.engine),
+            "job_stats": get_job_stats(self.engine),
+            "scheduler_stats": get_scheduler_stats(self.engine),
+            "network_stats": get_network_stats(self.engine)
+        }
diff --git a/raps/stats.py b/raps/stats.py
index b4cfbfb..23158cd 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -46,7 +46,10 @@ def get_engine_stats(engine: Engine):
     # Multitenancy Stats
     total_jobs_loaded = engine.total_initial_jobs  # Assuming this is passed to __init__
     stats['total jobs loaded'] = total_jobs_loaded
-    stats['jobs completed percentage'] = f"{(engine.jobs_completed / total_jobs_loaded * 100):.2f}%"
+    if total_jobs_loaded > 0:
+        stats['jobs completed percentage'] = f"{(engine.jobs_completed / total_jobs_loaded * 100):.2f}%"
+    else:
+        stats['jobs completed percentage'] = "0%"
 
     if engine.node_occupancy_history:
         # Calculate average concurrent jobs per node (average density across all nodes and timesteps)
diff --git a/train_rl.py b/train_rl.py
index 9cefbd4..40026a0 100644
--- a/train_rl.py
+++ b/train_rl.py
@@ -2,6 +2,7 @@ from stable_baselines3 import PPO
 from raps.envs.raps_env import RAPSEnv
 from raps.system_config import get_system_config
 from raps.sim_config import args, args_dict
+from raps.stats import print_formatted_report
 
 config = get_system_config(args.system).get_legacy()
 args_dict['config'] = config
@@ -19,9 +20,14 @@ model = PPO(
     learning_rate=3e-4,  # default Adam lr, can try 1e-4 if unstable
     ent_coef=0.01,  # encourage exploration
     verbose=1,
+    tensorboard_log="./ppo_raps_logs/"
 )
 
-model.learn(total_timesteps=10000)
+model.learn(total_timesteps=10000, tb_log_name="ppo_raps")
+
+# Output stats
+stats = env.get_stats()
+print_formatted_report(**stats)
 
 # Save trained model
 model.save("ppo_raps")
--
GitLab


From 040c248a85fa5db4704331729b5996d018406998 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Thu, 28 Aug 2025 12:09:18 -0400
Subject: [PATCH 259/388] Add stats output after each RL training episode

---
 raps/envs/raps_env.py | 32 ++++++++++++++++++++++++++++++++
 train_rl.py           |  2 --
 2 files
changed, 32 insertions(+), 2 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index fc13ab3..1046d06 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -1,5 +1,6 @@ import gym import numpy as np +import os from gym import spaces from raps.engine import Engine @@ -14,6 +15,34 @@ from raps.resmgr.default import ExclusiveNodeResourceManager as ResourceManager from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats +def print_stats(stats): + os.system("clear") + wanted_keys = [ + "time simulated", + "average power", + "system power efficiency", + "total energy consumed", + "carbon emissions", + "jobs completed", + "throughput", + "jobs still running" + ] + + # merge just engine_stats + job_stats + combined = {} + for section in ["engine_stats", "job_stats"]: + if section in stats: + for k, v in stats[section].items(): + if k.lower() in wanted_keys: + pretty_key = k.replace("_", " ").title() + combined[pretty_key] = v + + # align only left column, leave right "ragged" + max_key_len = max(len(k) for k in combined.keys()) + for k, v in combined.items(): + print(f"{k.ljust(max_key_len)} | {v}") + + class RAPSEnv(gym.Env): """ Minimal Gym-compatible wrapper around RAPS Engine @@ -182,6 +211,9 @@ class RAPSEnv(gym.Env): obs = self._get_state() done = self.engine.current_timestep >= min(self.engine.timestep_end, 1000) + if done: + stats = self.get_stats() + print_stats(stats) info = { "scheduled_job": getattr(chosen_job, "id", None), diff --git a/train_rl.py b/train_rl.py index 40026a0..e98f7a3 100644 --- a/train_rl.py +++ b/train_rl.py @@ -2,7 +2,6 @@ from stable_baselines3 import PPO from raps.envs.raps_env import RAPSEnv from raps.system_config import get_system_config from raps.sim_config import args, args_dict -from raps.stats import print_formatted_report config = get_system_config(args.system).get_legacy() args_dict['config'] = config @@ -27,7 +26,6 @@ model.learn(total_timesteps=10000, tb_log_name="ppo_raps") # Output stats stats = env.get_stats() -print_formatted_report(**stats) # Save trained model model.save("ppo_raps") -- GitLab From 38c867de2aa55e85096721be7330c97a4f065fce Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 28 Aug 2025 12:16:33 -0400 Subject: [PATCH 260/388] Move to use SB3-style logger for stats output --- raps/envs/raps_env.py | 46 ++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index 1046d06..580b349 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -1,6 +1,5 @@ import gym import numpy as np -import os from gym import spaces from raps.engine import Engine @@ -14,33 +13,36 @@ from raps.schedulers.rl import Scheduler from raps.resmgr.default import ExclusiveNodeResourceManager as ResourceManager from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats +from stable_baselines3.common.logger import Logger, HumanOutputFormat +import sys + +logger = Logger( + folder=None, # no log file, just stdout + output_formats=[HumanOutputFormat(sys.stdout)] +) + + +def print_stats(stats, step=0): + """prints SB3-style stats output""" + + wanted_keys = { + "time simulated": "engine/Time Simulated", + "average power": "engine/Average Power", + "system power efficiency": "engine/System Power Efficiency", + "total energy consumed": "engine/Total Energy Consumed", + "carbon emissions": "engine/Carbon Emissions", + "jobs completed": "jobs/Jobs Completed", + 
"throughput": "jobs/Throughput", + "jobs still running": "jobs/Jobs Still Running", + } -def print_stats(stats): - os.system("clear") - wanted_keys = [ - "time simulated", - "average power", - "system power efficiency", - "total energy consumed", - "carbon emissions", - "jobs completed", - "throughput", - "jobs still running" - ] - - # merge just engine_stats + job_stats - combined = {} for section in ["engine_stats", "job_stats"]: if section in stats: for k, v in stats[section].items(): if k.lower() in wanted_keys: - pretty_key = k.replace("_", " ").title() - combined[pretty_key] = v + logger.record(wanted_keys[k.lower()], v) - # align only left column, leave right "ragged" - max_key_len = max(len(k) for k in combined.keys()) - for k, v in combined.items(): - print(f"{k.ljust(max_key_len)} | {v}") + logger.dump(step=step) class RAPSEnv(gym.Env): -- GitLab From 5283b6e66e684e0f328c13a7bced40b003633a89 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 28 Aug 2025 14:55:57 -0400 Subject: [PATCH 261/388] Fix bug in telemetry.py when trying to load npz file with --arrival or --scale --- raps/telemetry.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index f485daa..ee0e9fb 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -282,14 +282,14 @@ class Telemetry: if hasattr(args, 'scale') and args.scale: for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): - job['nodes_required'] = random.randint(1, args.scale) - job['scheduled_nodes'] = None # Setting to None triggers scheduler to assign nodes + job.nodes_required = random.randint(1, args.scale) + job.scheduled_nodes = None # Setting to None triggers scheduler to assign nodes if hasattr(args, 'arrival') and args.arrival == 'poisson': print("available nodes:", config['AVAILABLE_NODES']) for job in tqdm(jobs, desc="Rescheduling jobs"): - job['scheduled_nodes'] = None - job['submit_time'] = next_arrival_byconfargs(config, args) + job.scheduled_nodes = None + job.submit_time = next_arrival_byconfargs(config, args) else: trigger_custom_dataloader = True break -- GitLab From ca3c30af4cb9253e77fec93fdc6f1a80d70aca06 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 28 Aug 2025 14:57:37 -0400 Subject: [PATCH 262/388] Point all experiment data paths to /opt/data instead of ~/data --- experiments/frontier.yaml | 4 ++-- experiments/gcloudv2.yaml | 2 +- experiments/lassen.yaml | 2 +- experiments/marconi100.yaml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/experiments/frontier.yaml b/experiments/frontier.yaml index f865a19..280d95f 100644 --- a/experiments/frontier.yaml +++ b/experiments/frontier.yaml @@ -1,4 +1,4 @@ system: frontier replay: - - ~/data/frontier/slurm/joblive/date=2024-01-18 - - ~/data/frontier/jobprofile/date=2024-01-18 + - /opt/data/frontier/slurm/joblive/date=2024-01-18 + - /opt/data/frontier/jobprofile/date=2024-01-18 diff --git a/experiments/gcloudv2.yaml b/experiments/gcloudv2.yaml index 85a1d6c..db8e218 100644 --- a/experiments/gcloudv2.yaml +++ b/experiments/gcloudv2.yaml @@ -1,4 +1,4 @@ system: gcloudv2 replay: - - ~/data/gcloud/v2/google_cluster_data_2011_sample + - /opt/data/gcloud/v2/google_cluster_data_2011_sample ff: 600 diff --git a/experiments/lassen.yaml b/experiments/lassen.yaml index 5434a1b..7ee04be 100644 --- a/experiments/lassen.yaml +++ b/experiments/lassen.yaml @@ -1,6 +1,6 @@ system: lassen replay: - - ~/data/lassen/Lassen-Supercomputer-Job-Dataset + - /opt/data/lassen/Lassen-Supercomputer-Job-Dataset 
policy: fcfs backfill: firstfit fastforward: 365d diff --git a/experiments/marconi100.yaml b/experiments/marconi100.yaml index 0568157..8592229 100644 --- a/experiments/marconi100.yaml +++ b/experiments/marconi100.yaml @@ -1,3 +1,3 @@ system: marconi100 replay: - - ~/data/marconi100/job_table.parquet + - /opt/data/marconi100/job_table.parquet -- GitLab From 3c8fb136f305d031a2d2acfc4d6a3dac5e353200 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 28 Aug 2025 16:40:09 -0400 Subject: [PATCH 263/388] Modify mit_supercloud loader.py to use --arrival poisson, and add --episode_length option --- config/mit_supercloud.yaml | 51 ++++++++++++++ experiments/mitrl.yaml | 7 ++ raps/dataloaders/mit_supercloud/loader.py | 32 ++++++--- raps/envs/raps_env.py | 84 +++++++++++++++-------- raps/sim_config.py | 4 ++ raps/stats.py | 6 +- 6 files changed, 144 insertions(+), 40 deletions(-) create mode 100644 config/mit_supercloud.yaml create mode 100644 experiments/mitrl.yaml diff --git a/config/mit_supercloud.yaml b/config/mit_supercloud.yaml new file mode 100644 index 0000000..b780b10 --- /dev/null +++ b/config/mit_supercloud.yaml @@ -0,0 +1,51 @@ +system: + num_cdus: 12 + racks_per_cdu: 1 + nodes_per_rack: 40 + chassis_per_rack: 8 + nodes_per_blade: 1 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + cores_per_cpu: 24 + gpus_per_node: 0 + cpu_peak_flops: 2995200000000.0 + gpu_peak_flops: 0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 560 + power_cpu_idle: 1 + power_cpu_max: 6 + power_mem: 74.26 + power_nvme: 30 + power_nic: 20 + power_cdu: 8473.47 + power_switch: 250 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + multitenant: true + job_arrival_time: 1 + mtbf: 11 + trace_quanta: 10 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/experiments/mitrl.yaml b/experiments/mitrl.yaml new file mode 100644 index 0000000..c0adbfe --- /dev/null +++ b/experiments/mitrl.yaml @@ -0,0 +1,7 @@ +system: "mit_supercloud" +replay: + - /opt/data/mit_supercloud +start: 2021-05-21T21:00 +end: 2021-05-21T22:00 +episode_length: 500 +arrival: poisson diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index dfa9e44..ab08f69 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -118,7 +118,7 @@ from typing import Dict, Union, Optional from collections import Counter from raps.job import job_dict, Job -from raps.utils import summarize_ranges +from raps.utils import summarize_ranges, next_arrival from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END @@ -209,6 +209,8 @@ def load_data(local_dataset_path, **kwargs): jobs_list, sim_start_time, sim_end_time """ debug = kwargs.get("debug") + config = kwargs.get("config") + arrival = kwargs.get("arrival") NL_PATH = os.path.dirname(__file__) skip_counts = Counter() @@ -302,8 +304,7 @@ def load_data(local_dataset_path, **kwargs): # handle single-partition configs (e.g., mit_supercloud.yaml) if not cpu_only and not mixed: - config = kwargs.get("config") - gpus_per_node = 
config.get("gpus_per_node") + gpus_per_node = config.get("GPUS_PER_NODE") if gpus_per_node == 0: cpu_only = True @@ -528,7 +529,6 @@ def load_data(local_dataset_path, **kwargs): jobs_list = [] # Get CPUS_PER_NODE and GPUS_PER_NODE from config - config = kwargs.get('config', {}) cpus_per_node = config.get('CPUS_PER_NODE') cores_per_cpu = config.get('CORES_PER_CPU') # gpus_per_node = config.get('GPUS_PER_NODE') # Unused @@ -585,7 +585,21 @@ def load_data(local_dataset_path, **kwargs): cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node # Is this per CPU? cpu_tr = [min(x/cores_per_cpu/cpus_per_node, cpu_peak) for x in cpu_tr] - submit_time = rec.get("time_submit", t0) - start_ts + if arrival == "poisson": + job_arrival_time = config.get("JOB_ARRIVAL_TIME") + submit_time = next_arrival(1 / job_arrival_time) + start_time = submit_time + end_time = None + scheduled_nodes = None + telemetry_start = 0 + telemetry_end = 86640 + else: # replay + start_time = t0 - start_ts + end_time = t1 - start_ts + submit_time = rec.get("time_submit") - start_ts + scheduled_nodes = rec.get("scheduled_nodes") + telemetry_start = int(sl.time_start.min()) + telemetry_end = int(sl.time_end.max()) current_job_dict = job_dict( nodes_required=nr, @@ -599,12 +613,12 @@ def load_data(local_dataset_path, **kwargs): nrx_trace=[], end_state=rec.get("state_end", "unknown"), id=jid, - scheduled_nodes=rec.get("scheduled_nodes"), + scheduled_nodes=scheduled_nodes, priority=rec.get("priority", 0), submit_time=submit_time, time_limit=rec.get("timelimit", 0), - start_time=t0 - start_ts, - end_time=t1 - start_ts, + start_time=start_time, + end_time=end_time, expected_run_time=max(0, t1-t0), trace_time=len(cpu_tr)*quanta, trace_start_time=0, @@ -615,8 +629,6 @@ def load_data(local_dataset_path, **kwargs): jobs_list.append(job) # Calculate min_overall_utime and max_overall_utime - telemetry_start = int(sl.time_start.min()) - telemetry_end = int(sl.time_end.max()) # min_overall_utime = int(sl.time_submit.min()) # max_overall_utime = int(sl.time_submit.max()) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index 580b349..c5cd2f2 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -40,6 +40,8 @@ def print_stats(stats, step=0): if section in stats: for k, v in stats[section].items(): if k.lower() in wanted_keys: + if k.lower() == "jobs still running" and isinstance(v, list): + v = len(v) logger.record(wanted_keys[k.lower()], v) logger.dump(step=step) @@ -103,7 +105,7 @@ class RAPSEnv(gym.Env): ) self.timestep_start = 0 - self.timestep_end = self.config.get("SIM_END", 1000) + self.timestep_end = getattr(self.cli_args, "episode_length") self.generator = self.layout_manager.run_stepwise( self.jobs, @@ -180,7 +182,29 @@ class RAPSEnv(gym.Env): return self._get_state() - def _compute_reward(self, tick_data, alpha=1.0, beta=0.001, gamma=0.1): + def _compute_reward(self, tick_data): + """ + Reward function: minimize carbon footprint per job completed. + Encourages the agent to complete jobs while keeping emissions low. 
+        """
+        reward = 0.0
+
+        # Jobs completed this tick
+        jobs_completed = len(getattr(tick_data, "completed", []))
+
+        # Carbon emitted so far (metric tons CO2); defaults to 0.0 if the engine does not track it
+        carbon_so_far = getattr(self.engine, "carbon_emissions", 0.0)
+
+        if jobs_completed > 0:
+            # Reward is higher when more jobs finish with less carbon
+            reward = jobs_completed / (carbon_so_far + 1e-6)
+        else:
+            # Small penalty if no jobs finished (encourages progress)
+            reward = -0.01
+
+        return reward
+
+    def _compute_reward2(self, tick_data, alpha=10.0, beta=1.0, gamma=2.0):
         completed = getattr(tick_data, "completed", None)
         jobs_completed = len(completed) if completed else 0
         power = self.power_manager.history[-1][1]
@@ -188,40 +212,46 @@
 
         reward = alpha * jobs_completed - beta * power - gamma * queue_len
 
-        if self.args_dict.get("debug", False):
-            print(f"[t={self.engine.current_timestep}] jobs_completed={jobs_completed}, "
-                  f"power={power}, queue_len={queue_len}, reward={reward}")
+        print(f"[t={self.engine.current_timestep}] jobs_completed={jobs_completed}, "
+              f"power={power}, queue_len={queue_len}, reward={reward}")
 
         return reward
 
     def step(self, action):
-        chosen_job = None
+        queue = self.engine.queue
+        invalid_action = False
 
-        # Advance simulation by one step via generator
-        try:
-            tick_data = next(self.generator)
-        except StopIteration:
-            # Simulation finished
-            return self._get_state(), 0.0, True, {}
+        # If queue empty or index out of range → invalid
+        if len(queue) == 0 or action >= len(queue):
+            invalid_action = True
+        else:
+            job = queue[int(action)]
+            available = len(self.engine.scheduler.resource_manager.available_nodes)
+            if job.nodes_required <= available:
+                # Valid scheduling
+                self.engine.scheduler.place_job_and_manage_queues(
+                    job, queue, self.engine.running, self.engine.current_timestep
+                )
+            else:
+                invalid_action = True
 
-        # Store action for scheduler to pick up
-        self.scheduler.pending_action = action
+        # Advance simulation by one tick
         tick_data = next(self.generator)
-
-        # Advance one step (scheduler.schedule() is called inside generator)
-        tick_data = next(self.generator)
-        reward = self._compute_reward(tick_data)
+
+        # Compute reward
+        if invalid_action:
+            reward = -1.0
+        else:
+            reward = self._compute_reward(tick_data)
+
+        # Print stats
+        stats = self.get_stats()
+        print_stats(stats)
 
         obs = self._get_state()
-        done = self.engine.current_timestep >= min(self.engine.timestep_end, 1000)
-        if done:
-            stats = self.get_stats()
-            print_stats(stats)
-
-        info = {
-            "scheduled_job": getattr(chosen_job, "id", None),
-            "power": getattr(tick_data, "power", 0.0),
-            "completed": getattr(tick_data, "completed", []),
-        }
+        done = self.engine.current_timestep >= self.engine.timestep_end
+        info = {}
+
         return obs, reward, done, info
 
     def _get_state(self):
diff --git a/raps/sim_config.py b/raps/sim_config.py
index 127cec3..e0b68e2 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -208,6 +208,10 @@ class SimConfig(BaseModel):
     maxqueue: int = 50
     """ Specify the max queue length for continuous job generation """
 
+    # Reinforcement Learning
+    episode_length: int = 1000
+    """ Number of timesteps per RL episode (default 1000) """
+
     @model_validator(mode="before")
     def _parse_times(cls, data):
         time_fields = [
diff --git a/raps/stats.py b/raps/stats.py
index 23158cd..aa8610e 100644
--- a/raps/stats.py
+++ b/raps/stats.py
@@ -32,13 +32,13 @@ def get_engine_stats(engine: Engine):
     stats = {
         'time simulated': time_simulated,
         'num_samples': num_samples,
-
+        'average power': f'{average_power_mw:.4f} MW',
+        'min loss': f'{min_loss_mw:.4f} MW',
         'average loss': f'{average_loss_mw:.2f} MW',
         'max loss': f'{max_loss_mw:.2f} MW',
         'system power efficiency': f'{efficiency * 100:.2f}%',
         'total energy consumed': f'{total_energy_consumed:.2f} MW-hr',
-        'carbon emissions': f'{emissions:.2f} metric tons CO2',
+        'carbon emissions': f'{emissions:.4f} metric tons CO2',
         'total cost': f'${total_cost:.2f}'
     }
-- 
GitLab


From 0734cf87eb43af2f82dfe5053424e4cb8ec1ccba Mon Sep 17 00:00:00 2001
From: "Hines, Jesse"
Date: Fri, 29 Aug 2025 18:20:17 +0000
Subject: [PATCH 264/388] Refactoring to engine and entrypoint script

---
 .flake8                                       |   2 +-
 .gitignore                                    |   1 +
 README.md                                     |  52 +--
 main.py                                       | 426 ++++++------------
 multi-part-sim-mpi.py                         | 170 -------
 multi-part-sim.py                             | 185 --------
 pyproject.toml                                |   3 +
 raps/constants.py                             |   1 -
 raps/downtime.py                              |   7 +-
 raps/engine.py                                | 203 ++++++++-
 raps/multi_part_engine.py                     |  54 +++
 raps/network.py                               |   2 +-
 raps/plotting.py                              |   4 +-
 raps/run_sim.py                               | 236 ++++++++++
 raps/sim_config.py                            | 111 ++---
 raps/system_config.py                         |  15 +-
 raps/telemetry.py                             |  97 ++--
 raps/ui.py                                    |   4 -
 raps/workload.py                              |  28 +-
 tests/conftest.py                             |   2 +-
 tests/smoke.py                                |  12 +-
 tests/systems/conftest.py                     |  28 +-
 tests/systems/test_engine.py                  |  39 ++
 tests/systems/test_main_basic_run.py          |   4 +-
 tests/systems/test_main_cooling_run.py        |   2 +-
 .../test_main_cooling_uncertainty_run.py      |   2 +-
 tests/systems/test_main_fastforward_run.py    |   3 +-
 tests/systems/test_main_help.py               |   4 +-
 tests/systems/test_main_network_run.py        |   2 +-
 .../systems/test_main_network_withdata_run.py |   5 +-
 tests/systems/test_main_noui_run.py           |   2 +-
 tests/systems/test_main_time_delta_run.py     |   5 +-
 .../test_main_time_delta_sub_second_run.py    |   5 +-
 tests/systems/test_main_time_ff_delta_run.py  |   7 +-
 tests/systems/test_main_time_run.py           |   3 +-
 tests/systems/test_main_uncertainty_run.py    |   2 +-
 tests/systems/test_main_withdata_run.py       |   5 +-
 .../systems/test_multi_part_sim_basic_run.py  |   3 +-
 .../test_multi_part_sim_network_run.py        |   7 +-
 .../test_multi_part_sim_withdata_run.py       |   6 +-
 tests/systems/test_telemetry_withdata_run.py  |   5 +-
 tests/systems/test_workload_synthetic.py      |  23 +-
 tests/test_main.py                            |  12 +-
 tests/util.py                                 |   9 +-
 44 files changed, 865 insertions(+), 933 deletions(-)
 delete mode 100644 multi-part-sim-mpi.py
 delete mode 100644 multi-part-sim.py
 create mode 100644 raps/multi_part_engine.py
 create mode 100644 raps/run_sim.py
 create mode 100644 tests/systems/test_engine.py

diff --git a/.flake8 b/.flake8
index ce4ab0a..ffffb5c 100644
--- a/.flake8
+++ b/.flake8
@@ -1,3 +1,3 @@
 [flake8]
-exclude = .git, __pycache__, venv*, simulation_results, third_party, models
+exclude = .git, __pycache__, venv*, simulation_results, third_party, models, .venv
 max-line-length = 120
diff --git a/.gitignore b/.gitignore
index 74a41d8..bf49923 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ venv
 *.npz
 *.prof
 simulation_results/
+models/*.fmu
diff --git a/README.md b/README.md
index 665a64a..a561278 100644
--- a/README.md
+++ b/README.md
@@ -19,37 +19,37 @@ Note: Requires python3.12 or greater.
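+Installing the package (e.g. with `pip install -e .`) provides the `raps` console command used
+throughout this README; it is registered under `[project.scripts]` in pyproject.toml.
+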
 ## Usage and help menu
 
-    python main.py -h
+    raps run -h
 
 ## Run simulator with default synthetic workload
 
-    python main.py
+    raps run
 
 ## Run simulator with telemetry replay
 
     # Frontier
     DATEDIR="date=2024-01-18"
     DPATH=~/data/frontier-sample-2024-01-18
-    python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR
+    raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR
 
 ## Open Telemetry dataset
 
 For Marconi supercomputer, download `job_table.parquet` from https://zenodo.org/records/10127767
 
     # Marconi100
-    python main.py --system marconi100 -f ~/data/marconi100/job_table.parquet
+    raps run --system marconi100 -f ~/data/marconi100/job_table.parquet
 
 For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from https://zenodo.org/records/14007065
 
     # Adastra MI250
-    python main.py --system adastraMI250 -f AdastaJobsMI250_15days.parquet
+    raps run --system adastraMI250 -f AdastaJobsMI250_15days.parquet
 
 For Google cluster trace v2
 
-    python main.py --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --ff 600
+    raps run --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --ff 600
 
     # analyze dataset
-    python -m raps.telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v
+    raps telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v
 
 For MIT Supercloud
 
     python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:00 --end 2021-05-21T14:00
 
     # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files
-    python multi-part-sim.py -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud \
+    raps run-multi-part -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud \
         --start 2021-05-21T13:00 --end 2021-05-21T14:00
 
     # Note: if no start, end dates provided, will default to run 24 hours between
     # 2021-05-21T00:00 to 2021-05-22T00:00, set by defaults in raps/dataloaders/mit_supercloud/utils.py
 
     # Re-run simulation using npz files (much faster load)
-    python multi-part-sim.py -x mit_supercloud/* -f part-*.npz --system mit_supercloud
+    raps run-multi-part -x mit_supercloud/* -f part-*.npz --system mit_supercloud
 
     # Synthetic tests for verification studies:
-    python multi-part-sim.py -x 'mit_supercloud/*' -w multitenant
+    raps run-multi-part -x 'mit_supercloud/*' -w multitenant
 
 For Lumi
 
     # Synthetic test for lumi multi-part-sim:
-    python multi-part-sim.py -x lumi/*
+    raps run-multi-part -x lumi/*
 
 ## Perform Network Simulation
 
 Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py`
 for how to get the datasets. To run a network simulation, use the following command:
 
-    python main.py -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --ff 365d -t 12h --arrival poisson --net
+    raps run -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --ff 365d -t 12h --arrival poisson --net
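 
+## YAML experiment configs
+
+`raps run` also accepts a YAML sim config file in place of CLI flags (pass `-` to read from
+stdin), and `raps show` prints the YAML equivalent of a set of CLI flags so an experiment can
+be re-run later. A minimal sketch (field names follow raps/sim_config.py; the values are
+illustrative only):
+
+    # experiment.yaml
+    system: frontier
+    policy: fcfs
+    time: 1h
+
+Then run it with:
+
+    raps run experiment.yaml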
 
 ## Snapshot of extracted workload data
 
 To reduce the expense of extracting the needed data from the telemetry parquet files,
 RAPS saves a snapshot of the extracted data in NPZ format.
 The NPZ file can be given instead of the parquet files for more quickly running subsequent simulations, e.g.:
 
-    python main.py -f jobs_2024-02-20_12-20-39.npz
+    raps run -f jobs_2024-02-20_12-20-39.npz
 
 ## Cooling models
 
 We provide several cooling models in the repo https://code.ornl.gov/exadigit/POWER9CSM
 
 Will install the POWER9CSM in the models folder. To activate cooling when running RAPS,
 use `--cooling` or `-c` argument, e.g.:
 
-    python main.py --system marconi100 -c
+    raps run --system marconi100 -c
 
-    python main.py --system lassen -c
+    raps run --system lassen -c
 
-    python main.py --system summit -c
+    raps run --system summit -c
 
 ## Support for multiple system partitions
 
 Multi-partition systems are supported by running the `multi-part-sim.py` script, where a list of
 configurations can be specified using the `-x` flag as follows:
 
-    python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu
+    raps run-multi-part -x setonix/part-cpu setonix/part-gpu
 
 or simply:
 
-    python multi-part-sim.py -x setonix/*   # bash
+    raps run-multi-part -x setonix/*   # bash
 
-    python multi-part-sim.py -x 'setonix/*'  # zsh
+    raps run-multi-part -x 'setonix/*'  # zsh
 
-To run this in parallel use:
-
-    mpiexec -n 2 python multi-part-sim-mpi.py -x setonix/part-cpu setonix/part-gpu
-
-*Note: first install `mpi4py` via pip or conda.*
 
 This will simulate synthetic workloads on two partitions as defined in `config/setonix-cpu` and
 `config/setonix-gpu`.
 
 To replay telemetry workloads from another system, e.g., Marconi100's PM100 dataset, first create
 a .npz snapshot of the telemetry data, e.g.,
 
-    python main.py --system marconi100 -f /path/to/marconi100/job_table.parquet
+    raps run --system marconi100 -f /path/to/marconi100/job_table.parquet
 
 This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this file to
 pm100.npz for clarity. Note: can control-C when the simulation starts. Now, this pm100.npz file
 can be used with `multi-part-sim.py` as follows:
 
-    python multi-part-sim.py -x setonix/* -f pm100.npz --arrival poisson --scale 192
+    raps run-multi-part -x setonix/* -f pm100.npz --arrival poisson --scale 192
 
 ## Modifications to telemetry replay
 
@@ -151,11 +145,11 @@ python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --pol
 
 ## Job-level power output example for replay of single job
 
-    python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --jid 1234567 -o
+    raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --jid 1234567 -o
 
 ## Compute stats on telemetry data, e.g., average job arrival time
 
-    python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR
+    raps telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR
 
 ## Build and run Docker container
diff --git a/main.py b/main.py
index c3ba946..cd7e162 100644
--- a/main.py
+++ b/main.py
@@ -1,298 +1,160 @@
 """
-Main driver for simulating the RAPS single-partition (homogeneous)
-system in the ExaDigiT digital twin. Supports synthetic workload
-generation or telemetry replay, dynamic power modeling (including
-conversion losses), and optional coupling to a thermo-fluids cooling
-model. Produces performance, utilization, and energy metrics, with
-optional plots and output files for analysis and validation.
+ExaDigiT Resource Allocator & Power Simulator (RAPS)
 """
-import json
-import numpy as np
-import random
-import pandas as pd
-import os
-import time
-import math
-#
+import yaml
+import argparse
+import sys
+from pathlib import Path
 from raps.helpers import check_python_version
-#
-from raps.system_config import get_system_config
-from raps.constants import OUTPUT_PATH, SEED
-from raps.cooling import ThermoFluidsModel
-from raps.ui import LayoutManager
-from raps.flops import FLOPSManager
-from raps.plotting import Plotter
-from raps.power import (
-    PowerManager,
-    compute_node_power,
-    compute_node_power_validate
-)
-from raps.power import (
-    compute_node_power_uncertainties,
-    compute_node_power_validate_uncertainties
-)
-from raps.engine import Engine
-from raps.telemetry import Telemetry
-from raps.workload import Workload
-from raps.account import Accounts
-from raps.weather import Weather
-from raps.utils import write_dict_to_file
-from raps.stats import (
-    get_engine_stats,
-    get_job_stats,
-    get_scheduler_stats,
-    get_network_stats,
-    print_formatted_report
-)
-
-from raps.sim_config import args, args_dict
+from raps.sim_config import SimConfig
+from raps.run_sim import run_sim, run_multi_part_sim
+from raps.workload import run_workload
+from raps.telemetry import run_telemetry, run_telemetry_add_args
+from raps.utils import pydantic_add_args, yaml_dump
+from pydantic_settings import SettingsConfigDict
 
 check_python_version()
 
 
-def main():
-    if args.verbose or args.debug:
-        print(args)
-
-    config = get_system_config(args.system).get_legacy()
-
-    if args.seed:
-        random.seed(SEED)
-        np.random.seed(SEED)
-
-    if args.cooling:
-        cooling_model = ThermoFluidsModel(**config)
-        cooling_model.initialize()
-        args.layout = "layout2"
-
-        if args_dict['start']:
-            cooling_model.weather = Weather(args_dict['start'], config=config)
+def read_sim_yaml(config_file: str):
+    if config_file == "-":
+        return yaml.safe_load(sys.stdin.read())
+    elif config_file:
+        return yaml.safe_load(Path(config_file).read_text())
     else:
-        cooling_model = None
-
-    if args.validate:
-        if args.uncertainties:
-            power_manager = PowerManager(compute_node_power_validate_uncertainties, **config)
-        else:
-            power_manager = PowerManager(compute_node_power_validate, **config)
-    else:
-        if args.uncertainties:
-            power_manager = PowerManager(compute_node_power_uncertainties, **config)
-        else:
-            power_manager = PowerManager(compute_node_power, **config)
-    args_dict['config'] = config
-    flops_manager = FLOPSManager(**args_dict)
-
-    if args.live and not args.replay:
-        assert args.time is not None, {"--time must be set, specifing how long we want to predict"}
-        td = Telemetry(**args_dict)
-        jobs, timestep_start, timestep_end = \
-            td.load_jobs_times_args_from_live_system()
-        if args.output is not None:
-            td.save_snapshot(jobs=jobs, timestep_start=timestep_start,
-                             timestep_end=timestep_end, args=args, filename=td.dirname)
+        return {}
 
-    elif args.replay:
-        td = Telemetry(**args_dict)
-        jobs, timestep_start, timestep_end, args_from_file = \
-            td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config)
-        # TODO: Merge args and args_from_files? see telemetry.py:97
-
-    else:  # Synthetic jobs
-        wl = Workload(args, config)
-        jobs = wl.generate_jobs()
-
-        if args.verbose:
-            for job in jobs:
-                print('jobid:', job.id, '\tlen(gpu_trace):',
-                      len(job.gpu_trace) if isinstance(job.gpu_trace, list)
-                      else job.gpu_trace, '\twall_time(s):',
-                      job.wall_time)
-            time.sleep(2)
-
-        timestep_start = 0
-        if hasattr(jobs[0], 'end_time'):
-            timestep_end = int(math.ceil(max([job.end_time for job in jobs])))
-        else:
-            timestep_end = 88200  # 24 hours
-
-        td = Telemetry(**args_dict)
-        td.save_snapshot(jobs=jobs, timestep_start=timestep_start,
-                         timestep_end=timestep_end, args=args, filename=td.dirname)
-
-    if args.fastforward is not None:
-        timestep_start = timestep_start + args.fastforward
-
-    if args.time is not None:
-        timestep_end = timestep_start + args.time
-
-    if args.time_delta is not None:
-        time_delta = args.time_delta
-    else:
-        time_delta = 1
+CLI_CONFIG = SettingsConfigDict(
+    cli_implicit_flags=True,
+    cli_kebab_case=True,
+)
 
-    if args.continuous_job_generation:
-        continuous_workload = wl
-    else:
-        continuous_workload = None
 
-    sc = Engine(
-        power_manager=power_manager,
-        flops_manager=flops_manager,
-        cooling_model=cooling_model,
-        continuous_workload=continuous_workload,
-        jobs=jobs,
-        **args_dict,
+def main():
+    parser = argparse.ArgumentParser(
+        description="""
+        ExaDigiT Resource Allocator & Power Simulator (RAPS)
+        """,
+        allow_abbrev=False,
     )
-
-    DIR_NAME = td.dirname
-    OPATH = OUTPUT_PATH / DIR_NAME
-    print("Output directory is: ", OPATH)
-    sc.opath = OPATH
-
-    if args.accounts:
-        job_accounts = Accounts(jobs)
-        if args.accounts_json:
-            loaded_accounts = Accounts.from_json_filename(args.accounts_json)
-            accounts = Accounts.merge(loaded_accounts, job_accounts)
-        else:
-            accounts = job_accounts
-        sc.accounts = accounts
-
-    if args.plot or args.output is not None:
-        try:
-            os.makedirs(OPATH)
-        except OSError as error:
-            print(f"Error creating directory: {error}")
-
-    if args.verbose:
-        print(jobs)
-
-    total_timesteps = timestep_end - timestep_start
-
-    downscale = args.downscale
-    downscale_str = ""if downscale == 1 else f"/{downscale}"
-    print(f"Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str}"
-          f" seconds from {timestep_start} to {timestep_end}.")
-    print(f"Simulation time delta: {time_delta}{downscale_str} s,"
-          f"Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.")
-    layout_manager = LayoutManager(args.layout, engine=sc, debug=args.debug,
-                                   total_timesteps=total_timesteps,
-                                   args_dict=args_dict, **config)
-    layout_manager.run(jobs, timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta)
-
-    engine_stats = get_engine_stats(sc)
-    job_stats = get_job_stats(sc)
-    scheduler_stats = get_scheduler_stats(sc)
-    if sc.simulate_network:
-        network_stats = get_network_stats(sc)
-    else:
-        network_stats = None
-
-    print_formatted_report(engine_stats=engine_stats,
-                           job_stats=job_stats,
-                           scheduler_stats=scheduler_stats,
-                           network_stats=network_stats
-                           )
-
-    if downscale_str:
-        downscale_str = "1" + downscale_str
-
-    if args.plot:
-        if 'power' in args.plot:
-            pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History',
-                         OPATH / f'power.{args.imtype}',
-                         uncertainties=args.uncertainties)
-            x, y = zip(*power_manager.history)
-            pl.plot_history(x, y)
-
-        if 'util' in args.plot:
-            pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)',
-                         'System Utilization History', OPATH / f'util.{args.imtype}')
-            x, y = zip(*sc.sys_util_history)
-            pl.plot_history(x, y)
-
-        if 'loss' in args.plot:
-            pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History',
-                         OPATH / f'loss.{args.imtype}',
-                         uncertainties=args.uncertainties)
-            x, y = zip(*power_manager.loss_history)
-            pl.plot_history(x, y)
-
-            pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History',
-                         OPATH / f'loss_pct.{args.imtype}',
-                         uncertainties=args.uncertainties)
-            x, y = zip(*power_manager.loss_history_percentage)
-            pl.plot_history(x, y)
-
-        if 'pue' in args.plot:
-            if cooling_model:
-                ylabel = 'pue'
-                title = 'FMU ' + ylabel + 'History'
-                pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / f'pue.{args.imtype}',
-                             uncertainties=args.uncertainties)
-                df = pd.DataFrame(cooling_model.fmu_history)
-                df.to_parquet('cooling_model.parquet', engine='pyarrow')
-                pl.plot_history(df['time'], df[ylabel])
-            else:
-                print('Cooling model not enabled... skipping output of plot')
-
-        if 'temp' in args.plot:
-            if cooling_model:
-                ylabel = 'Tr_pri_Out[1]'
-                title = 'FMU ' + ylabel + 'History'
-                pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, OPATH / 'temp.svg')
-                df = pd.DataFrame(cooling_model.fmu_history)
-                df.to_parquet('cooling_model.parquet', engine='pyarrow')
-                pl.plot_compare(df['time'], df[ylabel])
-            else:
-                print('Cooling model not enabled... skipping output of plot')
-
-    if args.output is not None:
-
-        if args.uncertainties:
-            # Parquet cannot handle annotated ufloat format AFAIK
-            print('Data dump not implemented using uncertainties!')
-        else:
-            if cooling_model:
-                df = pd.DataFrame(cooling_model.fmu_history)
-                df.to_parquet(OPATH / 'cooling_model.parquet', engine='pyarrow')
-
-            df = pd.DataFrame(power_manager.history)
-            df.to_parquet(OPATH / 'power_history.parquet', engine='pyarrow')
-
-            df = pd.DataFrame(power_manager.loss_history)
-            df.to_parquet(OPATH / 'loss_history.parquet', engine='pyarrow')
-
-            df = pd.DataFrame(sc.sys_util_history)
-            df.to_parquet(OPATH / 'util.parquet', engine='pyarrow')
-
-            # Schedule history
-            job_history = pd.DataFrame(sc.get_job_history_dict())
-            job_history.to_csv(OPATH / "job_history.csv", index=False)
-
-            scheduler_running_history = pd.DataFrame(sc.get_scheduler_running_history())
-            scheduler_running_history.to_csv(OPATH / "running_history.csv", index=False)
-            scheduler_queue_history = pd.DataFrame(sc.get_scheduler_running_history())
-            scheduler_queue_history.to_csv(OPATH / "queue_history.csv", index=False)
-
-        try:
-            with open(OPATH / 'stats.out', 'w') as f:
-                json.dump(engine_stats, f, indent=4)
-                json.dump(job_stats, f, indent=4)
-        except TypeError:  # Is this the correct error code?
-            write_dict_to_file(engine_stats, OPATH / 'stats.out')
-            write_dict_to_file(job_stats, OPATH / 'stats.out')
-
-        if args.accounts:
-            try:
-                with open(OPATH / 'accounts.json', 'w') as f:
-                    json_string = json.dumps(sc.accounts.to_dict())
-                    f.write(json_string)
-            except TypeError:
-                write_dict_to_file(sc.accounts.to_dict(), OPATH / 'accounts.json')
-        print("Output directory is: ", OPATH)  # If output is enabled, the user wants this information as last output
+    subparsers = parser.add_subparsers(required=True)
+
+    # Shortcut for common sim args
+    sim_shortcuts = {
+        "partitions": "x",
+        "cooling": "c",
+        "simulate-network": "net",
+        "fastforward": "ff",
+        "time": "t",
+        "debug": "d",
+        "numjobs": "n",
+        "verbose": "v",
+        "output": "o",
+        "uncertainties": "u",
+        "plot": "p",
+        "replay": "f",
+        "workload": "w",
+    }
+
+    # ==== raps run ====
+    cmd_run = subparsers.add_parser("run", description="""
+        Run single-partition (homogeneous) systems. Supports synthetic workload generation or
+        telemetry replay, dynamic power modeling (including conversion losses), and optional
+        coupling to a thermo-fluids cooling model. Produces performance, utilization, and
+        energy metrics, with optional plots and output files for analysis and validation.
+    """)
+    cmd_run.add_argument("config_file", nargs="?", default=None, help="""
+        YAML sim config file, can be used to configure an experiment instead of using CLI
+        flags. Pass "-" to read from stdin.
+    """)
+    cmd_run_validate = pydantic_add_args(cmd_run, SimConfig, model_config={
+        **CLI_CONFIG,
+        "cli_shortcuts": sim_shortcuts,
+    })
+
+    def cmd_run_func(args):
+        sim_config = cmd_run_validate(args, read_sim_yaml(args.config_file))
+        run_sim(sim_config)
+    cmd_run.set_defaults(func=cmd_run_func)
+
+    # ==== raps run-multi-part ====
+    # It might make sense to combine these into a single entrypoint. Though the multi-part run
+    # doesn't support UI or the same output options.
+    cmd_run_multi_part = subparsers.add_parser("run-multi-part", description="""
+        Simulates multi-partition (heterogeneous) systems. Supports replaying telemetry or
+        generating synthetic workloads across CPU-only, GPU, and mixed partitions. Initializes
+        per-partition power, FLOPS, and scheduling models, then advances simulations in lockstep.
+        Outputs per-partition performance, utilization, and energy statistics for systems such as
+        MIT Supercloud, Setonix, Adastra, and LUMI.
+    """)
+    cmd_run_multi_part.add_argument("config_file", nargs="?", default=None, help="""
+        YAML sim config file, can be used to configure an experiment instead of using CLI
+        flags. Pass "-" to read from stdin.
+    """)
+    cmd_run_multi_part_validate = pydantic_add_args(cmd_run_multi_part, SimConfig, model_config={
+        **CLI_CONFIG,
+        "cli_shortcuts": sim_shortcuts,
+    })
+
+    def cmd_run_multi_part_func(args):
+        sim_config = cmd_run_multi_part_validate(args, read_sim_yaml(args.config_file))
+        run_multi_part_sim(sim_config)
+    cmd_run_multi_part.set_defaults(func=cmd_run_multi_part_func)
+
+    # ==== raps show ====
+    cmd_show = subparsers.add_parser("show", description="""
+        Outputs the given CLI args as a YAML config file that can be used to re-run the same
+        simulation.
+    """)
+    cmd_show.add_argument("config_file", nargs="?", default=None, help="""
+        Input YAML sim config file. Can be used to slightly modify an existing sim config.
+    """)
+    cmd_show.add_argument("--show-defaults", action="store_true", default=False, help="""
+        If true, include defaults in the output YAML
+    """)
+    cmd_show_validate = pydantic_add_args(cmd_show, SimConfig, model_config={
+        **CLI_CONFIG,
+        "cli_shortcuts": sim_shortcuts,
+    })
+
+    def cmd_show_func(args):
+        sim_config = cmd_show_validate(args, read_sim_yaml(args.config_file))
+        sim_config = sim_config.model_dump(mode="json",
+                                           exclude_defaults=not args.show_defaults)
+        print(yaml_dump(sim_config), end="")
+    cmd_show.set_defaults(func=cmd_show_func)
+
+    # ==== raps workload ====
+    # TODO: Separate the arguments for this command
+    cmd_workload = subparsers.add_parser("workload", description="""
+        Saves workload as a snapshot.
+    """)
+    cmd_workload.add_argument("config_file", nargs="?", default=None, help="""
+        YAML sim config file, can be used to configure an experiment instead of using CLI
+        flags. Pass "-" to read from stdin.
+ """) + cmd_workload_validate = pydantic_add_args(cmd_workload, SimConfig, model_config={ + **CLI_CONFIG, + "cli_shortcuts": sim_shortcuts, + }) + + def cmd_workload_func(args): + sim_config = cmd_workload_validate(args, read_sim_yaml(args.config_file)) + run_workload(sim_config) + cmd_show.set_defaults(func=cmd_workload_func) + + # ==== raps telemetry ==== + cmd_telemetry = subparsers.add_parser("telemetry", description=""" + Telemetry data validator + """) + run_telemetry_add_args(cmd_telemetry) + cmd_telemetry.set_defaults(func=run_telemetry) + + # TODO: move telemetry and other misc scripts into here + + args = parser.parse_args() + args.func(args) if __name__ == "__main__": diff --git a/multi-part-sim-mpi.py b/multi-part-sim-mpi.py deleted file mode 100644 index eabb19b..0000000 --- a/multi-part-sim-mpi.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -MPI-enabled driver for simulating multi-partition RAPS systems. -Distributes partitions across ranks with mpi4py for parallel run. -Supports telemetry replay or synthetic workloads with per-rank -power, FLOPS, and scheduling models. Outputs debug and summary -stats for heterogeneous systems (e.g., LUMI, Setonix, Adastra). -""" - -from tqdm import tqdm -from mpi4py import MPI -from raps.utils import next_arrival -from raps.workload import Workload -from raps.telemetry import Telemetry -from raps.power import PowerManager, compute_node_power -from raps.flops import FLOPSManager -from raps.engine import Engine -from raps.ui import LayoutManager -from raps.system_config import get_partition_configs -from raps.sim_config import args -import random -from raps.helpers import check_python_version -check_python_version() - - -def main(): - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - size = comm.Get_size() - - # 3) Load configs for every partition (all ranks do this) - multi_config = get_partition_configs(args.partitions) - partition_names = multi_config.partition_names - configs = [c.get_legacy() for c in multi_config.partitions] - args_dicts = [{**vars(args), 'config': cfg} for cfg in configs] - - # 4) Each rank decides which partition‐indices it owns (round-robin): - local_partition_indices = [i for i in range(len(partition_names)) if (i % size) == rank] - local_partition_names = [partition_names[i] for i in local_partition_indices] - # local_configs = [configs[i] for i in local_partition_indices] # Unused - # local_args_dicts = [args_dicts[i] for i in local_partition_indices] # Unused - - # 5) Rank 0 builds (or loads) the entire job list, assigns partitions, groups by partition, - # then scatters exactly those jobs to each rank. Other ranks just sit in the scatter: - if rank == 0: - # --- a) “REPLAY” branch? 
-        if args.replay:
-            td = Telemetry(**args_dicts[0])
-            print(f"[rank 0] Loading telemetry from {args.replay[0]}…")
-            jobs_full = td.load_snapshot(args.replay[0])
-            available_nodes = [c['AVAILABLE_NODES'] for c in configs]
-            for job in jobs_full:
-                job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0]
-            if args.scale:
-                for job in tqdm(jobs_full, desc="[rank 0] Scaling jobs…"):
-                    job['nodes_required'] = random.randint(1, args.scale)
-                    job['requested_nodes'] = None
-            if args.arrival == 'poisson':
-                for job in tqdm(jobs_full, desc="[rank 0] Rescheduling arrivals…"):
-                    p_name = job['partition']
-                    p_cfg = configs[partition_names.index(p_name)]
-                    job['requested_nodes'] = None
-                    job['submit_time'] = next_arrival(1 / p_cfg['JOB_ARRIVAL_TIME'])
-
-        # --- b) “SYNTHETIC WORKLOAD” branch:
-        else:
-            wl = Workload(*configs)
-            jobs_full = getattr(wl, args.workload)(num_jobs=args.numjobs)
-            available_nodes = [c['AVAILABLE_NODES'] for c in configs]
-            for job in jobs_full:
-                job['partition'] = random.choices(partition_names, weights=available_nodes, k=1)[0]
-
-        # --- c) Group “jobs_full” by partition name:
-        jobs_by_partition = {p: [] for p in partition_names}
-        for job in jobs_full:
-            jobs_by_partition[job['partition']].append(job)
-
-        # --- d) Build a list-of-lists, one list per rank, containing the union
-        #        of all jobs for that rank’s partitions:
-        jobs_for_rank = [[] for _ in range(size)]
-        for p_idx, p_name in enumerate(partition_names):
-            tgt = p_idx % size
-            jobs_for_rank[tgt].extend(jobs_by_partition[p_name])
-    else:
-        jobs_for_rank = None
-
-    # 6) Scatter the per-rank job lists:
-    local_jobs = comm.scatter(jobs_for_rank, root=0)
-
-    # 7) Re‐group each rank’s “local_jobs” into a dict keyed by its local_partition_names:
-    local_jobs_by_partition = {p: [] for p in local_partition_names}
-    for job in local_jobs:
-        local_jobs_by_partition[job['partition']].append(job)
-
-    # 8) Build one LayoutManager (and Engine/PowerManager/FLOPSManager) per local partition:
-    layout_managers = {}
-    for idx, p_name in enumerate(local_partition_names):
-        global_idx = local_partition_indices[idx]
-        cfg = configs[global_idx]
-        ad = args_dicts[global_idx]
-
-        pm = PowerManager(compute_node_power, **cfg)
-        fm = FLOPSManager(**ad)
-        sc = Engine(power_manager=pm, flops_manager=fm,
-                    cooling_model=None, **ad)
-
-        layout_managers[p_name] = LayoutManager(args.layout,
-                                                engine=sc,
-                                                debug=args.debug,
-                                                **cfg)
-
-    # 9) Compute timestep_start / timestep_end (all ranks agree):
-    if args.fastforward:
-        fastforward = args.fastforward
-    else:
-        fastforward = 0
-
-    if args.time:
-        timesteps = args.time
-    else:
-        timesteps = 88200  # default 24 hours
-
-    timestep_start = fastforward
-    timestep_end = timestep_start + timesteps
-
-    # 10) Build a generator for each partition that this rank owns:
-    local_generators = {}
-    for p_name in local_partition_names:
-        gen = layout_managers[p_name].run_stepwise(
-            local_jobs_by_partition[p_name],
-            timestep_start=timestep_start,
-            timestep_end=timestep_end
-        )
-        local_generators[p_name] = gen
-
-    # 11) Main simulation loop (every rank steps its own partitions in lockstep):
-    UIF = configs[0]['UI_UPDATE_FREQ']  # assume same for all configs
-    for t in range(timesteps):
-        # --- a) Advance each local partition’s generator
-        for gen in local_generators.values():
-            try:
-                next(gen)
-            except StopIteration:
-                pass
-
-        # --- b) Every UI_UPDATE_FREQ, do per-rank prints + one global reduction
-        if (t % UIF) == 0:
-            # 1) sum our local sys_power
-            local_sys_power = sum(lm.engine.sys_power for lm in layout_managers.values())
-
-            # 2) print *our* partition‐level info now (so rank 0 and rank 1 will both print):
-            for p_name, lm in layout_managers.items():
-                sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else 0.0
-                print(f"[DEBUG][rank {rank}] {p_name} – Timestep {t} – "
-                      f"Jobs running: {len(lm.engine.running)} – "
-                      f"Utilization: {sys_util[1]:.2f}% – "
-                      f"Power: {lm.engine.sys_power:.1f}kW")
-
-            # 3) do an MPI reduce so that rank 0 knows the total across all ranks:
-            total_sys_power = comm.reduce(local_sys_power, op=MPI.SUM, root=0)
-            if rank == 0:
-                print(f"[DEBUG][rank {rank}] TOTAL system power (all partitions): {total_sys_power:.1f}kW")
-
-    # 12) Final barrier + exit message on rank 0
-    comm.Barrier()
-    if rank == 0:
-        print("Simulation complete (all ranks).")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/multi-part-sim.py b/multi-part-sim.py
deleted file mode 100644
index 587dffb..0000000
--- a/multi-part-sim.py
+++ /dev/null
@@ -1,185 +0,0 @@
-"""
-Main driver for simulating multi-partition (heterogeneous) systems in the RAPS
-module of ExaDigiT. Supports replaying telemetry or generating synthetic
-workloads across CPU-only, GPU, and mixed partitions. Initializes per-
-partition power, FLOPS, and scheduling models, then advances simulations in
-lockstep. Outputs per-partition performance, utilization, and energy
-statistics for systems such as MIT Supercloud, Setonix, Adastra, and LUMI.
-"""
-
-from tqdm import tqdm
-from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats
-from raps.utils import next_arrival
-from raps.workload import Workload
-from raps.telemetry import Telemetry
-from raps.power import PowerManager, compute_node_power
-from raps.flops import FLOPSManager
-from raps.engine import Engine
-from raps.ui import LayoutManager
-from raps.system_config import get_partition_configs
-from raps.sim_config import args
-import random
-import os
-from raps.helpers import check_python_version
-check_python_version()
-
-
-# Load configurations for each partition
-multi_config = get_partition_configs(args.partitions)
-partition_names = multi_config.partition_names
-configs = [c.get_legacy() for c in multi_config.partitions]
-args.system = multi_config.system_name
-
-args_dicts = [
-    {**vars(args), 'config': config, 'partition': partition_names[i]}
-    for i, config in enumerate(configs)
-]
-
-# Initialize Workload
-if args.replay:
-
-    jobs_by_partition = {}
-    t0_by_partition = {}
-    t1_by_partition = {}
-
-    if args.replay[0].endswith('.npz'):
-        # snapshot mode: pick the right .npz for each partition
-        snap_map = {os.path.basename(p): p for p in args.replay}
-        for ad in args_dicts:
-            part = ad['partition']          # e.g. 'mit_supercloud/part-cpu'
-            short = part.split('/')[-1]     # 'part-cpu'
-            snap_file = f"{short}.npz"
-            if snap_file not in snap_map:
-                raise RuntimeError(f"Snapshot '{snap_file}' not in {args.replay}")
-            td = Telemetry(**ad)
-            print(f"[{part}] loading snapshot {snap_file} …")
-            jobs_part, t0, t1, args_from_file = td.load_snapshot(snap_map[snap_file])
-            jobs_by_partition[part] = jobs_part
-    else:
-        # raw load_data mode
-        for ad in args_dicts:
-            part = ad['partition']
-            td = Telemetry(**ad)
-            print(f"\n[{part}] loading traces from {args.replay[0]} …")
-            jobs_part, t0, t1 = td.load_data(args.replay)
-            jobs_by_partition[part] = jobs_part
-            # td.save_snapshot(jobs_part, t0, t1, args_from_file, filename=part.split('/')[-1])
-            # Check if args need to be extracted or merged! Not implemented yet!
-            td.save_snapshot(jobs=jobs_part, timestep_start=t0, timestep_end=t1,
-                             filename=part.split('/')[-1], args=args)
-
-    # --- report how many jobs per partition ---
-    for part, jl in jobs_by_partition.items():
-        print(f"[INFO] Partition '{part}': {len(jl)} jobs loaded")
-
-    # now flatten into a single job list (or keep separate for your engine)
-    all_jobs_flat = []
-    for part in partition_names:
-        for job in jobs_by_partition[part]:
-            job.partition = part
-            all_jobs_flat.append(job)
-
-    total_initial_jobs = len(all_jobs_flat)
-    jobs = all_jobs_flat
-
-    if args.scale:
-        for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"):
-            job.nodes_required = random.randint(1, args.scale)
-
-    if args.arrival == 'poisson':
-        for job in tqdm(jobs, desc="Adjusting job submission time"):
-            partition = job.partition
-            partition_config = configs[partition_names.index(partition)]
-            job.submit_time = next_arrival(1 / partition_config['JOB_ARRIVAL_TIME'])
-
-else:  # Synthetic workload
-    wl = Workload(args, *configs)
-
-    total_initial_jobs = args.numjobs
-
-    # Generate jobs based on workload type
-    jobs = getattr(wl, args.workload)(args=args)
-
-# Group jobs by partition
-jobs_by_partition = {partition: [] for partition in partition_names}
-for job in jobs:
-    jobs_by_partition[job.partition].append(job)
-
-# Initialize layout managers for each partition
-layout_managers = {}
-for i, (config, ad) in enumerate(zip(configs, args_dicts)):
-    pm = PowerManager(compute_node_power, **configs[i])
-    fm = FLOPSManager(**args_dicts[i])
-    sc = Engine(power_manager=pm, flops_manager=fm, cooling_model=None,
-                jobs=jobs_by_partition[config['system_name']], total_initial_jobs=total_initial_jobs, **args_dicts[i])
-    layout_managers[config['system_name']] = LayoutManager(
-        args.layout, engine=sc, debug=args.debug, args_dict=ad, **config)
-
-# Set simulation timesteps
-if args.fastforward:
-    fastfoward = args.fastforward
-else:
-    fastforward = 0
-if args.time:
-    timesteps = args.time
-else:
-    timesteps = 88200  # Default to 24 hours
-
-timestep_start = fastforward
-timestep_end = timestep_start + timesteps
-
-if args.time_delta:
-    time_delta = args.time_delta
-else:
-    time_delta = config['TRACE_QUANTA']
-
-# Create generators for each layout manager
-generators = {name: lm.run_stepwise(jobs_by_partition[name],
-                                    timestep_start=timestep_start,
-                                    timestep_end=timestep_end,
-                                    time_delta=time_delta)
-              for name, lm in layout_managers.items()}
-
-# Step through all generators in lockstep
-for timestep in range(timesteps):
-    for name, gen in generators.items():
-        next(gen)  # Advance each generator
-
-    # Print debug info every UI_UPDATE_FREQ
-    if timestep % configs[0]['UI_UPDATE_FREQ'] == 0:  # Assuming same frequency for all partitions
-        sys_power = 0
-        for name, lm in layout_managers.items():
-            sys_util = lm.engine.sys_util_history[-1] if lm.engine.sys_util_history else (0, 0.0)
-            if hasattr(lm.engine.resource_manager, 'allocated_cpu_cores'):
-                allocated_cores = lm.engine.resource_manager.allocated_cpu_cores
-                print(f"[DEBUG] {name} - Timestep {timestep} - Jobs running: {len(lm.engine.running)} -",
-                      f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ",
-                      f"Power: {lm.engine.sys_power:.1f}kW", flush=True)
-            sys_power += lm.engine.sys_power
-        print(f"system power: {sys_power:.1f}kW", flush=True)
-
-print("Simulation complete.", flush=True)
-
-# Print statistics for each partition
-for name, lm in layout_managers.items():
-    print(f"\n=== Partition: {name} ===")
-
-    engine_stats = get_engine_stats(lm.engine)
-    job_stats = get_job_stats(lm.engine)
-    scheduler_stats = get_scheduler_stats(lm.engine)
-    if args.simulate_network:
-        network_stats = get_network_stats(lm.engine)
-
-    # Print a formatted report
-    print("\n--- Simulation Report ---")
-    for key, value in engine_stats.items():
-        print(f"{key.replace('_', ' ').title()}: {value}")
-    print("-------------------------\n")
-    print("\n--- Job Stat Report ---")
-    for key, value in job_stats.items():
-        print(f"{key.replace('_', ' ').title()}: {value}")
-    print("-------------------------\n")
-    print("\n--- Scheduler Report ---")
-    for key, value in scheduler_stats.items():
-        print(f"{key.replace('_', ' ').title()}: {value}")
-    print("-------------------------")
diff --git a/pyproject.toml b/pyproject.toml
index b7fbb99..f396280 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,3 +32,6 @@ dependencies = [
     "pydantic-settings>=2.10.1",
     "pre-commit"
 ]
+
+[project.scripts]
+raps = "main:main"
diff --git a/raps/constants.py b/raps/constants.py
index 0cdd2fd..53711e1 100644
--- a/raps/constants.py
+++ b/raps/constants.py
@@ -5,4 +5,3 @@ from pathlib import Path
 ELLIPSES = '\u2026'
 
 OUTPUT_PATH = Path('simulation_results')
-SEED = 42
diff --git a/raps/downtime.py b/raps/downtime.py
index 97c9139..ae8b82d 100644
--- a/raps/downtime.py
+++ b/raps/downtime.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 from typing import TYPE_CHECKING
 from raps.job import JobState
-from raps.sim_config import args, sim_config
 import numpy as np
 
@@ -15,6 +14,7 @@ class Downtime:
                  first_downtime,
                  downtime_interval,
                  downtime_length,
+                 debug=False
                  ):
         self.skip = False
         if downtime_length == 0 or downtime_interval == 0 or \
@@ -25,6 +25,7 @@ class Downtime:
         self.start: int = first_downtime
         self.end: int = 0
         self.down: bool = False
+        self.debug = debug
 
     def check_and_trigger(self, *,
                           timestep: int,
@@ -46,7 +47,7 @@ class Downtime:
     def simulate_down(self, *,
                       engine: Engine
                       ):
-        if args.debug:
+        if self.debug:
            print("Simulated downtime: before downtime start")
            print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")
 
@@ -66,7 +67,7 @@ class Downtime:
             engine.queue += engine.running
             engine.running = []
 
-        if args.debug:
+        if self.debug:
             print("Simulated downtime: after downtime start")
             print(f"Running: {len(engine.running)}, queued: {len(engine.queue)}")
         self.down = True
diff --git a/raps/engine.py b/raps/engine.py
index f79b140..f6da02c 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -1,6 +1,7 @@
 from typing import Optional, List
 import dataclasses
 import pandas as pd
+import numpy as np
 import threading
 import sys
 import tty
@@ -8,7 +9,8 @@ import termios
 import os
 import select
 import time
-
+import random
+import math
 from raps.job import Job, JobState
 from raps.policy import PolicyType
 from raps.utils import (
@@ -17,14 +19,28 @@ from raps.utils import (
 )
 from raps.resmgr import ResourceManager
 from raps.schedulers import load_scheduler
-from raps.power import record_power_stats_foreach_job
+from raps.power import (
+    PowerManager,
+    compute_node_power,
+    compute_node_power_validate,
+    record_power_stats_foreach_job,
+    compute_node_power_uncertainties,
+    compute_node_power_validate_uncertainties,
+)
 from raps.network import (
     NetworkModel,
     apply_job_slowdown,
     compute_system_network_stats
 )
-from raps.workload import continuous_job_generation
+from raps.telemetry import Telemetry
+from raps.cooling import ThermoFluidsModel
+from raps.flops import FLOPSManager
+from raps.workload import Workload, continuous_job_generation
+from raps.account import Accounts
 from raps.downtime import Downtime
+from raps.weather import Weather
+from raps.sim_config import SimConfig
+from raps.system_config import SystemConfig
 
 
 @dataclasses.dataclass
@@ -109,15 +125,20 @@ def keyboard_listener(state):
 class Engine:
     """Job scheduling simulation engine."""
 
-    def __init__(self, *, power_manager,
-                 flops_manager,
-                 cooling_model=None,
-                 config,
+    def __init__(self, *,
+                 power_manager: PowerManager,
+                 flops_manager: FLOPSManager,
+                 telemetry: Telemetry,
+                 cooling_model: ThermoFluidsModel | None = None,
                  jobs=None,
                  total_initial_jobs=0,
-                 continuous_workload=None,  # Workload class to generate from for continuous generation
-                 **kwargs):
-        self.config = config
+                 # Workload class to generate from for continuous generation
+                 continuous_workload: Workload | None = None,
+                 accounts=None,
+                 sim_config: SimConfig,
+                 system_config: SystemConfig,
+                 ):
+        self.config = system_config.get_legacy()
         self.down_nodes = summarize_ranges(self.config['DOWN_NODES'])
         self.resource_manager = ResourceManager(
             total_nodes=self.config['TOTAL_NODES'],
@@ -127,7 +148,8 @@ class Engine:
         # Initialize running and queue, etc.
         self.running = []
         self.queue = []
-        self.accounts = None
+        self.accounts = accounts
+        self.telemetry = telemetry
         self.job_history_dict = []
         self.jobs_completed = 0
         self.jobs_killed = 0
@@ -137,12 +159,12 @@ class Engine:
         self.sys_power = 0
         self.power_manager = power_manager
         self.flops_manager = flops_manager
-        self.debug = kwargs.get('debug')
+        self.debug = sim_config.debug
         self.continuous_workload = continuous_workload
-        self.output = kwargs.get('output')
-        self.replay = kwargs.get('replay')
-        self.downscale = kwargs.get('downscale', 1)  # Factor to downscale the 1s timesteps (power of 10)
-        self.simulate_network = kwargs.get('simulate_network')
+        self.output = sim_config.output
+        self.replay = sim_config.replay
+        self.downscale = sim_config.downscale  # Factor to downscale the 1s timesteps (power of 10)
+        self.simulate_network = sim_config.simulate_network
         self.sys_util_history = []
         self.scheduler_queue_history = []
         self.scheduler_running_history = []
@@ -152,18 +174,20 @@ class Engine:
         self.avg_slowdown_history = []
         self.max_slowdown_history = []
         self.node_occupancy_history = []
-        self.downtime = Downtime(first_downtime=kwargs.get('downtime_first'),
-                                 downtime_interval=kwargs.get('downtime_interval'),
-                                 downtime_length=kwargs.get('downtime_length'))
+        self.downtime = Downtime(first_downtime=sim_config.downtime_first,
+                                 downtime_interval=sim_config.downtime_interval,
+                                 downtime_length=sim_config.downtime_length,
+                                 debug=sim_config.debug,
+                                 )
 
         # Set scheduler type - either based on config or command-line args - defaults to 'default'
         if self.config['multitenant']:
             scheduler_type = 'multitenant'
         else:
-            scheduler_type = kwargs.get('scheduler', 'default')
+            scheduler_type = sim_config.scheduler
 
-        policy_type = kwargs.get('policy')
-        backfill_type = kwargs.get('backfill')
+        policy_type = sim_config.policy
+        backfill_type = sim_config.backfill
 
         self.scheduler = load_scheduler(scheduler_type)(
             config=self.config,
@@ -172,7 +196,7 @@ class Engine:
             resource_manager=self.resource_manager,
             jobs=jobs
         )
-        if kwargs.get('live'):
+        if sim_config.live:
             assert self.scheduler.policy != PolicyType.REPLAY, \
                 "Cannot replay from a live system. Choose a scheduling policy!"
print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}" @@ -181,10 +205,141 @@ class Engine: if self.simulate_network: available_nodes = self.resource_manager.available_nodes - self.network_model = NetworkModel(available_nodes=available_nodes, config=config, kwargs=kwargs) + self.network_model = NetworkModel( + available_nodes=available_nodes, + config=self.config, + ) else: self.network_model = None + @staticmethod + def from_sim_config(sim_config: SimConfig, partition: str | None = None): + if partition: + system_config_by_name = {s.system_name: s for s in sim_config.system_configs} + system_config = system_config_by_name.get(partition) + if not system_config: + raise ValueError(f"Partition {partition} isn't in SimConfig") + elif len(sim_config.system_configs) > 1: + raise ValueError( + "Engine can only run single-partition simulations. Use MultiPartEngine for " + + "multi-partition simulations, or pass partition to select the partition to run." + ) + else: + system_config = sim_config.system_configs[0] + + # Some temporary backwards/compatibility wrappers + system_config_dict = system_config.get_legacy() + sim_config_args = sim_config.get_legacy_args() + sim_config_dict = sim_config.get_legacy_args_dict() + sim_config_dict['config'] = system_config_dict + if partition: + sim_config_dict["system"] = sim_config.system_name + + if sim_config.seed: + random.seed(sim_config.seed) + np.random.seed(sim_config.seed + 1) + + if sim_config.cooling: + cooling_model = ThermoFluidsModel(**system_config_dict) + cooling_model.initialize() + if sim_config.start: + cooling_model.weather = Weather(sim_config.start, config=system_config_dict) + else: + cooling_model = None + + if sim_config.power_scope == 'node': + if sim_config.uncertainties: + power_manager = PowerManager(compute_node_power_validate_uncertainties, **system_config_dict) + else: + power_manager = PowerManager(compute_node_power_validate, **system_config_dict) + else: + if sim_config.uncertainties: + power_manager = PowerManager(compute_node_power_uncertainties, **system_config_dict) + else: + power_manager = PowerManager(compute_node_power, **system_config_dict) + + flops_manager = FLOPSManager( + config=system_config_dict, + validate=(sim_config.power_scope == "node"), + ) + + if sim_config.live and not sim_config.replay: + td = Telemetry(**sim_config_dict) + jobs, timestep_start, timestep_end = \ + td.load_jobs_times_args_from_live_system() + elif sim_config.replay: + # TODO: this will have issues if running separate systems or custom systems + partition_short = partition.split("/")[-1] if partition else None + td = Telemetry( + **sim_config_dict, + partition=partition, + ) + if partition: + snap_map = {p.stem: p for p in sim_config.replay[0].glob("*.npz")} + if len(snap_map) > 0: + if partition_short not in snap_map: + raise RuntimeError(f"Snapshot '{partition_short}.npz' not in {sim_config.replay[0]}") + replay_files = snap_map[partition_short] + else: + replay_files = sim_config.replay + else: + replay_files = sim_config.replay + + jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files( + files=replay_files, + args=sim_config_args, config=system_config_dict, + ) + else: # Synthetic jobs + wl = Workload(sim_config_args, system_config_dict) + jobs = wl.generate_jobs() + timestep_start = 0 + if hasattr(jobs[0], 'end_time'): + timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) + else: + timestep_end = 88200 # 24 hours + + td = Telemetry(**sim_config_dict) + + # TODO 
+        if sim_config.fastforward is not None:
+            timestep_start = timestep_start + sim_config.fastforward
+
+        if sim_config.time is not None:
+            timestep_end = timestep_start + sim_config.time
+
+        if sim_config.time_delta is not None:
+            time_delta = sim_config.time_delta
+        else:
+            time_delta = 1
+
+        if sim_config.continuous_job_generation:
+            continuous_workload = wl
+        else:
+            continuous_workload = None
+
+        accounts = None
+        if sim_config.accounts:
+            job_accounts = Accounts(jobs)
+            if sim_config.accounts_json:
+                loaded_accounts = Accounts.from_json_filename(sim_config.accounts_json)
+                accounts = Accounts.merge(loaded_accounts, job_accounts)
+            else:
+                accounts = job_accounts
+
+        engine = Engine(
+            power_manager=power_manager,
+            flops_manager=flops_manager,
+            cooling_model=cooling_model,
+            continuous_workload=continuous_workload,
+            jobs=jobs,
+            accounts=accounts,
+            telemetry=td,
+            sim_config=sim_config,
+            system_config=system_config,
+        )
+
+        return engine, jobs, timestep_start, timestep_end, time_delta
+
     def add_running_jobs_to_queue(self, jobs_to_submit: List):
         """
         Modifies jobs_to_submit and self.queue
diff --git a/raps/multi_part_engine.py b/raps/multi_part_engine.py
new file mode 100644
index 0000000..461425b
--- /dev/null
+++ b/raps/multi_part_engine.py
@@ -0,0 +1,54 @@
+from collections.abc import Iterable
+from raps.engine import Engine, TickData
+from raps.sim_config import SimConfig
+
+
+class MultiPartEngine:
+    def __init__(self, engines: dict[str, Engine], jobs: dict[str, list]):
+        self.partition_names = sorted(engines.keys())
+        self.engines = engines
+        self.jobs = jobs
+
+    @staticmethod
+    def from_sim_config(sim_config: SimConfig):
+        if sim_config.replay:
+            root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs)
+            # TODO should consider how to pass separate replay values for separate systems
+            if len(root_systems) > 1:
+                raise ValueError("Replay for multi-system runs is not supported")
+
+        jobs_by_partition = {}
+        engines: dict[str, Engine] = {}
+
+        timestep_start, timestep_end, time_delta = 0, 0, 0
+        for partition in sim_config.system_configs:
+            name = partition.system_name
+            engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(
+                sim_config, partition=name,
+            )
+            for job in jobs:
+                job.partition = name
+            jobs_by_partition[name] = jobs
+            engines[name] = engine
+        total_initial_jobs = sum(len(j) for j in jobs_by_partition.values())
+        for engine in engines.values():
+            engine.total_initial_jobs = total_initial_jobs
+
+        multi_engine = MultiPartEngine(
+            engines=engines,
+            jobs=jobs_by_partition,
+        )
+
+        return multi_engine, jobs_by_partition, timestep_start, timestep_end, time_delta
+
+    def run_simulation(self, jobs: dict, timestep_start, timestep_end, time_delta=1
+                       ) -> Iterable[dict[str, TickData | None]]:
+        generators = []
+        for part in self.partition_names:
+            generators.append(self.engines[part].run_simulation(
+                jobs[part], timestep_start, timestep_end, time_delta,
+            ))
+        for tick_datas in zip(*generators, strict=True):
+            yield dict(zip(self.partition_names, tick_datas))
+
+    # TODO need to add a mode to run the partitions in parallel
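+
+# Minimal usage sketch (assuming a SimConfig that selects two partitions):
+#
+#   multi_engine, jobs, t0, t1, dt = MultiPartEngine.from_sim_config(sim_config)
+#   for tick_datas in multi_engine.run_simulation(jobs, t0, t1, dt):
+#       ...  # tick_datas maps partition name -> TickData | None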
diff --git a/raps/network.py b/raps/network.py
index 9c457d4..b4340e4 100644
--- a/raps/network.py
+++ b/raps/network.py
@@ -8,7 +8,7 @@ from pathlib import Path
 
 class NetworkModel:
     """ """
-    def __init__(self, *, available_nodes, config, **kwargs):
+    def __init__(self, *, available_nodes, config):
         self.topology = config.get("TOPOLOGY")
 
         # if fat-tree, build the graph once
         if self.topology == "fat-tree":
diff --git a/raps/plotting.py b/raps/plotting.py
index 61a8f96..606ac7a 100644
--- a/raps/plotting.py
+++ b/raps/plotting.py
@@ -14,7 +14,7 @@ Plotter
 """
 
 import itertools
-
+from pathlib import Path
 import matplotlib.pyplot as plt
 import matplotlib.ticker as ticker
 from matplotlib.ticker import MaxNLocator
@@ -95,7 +95,7 @@ class Plotter(BasePlotter):
         The path to save the plot.
     """
 
-    def __init__(self, xlabel='', ylabel='', title='', save_path='out.svg', uncertainties=False):
+    def __init__(self, xlabel='', ylabel='', title='', save_path: Path | str = 'out.svg', uncertainties=False):
         """
         Constructs all the necessary attributes for the Plotter object.
diff --git a/raps/run_sim.py b/raps/run_sim.py
new file mode 100644
index 0000000..68229ad
--- /dev/null
+++ b/raps/run_sim.py
@@ -0,0 +1,236 @@
+"""
+Module containing the primary commands for use in the CLI. The simulation logic itself is kept in
+Engine and MultiPartEngine so that it can be used programmatically, such as in the simulation
+server. These functions just handle rendering the terminal UI, outputting results to files, etc.
+"""
+import json
+import pandas as pd
+import sys
+from raps.ui import LayoutManager
+from raps.plotting import Plotter
+from raps.engine import Engine
+from raps.multi_part_engine import MultiPartEngine
+from raps.utils import write_dict_to_file
+from raps.stats import (
+    get_engine_stats,
+    get_job_stats,
+    get_scheduler_stats,
+    get_network_stats,
+    print_formatted_report
+)
+
+from raps.sim_config import SimConfig
+
+
+def run_sim(sim_config: SimConfig):
+    if sim_config.verbose or sim_config.debug:
+        print(f"SimConfig: {sim_config.model_dump_json(indent=4)}")
+
+    if len(sim_config.system_configs) > 1:
+        print("Use run-multi-part to run multi-partition simulations")
+        sys.exit(1)
+
+    engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config)
+
+    out = sim_config.output
+    if out:
+        out.mkdir(parents=True)
+        engine.telemetry.save_snapshot(
+            jobs=jobs,
+            timestep_start=timestep_start,
+            timestep_end=timestep_end,
+            args=sim_config.get_legacy_args(), filename=str(out),
+        )
+
+    total_timesteps = timestep_end - timestep_start
+
+    downscale = sim_config.downscale
+    downscale_str = "" if downscale == 1 else f"/{downscale}"
+    print(f"Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str}"
+          f" seconds from {timestep_start} to {timestep_end}.")
+    print(f"Simulation time delta: {time_delta}{downscale_str} s, "
+          f"Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.")
+    layout_manager = LayoutManager(
+        sim_config.layout, engine=engine,
+        debug=sim_config.debug, total_timesteps=total_timesteps,
+        args_dict=sim_config.get_legacy_args_dict(), **sim_config.system_configs[0].get_legacy(),
+    )
+    layout_manager.run(
+        jobs,
+        timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta,
+    )
+
+    engine_stats = get_engine_stats(engine)
+    job_stats = get_job_stats(engine)
+    scheduler_stats = get_scheduler_stats(engine)
+    if engine.simulate_network:
+        network_stats = get_network_stats(engine)
+    else:
+        network_stats = None
+
+    print_formatted_report(
+        engine_stats=engine_stats,
+        job_stats=job_stats,
+        scheduler_stats=scheduler_stats,
+        network_stats=network_stats,
+    )
+
+    if downscale_str:
+        downscale_str = "1" + downscale_str
+
+    if sim_config.plot:
+        assert out  # SimConfig validation should check this
+        if 'power' in sim_config.plot:
+            pl = Plotter(f"Time ({downscale_str}s)", 'Power (kW)', 'Power History',
+                         out / f'power.{sim_config.imtype}',
+                         uncertainties=sim_config.uncertainties)
+            x, y = zip(*engine.power_manager.history)
+            pl.plot_history(x, y)
+
+        if 'util' in sim_config.plot:
+            pl = Plotter(f"Time ({downscale_str}s)", 'System Utilization (%)',
+                         'System Utilization History', out / f'util.{sim_config.imtype}')
+            x, y = zip(*engine.sys_util_history)
+            pl.plot_history(x, y)
+
+        if 'loss' in sim_config.plot:
+            pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (kW)', 'Power Loss History',
+                         out / f'loss.{sim_config.imtype}',
+                         uncertainties=sim_config.uncertainties)
+            x, y = zip(*engine.power_manager.loss_history)
+            pl.plot_history(x, y)
+
+            pl = Plotter(f"Time ({downscale_str}s)", 'Power Losses (%)', 'Power Loss History',
+                         out / f'loss_pct.{sim_config.imtype}',
+                         uncertainties=sim_config.uncertainties)
+            x, y = zip(*engine.power_manager.loss_history_percentage)
+            pl.plot_history(x, y)
+
+        if 'pue' in sim_config.plot:
+            if engine.cooling_model:
+                ylabel = 'pue'
+                title = 'FMU ' + ylabel + ' History'
+                pl = Plotter(f"Time ({downscale_str}s)", ylabel, title,
+                             out / f'pue.{sim_config.imtype}',
+                             uncertainties=sim_config.uncertainties)
+                df = pd.DataFrame(engine.cooling_model.fmu_history)
+                df.to_parquet('cooling_model.parquet', engine='pyarrow')
+                pl.plot_history(df['time'], df[ylabel])
+            else:
+                print('Cooling model not enabled... skipping output of plot')
+
+        if 'temp' in sim_config.plot:
+            if engine.cooling_model:
+                ylabel = 'Tr_pri_Out[1]'
+                title = 'FMU ' + ylabel + ' History'
+                pl = Plotter(f"Time ({downscale_str}s)", ylabel, title, out / 'temp.svg')
+                df = pd.DataFrame(engine.cooling_model.fmu_history)
+                df.to_parquet('cooling_model.parquet', engine='pyarrow')
+                pl.plot_compare(df['time'], df[ylabel])
+            else:
+                print('Cooling model not enabled... skipping output of plot')
+
+    if out:
+        if sim_config.uncertainties:
+            # Parquet cannot handle annotated ufloat format AFAIK
+            print('Data dump not implemented using uncertainties!')
+        else:
+            if engine.cooling_model:
+                df = pd.DataFrame(engine.cooling_model.fmu_history)
+                df.to_parquet(out / 'cooling_model.parquet', engine='pyarrow')
+
+            df = pd.DataFrame(engine.power_manager.history)
+            df.to_parquet(out / 'power_history.parquet', engine='pyarrow')
+
+            df = pd.DataFrame(engine.power_manager.loss_history)
+            df.to_parquet(out / 'loss_history.parquet', engine='pyarrow')
+
+            df = pd.DataFrame(engine.sys_util_history)
+            df.to_parquet(out / 'util.parquet', engine='pyarrow')
+
+            # Schedule history
+            job_history = pd.DataFrame(engine.get_job_history_dict())
+            job_history.to_csv(out / "job_history.csv", index=False)
+
+            scheduler_running_history = pd.DataFrame(engine.get_scheduler_running_history())
+            scheduler_running_history.to_csv(out / "running_history.csv", index=False)
+            scheduler_queue_history = pd.DataFrame(engine.get_scheduler_running_history())
+            scheduler_queue_history.to_csv(out / "queue_history.csv", index=False)
+
+        try:
+            with open(out / 'stats.out', 'w') as f:
+                json.dump(engine_stats, f, indent=4)
+                json.dump(job_stats, f, indent=4)
+        except TypeError:  # json.dump raises TypeError for values it cannot serialize
+ write_dict_to_file(engine_stats, out / 'stats.out') + write_dict_to_file(job_stats, out / 'stats.out') + + if sim_config.accounts: + try: + with open(out / 'accounts.json', 'w') as f: + json_string = json.dumps(engine.accounts.to_dict()) + f.write(json_string) + except TypeError: + write_dict_to_file(engine.accounts.to_dict(), out / 'accounts.json') + print("Output directory is: ", out) # If output is enabled, the user wants this information as last output + + +def run_multi_part_sim(sim_config: SimConfig): + multi_engine, jobs, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config) + + # TODO: The mit_supercloud dataloader seems to be outputting the wrong timesteps? mit_supercloud + # is the only multi-partition system with replay, so just manually overriding the timesteps here + # to fix it for now. The original multi-part-sim.py always started from timestep 0 as well. + timestep_end = timestep_end - timestep_start + timestep_start = 0 + + if sim_config.output: + for part, engine in multi_engine.engines.items(): + engine.telemetry.save_snapshot( + jobs=jobs[part], + timestep_start=timestep_start, timestep_end=timestep_end, + filename=part.split('/')[-1], + args=sim_config.get_legacy_args(), + ) + + ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq + gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) + + for tick_datas in gen: + sys_power = 0 + tick_datas = {k: v for k, v in tick_datas.items() if v} # Filter nones + timestep = list(tick_datas.values())[0].current_timestep if tick_datas else None + + if timestep and timestep % ui_update_freq == 0: + for part, tick_data in tick_datas.items(): + engine = multi_engine.engines[part] + + sys_util = engine.sys_util_history[-1] if engine.sys_util_history else (0, 0.0) + if hasattr(engine.resource_manager, 'allocated_cpu_cores'): + allocated_cores = engine.resource_manager.allocated_cpu_cores + print( + f"[DEBUG] {part} - Timestep {timestep} - Jobs running: {len(engine.running)} -", + f"Utilization: {sys_util[1]:.2f}% - Allocated Cores: {allocated_cores} - ", + f"Power: {engine.sys_power:.1f}kW", + flush=True, + ) + sys_power += engine.sys_power + print(f"system power: {sys_power:.1f}kW", flush=True) + + print("Simulation complete.", flush=True) + + # Print statistics for each partition + for part, engine in multi_engine.engines.items(): + print(f"\n=== Partition: {part} ===") + + engine_stats = get_engine_stats(engine) + job_stats = get_job_stats(engine) + scheduler_stats = get_scheduler_stats(engine) + network_stats = get_network_stats(engine) if sim_config.simulate_network else None + + # Print a formatted report + print_formatted_report( + engine_stats=engine_stats, + job_stats=job_stats, + scheduler_stats=scheduler_stats, + network_stats=network_stats, + ) diff --git a/raps/sim_config.py b/raps/sim_config.py index 127cec3..036ae8b 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -1,18 +1,13 @@ import argparse -import sys -import yaml +from functools import cached_property from datetime import timedelta -from pathlib import Path from typing import Literal from raps.schedulers.default import PolicyType, BackfillType - from raps.utils import ( - parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, - pydantic_add_args, yaml_dump, parse_td, + parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, parse_td, ) - -from pydantic import BaseModel, model_validator, computed_field -from pydantic_settings import 
SettingsConfigDict
+from raps.system_config import SystemConfig, get_partition_configs
+from pydantic import BaseModel, model_validator
 
 Distribution = Literal['uniform', 'weibull', 'normal']
@@ -44,13 +39,12 @@ class SimConfig(BaseModel):
     Step size (unit specified by `time_unit`, default seconds). Can pass a string like 15s, 1m, 1h, 1ms
     """
-    time_unit: timedelta
+    time_unit: timedelta = timedelta(seconds=1)
     """
     Units all time delta ints are measured in (default seconds)
     """
 
-    @computed_field
-    @property
+    @cached_property
     def downscale(self) -> int:
         return int(timedelta(seconds=1) / self.time_unit)
@@ -65,7 +59,7 @@ class SimConfig(BaseModel):
     uncertainties: bool = False
     """ Use float-with-uncertainties (much slower) """
-    seed: bool = False
+    seed: int | None = None
     """ Set RNG seed for deterministic simulation """
     output: ExpandedPath | None = None
     """ Output power, cooling, and loss models for later analysis. Argument specifies name. """
@@ -209,7 +203,10 @@
     """ Specify the max queue length for continuous job generation """
 
     @model_validator(mode="before")
-    def _parse_times(cls, data):
+    def _validate_before(cls, data):
+        # This is called with the raw input, before Pydantic parses it, so data is just a dict and
+        # can contain any data types.
+
         time_fields = [
             "time_delta", "time", "fastforward",
             "downtime_first", "downtime_interval", "downtime_length",
@@ -236,7 +233,8 @@
         return data
 
     @model_validator(mode="after")
-    def _validate(self):
+    def _validate_after(self):
+        # This is called after Pydantic has parsed everything into the model
         if self.system and self.partitions:
             raise ValueError("system and partitions are mutually exclusive")
         elif not self.system and not self.partitions:
@@ -245,11 +243,40 @@
         if not self.replay and not self.workload:
             self.workload = "random"
 
+        if self.cooling:
+            self.layout = "layout2"
+
         if self.jobsize_is_power_of is not None and self.jobsize_is_of_degree is not None:
             raise ValueError("jobsize_is_power_of and jobsize_is_of_degree are mutually exclusive")
 
+        if self.plot and not self.output:
+            raise ValueError("plot requires an output directory to be set")
+
+        if self.live and not self.replay and self.time is None:
+            raise ValueError("--time must be set, specifying how long we want to predict")
+
         return self
 
+    @property
+    def system_name(self) -> str:
+        """
+        Name of the system.
+        Note, this is different from `system`, as system can be a file or None if `partitions` is set.
+        """
+        return self._multi_partition_system_config.system_name
+
+    @property
+    def system_configs(self) -> list[SystemConfig]:
+        """
+        Return the SystemConfigs for the selected systems.
+        Will be a single element array unless multiple `partitions` are selected.
+ """ + return self._multi_partition_system_config.partitions + + @cached_property + def _multi_partition_system_config(self): + return get_partition_configs(self.partitions if self.partitions else [self.system]) + def get_legacy_args(self): """ Return as an argparse.Namespace object for backwards compatability @@ -265,6 +292,7 @@ class SimConfig(BaseModel): args_dict = self.model_dump(mode="json") # validate has been renamed to power_scope args_dict['validate'] = args_dict["power_scope"] == "node" + args_dict['downscale'] = self.downscale # Convert Path objects to str if args_dict['output']: @@ -276,56 +304,3 @@ class SimConfig(BaseModel): args_dict['sim_config'] = self return args_dict - - -def parse_args(cli_args=None) -> SimConfig: - parser = argparse.ArgumentParser( - description="Resource Allocator & Power Simulator (RAPS)", - allow_abbrev=False, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "config_file", nargs="?", default=None, - help=( - 'YAML sim config file, can be used to configure an experiment instead of using CLI ' + - 'flags. Pass "-" to read from stdin.' - ) - ) - - model_validate_args = pydantic_add_args(parser, SimConfig, model_config=SettingsConfigDict( - cli_implicit_flags=True, - cli_kebab_case=True, - cli_shortcuts={ - "partitions": "x", - "cooling": "c", - "simulate-network": "net", - "fastforward": "ff", - "time": "t", - "debug": "d", - "numjobs": "n", - "verbose": "v", - "output": "o", - "uncertainties": "u", - "plot": "p", - "replay": "f", - "workload": "w", - }, - )) - - args = parser.parse_args(cli_args) - if args.config_file == "-": - config_file_data = yaml.safe_load(sys.stdin.read()) - elif args.config_file: - config_file_data = yaml.safe_load(Path(args.config_file).read_text()) - else: - config_file_data = {} - - return model_validate_args(args, config_file_data) - - -sim_config = parse_args() -args = sim_config.get_legacy_args() -args_dict = sim_config.get_legacy_args_dict() - -if __name__ == "__main__": - print(yaml_dump(sim_config.model_dump(mode="json"))) diff --git a/raps/system_config.py b/raps/system_config.py index e458c68..642bb98 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -3,6 +3,7 @@ import glob import fnmatch from typing import Any, Literal from pathlib import Path +from functools import cached_property import yaml from pydantic import BaseModel, computed_field, model_validator, field_validator from raps.raps_config import raps_config @@ -41,27 +42,27 @@ class SystemSystemConfig(BaseModel): return self @computed_field - @property + @cached_property def num_racks(self) -> int: return self.num_cdus * self.racks_per_cdu - len(self.missing_racks) @computed_field - @property + @cached_property def sc_shape(self) -> list[int]: return [self.num_cdus, self.racks_per_cdu, self.nodes_per_rack] @computed_field - @property + @cached_property def total_nodes(self) -> int: return self.num_cdus * self.racks_per_cdu * self.nodes_per_rack @computed_field - @property + @cached_property def blades_per_chassis(self) -> int: return int(self.nodes_per_rack / self.chassis_per_rack / self.nodes_per_blade) @computed_field - @property + @cached_property def power_df_header(self) -> list[str]: power_df_header = ["CDU"] for i in range(1, self.racks_per_cdu + 1): @@ -73,7 +74,7 @@ class SystemSystemConfig(BaseModel): return power_df_header @computed_field - @property + @cached_property def available_nodes(self) -> int: return self.total_nodes - len(self.down_nodes) @@ -120,7 +121,7 @@ class 
SystemSchedulerConfig(BaseModel): trace_quanta: int min_wall_time: int max_wall_time: int - ui_update_freq: int + ui_update_freq: int # TODO should be moved to raps_config max_nodes_per_job: int job_end_probs: dict[JobEndStates, float] multitenant: bool = False diff --git a/raps/telemetry.py b/raps/telemetry.py index f485daa..5a09eb9 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -6,43 +6,14 @@ parsing parquet files, and generating job state information. The module defines a `Telemetry` class for managing telemetry data and several helper functions for data encryption and conversion between node name and index formats. """ -import re import sys import random import argparse -# import itertools +from pathlib import Path # import json -import os.path from typing import Optional from types import ModuleType - -if __name__ == "__main__": - # from raps.sim_config import args, args_dict - parser = argparse.ArgumentParser(description='Telemetry data validator') - parser.add_argument('--jid', type=str, default='*', help='Replay job id') - parser.add_argument('-f', '--replay', nargs='+', type=str, - help='Either: path/to/joblive path/to/jobprofile' - ' -or- filename.npz (overrides --workload option)') - parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs', 'nodes'], help='Output plots') - parser.add_argument("--is-results-file", action='store_true', default=False, help='Output plots') - parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, - # duplicate in workload! - help="Print Gannt with nodes required as line thickness (default false)") - parser.add_argument('-t', '--time', type=str, default=None, - help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') - parser.add_argument('--system', type=str, default='frontier', help='System config to use') - choices = ['prescribed', 'poisson'] - parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, - help=f"Modify arrival distribution ({choices[1]}) " - f"or use the original submit times ({choices[0]})") - parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') - parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.') - parser.add_argument("--live", action="store_true", help="Grab data from live system.") - - args = parser.parse_args() - args_dict = vars(args) - import importlib import numpy as np import pandas as pd @@ -57,8 +28,7 @@ from raps.plotting import ( plot_nodes_gantt, plot_network_histogram ) -from raps.utils import next_arrival_byconfargs, create_casename, convert_to_time_unit -# from raps.sim_config import args, args_dict +from raps.utils import next_arrival_byconfargs, convert_to_time_unit class Telemetry: @@ -69,18 +39,6 @@ class Telemetry: self.kwargs = kwargs self.system = kwargs.get('system') self.config = kwargs.get('config') - outname = kwargs.get('output') - if outname: - self.dirname = outname - elif kwargs.get("replay"): - # Try to extract date from given name to use as case directory - matched_date = re.search(r"\d{4}-\d{2}-\d{2}", kwargs['replay'][0]) - if matched_date: - self.dirname = f"sim={matched_date.group(0)}" - else: - self.dirname = create_casename() - else: - self.dirname = create_casename() try: self.dataloader = importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__) @@ -119,8 +77,7 @@ class Telemetry: timestep_end = int(data['timestep_end']) else: timestep_end = np.inf - print(timestep_end) - exit() + raise 
ValueError("Invalid timestep_end in snapshot")
         if 'args' in data:
             args_from_file = data['args'].tolist()
         else:
@@ -254,7 +211,7 @@ class Telemetry:
         jobs = []
         trigger_custom_dataloader = False
         for i, file in enumerate(files):
-            file = os.path.normpath(file.lstrip('"').rstrip('"'))
+            file = str(Path(file))
             if hasattr(args, 'is_results_file') and args.is_results_file:
                 if file.endswith(".csv"):
                     jobs, timestep_start, timestep, _ = self.load_csv_results(file)
@@ -295,17 +252,12 @@ class Telemetry:
                 break
 
         if trigger_custom_dataloader:
             # custom data loader
-            print(*args.replay)
             try:
                 jobs, timestep_start_from_data, timestep_end_from_data = self.load_data(args.replay)
             except AssertionError:
                 raise ValueError("Forgot --is-results-file ?")
             timestep_start = min(timestep_start, timestep_start_from_data)
             timestep_end = max(timestep_end, timestep_end_from_data)
-            self.save_snapshot(jobs=jobs,
-                               timestep_start=timestep_start,
-                               timestep_end=timestep_end,
-                               args=args, filename=self.dirname)
         if args.time:
             timestep_end = timestep_start + convert_to_time_unit(args.time)
         elif not timestep_end:
@@ -314,7 +266,30 @@ class Telemetry:
         return jobs, timestep_start, timestep_end, args
 
 
-def run_telemetry():
+def run_telemetry_add_args(parser: argparse.ArgumentParser):
+    parser.add_argument('--jid', type=str, default='*', help='Replay job id')
+    parser.add_argument('-f', '--replay', nargs='+', type=str,
+                        help='Either: path/to/joblive path/to/jobprofile'
+                        ' -or- filename.npz (overrides --workload option)')
+    parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs', 'nodes'], help='Output plots')
+    parser.add_argument("--is-results-file", action='store_true', default=False,
+                        help='Treat the replay files as simulation results files')
+    parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False,
+                        # duplicate in workload!
+                        help="Print Gantt with nodes required as line thickness (default false)")
+    parser.add_argument('-t', '--time', type=str, default=None,
+                        help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d')
+    parser.add_argument('--system', type=str, default='frontier', help='System config to use')
+    choices = ['prescribed', 'poisson']
+    parser.add_argument('--arrival', default=choices[0], type=str, choices=choices,
+                        help=f"Modify arrival distribution ({choices[1]}) "
+                        f"or use the original submit times ({choices[0]})")
+    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
+    parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.')
+    parser.add_argument("--live", action="store_true", help="Grab data from live system.")
+
+
+def run_telemetry(args):
+    args_dict = vars(args)
     config = get_system_config(args.system).get_legacy()
     args_dict['config'] = config
     td = Telemetry(**args_dict)
@@ -324,8 +299,10 @@ def run_telemetry():
         jobs, timestep_start, timestep_end = \
             td.load_jobs_times_args_from_live_system()
         if args.output:
-            td.save_snapshot(jobs=jobs, timestep_start=timestep_start,
-                             timestep_end=timestep_end, args=args, filename=td.dirname)
+            td.save_snapshot(
+                jobs=jobs, timestep_start=timestep_start,
+                timestep_end=timestep_end, args=args, filename=args.output,
+            )
 
     elif args.replay:
         jobs, timestep_start, timestep_end, _ = \
             td.load_jobs_times_args_from_files(files=args.replay,
                                                args=args,
                                                config=config)
 
     else:
-        parser.print_help()
-        exit()
+        print("Either --live or --replay is required")
+        sys.exit(1)
 
     timesteps = timestep_end - timestep_start
@@ -416,14 +393,10 @@ def run_telemetry():
         plot_network_histogram(ax=ax, data=net_means)
     if args.output is not None:
         if args.output == "":
-            filename = f"{td.dirname}.svg"
+            filename = f"{args.output}.svg"
         else:
             filename = args.output
         plt.savefig(f'{filename}')
         print(f"Saved to: {filename}")
     else:
         plt.show()
-
-
-if __name__ == "__main__":
-    run_telemetry()
diff --git a/raps/ui.py b/raps/ui.py
index d9c3bbe..5be3523 100644
--- a/raps/ui.py
+++ b/raps/ui.py
@@ -576,7 +576,3 @@ class LayoutManager:
                 self.update_progress_bar(1)
             finally:
                 os.system("stty sane")
-
-    def run_stepwise(self, jobs, timestep_start, timestep_end, time_delta):
-        """ Prepares the UI and returns a generator for the simulation """
-        return self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)
diff --git a/raps/workload.py b/raps/workload.py
index 151e2c3..563071d 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -39,6 +39,7 @@ import matplotlib.pyplot as plt
 from raps.telemetry import Telemetry
 from raps.job import job_dict, Job
 from raps.utils import create_file_indexed
+from raps.sim_config import SimConfig
 
 JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD",
@@ -799,18 +800,23 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False):
     plt.show()
 
 
-def run_workload():
-    from raps.sim_config import args, args_dict
-    from raps.system_config import get_system_config
-    config = get_system_config(args.system).get_legacy()
-    if args.replay:
+def run_workload(sim_config: SimConfig):
+    args = sim_config.get_legacy_args()
+    args_dict = sim_config.get_legacy_args_dict()
+    config = sim_config.system_configs[0].get_legacy()
+
+    if sim_config.replay:
         td = Telemetry(**args_dict)
-        jobs, _, _, _ = td.load_jobs_times_args_from_files(files=args.replay, args=args, config=config)
+        jobs, _, _, _ = td.load_jobs_times_args_from_files(files=sim_config.replay, args=args, config=config)
     else:
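+        # No replay files given; fall back to generating a synthetic workload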
         workload = Workload(args, config)
-    jobs = getattr(workload, args.workload)(args=args)
-    plot_job_hist(jobs, config=config, dist_split=args.multimodal, gantt_nodes=args.gantt_nodes)
-    if args.output:
+        jobs = getattr(workload, sim_config.workload)(args=args)
+        plot_job_hist(jobs,
+                      config=config,
+                      dist_split=sim_config.multimodal,
+                      gantt_nodes=sim_config.gantt_nodes)
+
+    if sim_config.output:
         timestep_start = min([x.submit_time for x in jobs])
         timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs]))
         filename = create_file_indexed('wl', create=False, ending="npz").split(".npz")[0]
@@ -970,7 +976,3 @@
     if len(engine.queue) <= engine.continuous_workload.args.maxqueue:
         new_jobs = engine.continuous_workload.generate_jobs()
         jobs.extend(new_jobs)
-
-
-if __name__ == "__main__":
-    run_workload()
diff --git a/tests/conftest.py b/tests/conftest.py
index 8f05879..477588a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@ def pytest_addoption(parser):
 
 def pytest_runtest_setup(item):
     if "long" in item.keywords and not item.config.getoption("--runlong"):
-        #reason = f"Skipping {item.nodeid} because it requires --runlong"
+        # reason = f"Skipping {item.nodeid} because it requires --runlong"
         reason = "Skipping test because it requires --runlong"
         pytest.skip(reason)
diff --git a/tests/smoke.py b/tests/smoke.py
index 0f9f4ca..7548de3 100644
--- a/tests/smoke.py
+++ b/tests/smoke.py
@@ -32,7 +32,7 @@ def run_command(command):
 def build_command(system, file_paths, additional_args=""):
     """Build the command string for the given system and file paths."""
     full_paths = " ".join([os.path.join(DATAPATH, path) for path in file_paths.split()])
-    return f"python main.py --system {system} -f {full_paths} -t {DEFAULT_TIME} {additional_args}".strip()
+    return f"python main.py run --system {system} -f {full_paths} -t {DEFAULT_TIME} {additional_args}".strip()
 
 
 def execute_system_tests(systems):
@@ -45,16 +45,16 @@ def execute_system_tests(systems):
 def synthetic_workload_tests():
     """Run synthetic workload tests."""
     print("Starting synthetic workload tests...")
-    run_command(f"python main.py -t {DEFAULT_TIME}")
-    run_command(f"python main.py -w benchmark -t {BENCH_TIME}")
-    run_command(f"python main.py -w peak -t {DEFAULT_TIME}")
-    run_command(f"python main.py -w idle -t {DEFAULT_TIME}")
+    run_command(f"python main.py run -t {DEFAULT_TIME}")
+    run_command(f"python main.py run -w benchmark -t {BENCH_TIME}")
+    run_command(f"python main.py run -w peak -t {DEFAULT_TIME}")
+    run_command(f"python main.py run -w idle -t {DEFAULT_TIME}")
 
 
 def hetero_tests():
     """Run heterogeneous workload tests."""
     print("Starting heterogeneous workload tests...")
-    run_command(f"python multi-part-sim.py -x setonix/part-cpu setonix/part-gpu -t {DEFAULT_TIME}")
+    run_command(f"python main.py run-multi-part -x setonix/part-cpu setonix/part-gpu -t {DEFAULT_TIME}")
 
 
 def main():
diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py
index bcde029..8e361e9 100644
--- a/tests/systems/conftest.py
+++ b/tests/systems/conftest.py
@@ -99,7 +99,7 @@ def system_config(system):
             "time_delta": True,
             "net": False,
         },
-        "lassen":{
+        "lassen": {
             "main": True,
             "telemetry": False,  # Takes very long!
"multi-part-sim": False, @@ -111,7 +111,7 @@ def system_config(system): "time_delta": True, "net": True, }, - "marconi100":{ + "marconi100": { "main": True, "telemetry": True, "multi-part-sim": False, @@ -182,16 +182,16 @@ def system_config(system): @pytest.fixture def system_file(system): files = { - "40frontiers":[], - "adastraMI250":["AdastaJobsMI250_15days.parquet"], - "frontier":["slurm/joblive/date=2024-01-18/","jobprofile/date=2024-01-18/"], - "fugaku":["21_04.parquet"], - "gcloudv2":["/v2/google_cluster_data_2011_sample"], - "lassen":["Lassen-Supercomputer-Job-Dataset"], - "marconi100":["job_table.parquet"], - "mit_supercloud":["202201"], - "setonix":[""], - "summit":[], - "lumi":[] + "40frontiers": [], + "adastraMI250": ["AdastaJobsMI250_15days.parquet"], + "frontier": ["slurm/joblive/date=2024-01-18/", "jobprofile/date=2024-01-18/"], + "fugaku": ["21_04.parquet"], + "gcloudv2": ["/v2/google_cluster_data_2011_sample"], + "lassen": ["Lassen-Supercomputer-Job-Dataset"], + "marconi100": ["job_table.parquet"], + "mit_supercloud": ["202201"], + "setonix": [""], + "summit": [], + "lumi": [] } - return files.get(system,files) + return files.get(system, files) diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py new file mode 100644 index 0000000..974ed2e --- /dev/null +++ b/tests/systems/test_engine.py @@ -0,0 +1,39 @@ +import gc +import pytest +from raps.engine import Engine +from raps.sim_config import SimConfig +from raps.stats import ( + get_engine_stats, + # get_job_stats, + # get_scheduler_stats, + # get_network_stats, +) + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_engine(system, system_config): + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main run.") + + sim_config = SimConfig.model_validate({ + "system": system, + "time": "2m", + }) + engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config) + ticks = list(engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)) + + assert len(ticks) == 120 + + engine_stats = get_engine_stats(engine) + # job_stats = get_job_stats(engine) + # scheduler_stats = get_scheduler_stats(engine) + # network_stats = get_network_stats(engine) + + assert engine_stats['time simulated'] == '0:02:00' + # TODO: More specific tests of values + + gc.collect() diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index 8e31952..c420b59 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -11,13 +11,13 @@ pytestmark = [ ] -def test_main_basic_run(system, system_config,random_id): +def test_main_basic_run(system, system_config, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "-o", random_id diff --git a/tests/systems/test_main_cooling_run.py b/tests/systems/test_main_cooling_run.py index 1411d8c..62d8621 100644 --- a/tests/systems/test_main_cooling_run.py +++ b/tests/systems/test_main_cooling_run.py @@ -18,7 +18,7 @@ def test_main_cooling_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1h", "--system", system, "-c", diff --git a/tests/systems/test_main_cooling_uncertainty_run.py b/tests/systems/test_main_cooling_uncertainty_run.py index 
2491d7a..742fe87 100644 --- a/tests/systems/test_main_cooling_uncertainty_run.py +++ b/tests/systems/test_main_cooling_uncertainty_run.py @@ -19,7 +19,7 @@ def test_main_cooling_uncertainty_run(request, system, system_config, random_id) os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "3m", "--system", system, "-c", diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 4b0584b..3eb567c 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -24,11 +24,10 @@ def test_main_fastforward_run(system, system_config, ff_arg, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t 1", "--fastforward", ff_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_help.py b/tests/systems/test_main_help.py index f84c63d..a651a38 100644 --- a/tests/systems/test_main_help.py +++ b/tests/systems/test_main_help.py @@ -11,13 +11,13 @@ pytestmark = [ ] -def test_main_help(system, system_config,random_id): +def test_main_help(system, system_config, random_id): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-h" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index 8b80d5d..8c7db1e 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -21,7 +21,7 @@ def test_main_network_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "--net", diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index 31db05e..1dcfee0 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -23,11 +23,12 @@ def test_main_network_withdata_run(system, system_config, system_file, random_id else: file_list = [DATA_PATH / system / system_file] for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), "File does not exist. does ./data exist or is RAPS_DATA_DIR set?" + assert os.path.isfile(file) or os.path.isdir(file), \ + "File does not exist. does ./data exist or is RAPS_DATA_DIR set?" 
os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "-f", *file_list, diff --git a/tests/systems/test_main_noui_run.py b/tests/systems/test_main_noui_run.py index 5b12b55..af8bea8 100644 --- a/tests/systems/test_main_noui_run.py +++ b/tests/systems/test_main_noui_run.py @@ -17,7 +17,7 @@ def test_main_noui_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "--noui", diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py index 9cb87a2..8808052 100644 --- a/tests/systems/test_main_time_delta_run.py +++ b/tests/systems/test_main_time_delta_run.py @@ -21,18 +21,17 @@ pytestmark = [ ("10h", "1h"), ("10h", "3h"), ("3d", "1d") -], ids=["1","1s","10s","1m","1h","3h","1d"]) +], ids=["1", "1s", "10s", "1m", "1h", "3h", "1d"]) def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t", time_arg, "--time-delta", tdelta_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index 9276011..0bedee8 100644 --- a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -22,18 +22,17 @@ pytestmark = [ ("10cs", "1ms"), ("100ms", "1ms"), ("100ms", "1s"), -], ids=["1ds","3ds","1cs","1ms","1cs-for-10ds","1ms-for-10cs","1ms-for-100ms","1s-for-100ms"]) +], ids=["1ds", "3ds", "1cs", "1ms", "1cs-for-10ds", "1ms-for-10cs", "1ms-for-100ms", "1s-for-100ms"]) def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t", time_arg, "--time-delta", tdelta_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py index a136615..a6c8763 100644 --- a/tests/systems/test_main_time_ff_delta_run.py +++ b/tests/systems/test_main_time_ff_delta_run.py @@ -20,20 +20,19 @@ pytestmark = [ ("10h", "1h", "2h"), ("10h", "3h", "1h"), pytest.param("3d", "1d", "1d", marks=pytest.mark.long, id="1d (long)"), -], ids=["1","1s","10s","1m","1h","3h","1d"]) +], ids=["1", "1s", "10s", "1m", "1h", "3h", "1d"]) def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, - ff_arg, random_id): + ff_arg, random_id): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "-t", time_arg, "--ff", ff_arg, "--time-delta", tdelta_arg, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_time_run.py 
b/tests/systems/test_main_time_run.py index e87e331..c8e00b1 100644 --- a/tests/systems/test_main_time_run.py +++ b/tests/systems/test_main_time_run.py @@ -27,10 +27,9 @@ def test_main_time_run(system, system_config, time_args, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", time_args, "--system", system, - #--"-f", system_file, "--noui", "-o", random_id ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_uncertainty_run.py b/tests/systems/test_main_uncertainty_run.py index 815a661..effdcc6 100644 --- a/tests/systems/test_main_uncertainty_run.py +++ b/tests/systems/test_main_uncertainty_run.py @@ -19,7 +19,7 @@ def test_main_uncertainty_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "3m", "--system", system, "-u", diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 299d34c..a4cbd55 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -22,10 +22,11 @@ def test_main_withdata_run(system, system_config, system_file, random_id): else: file_list = [DATA_PATH / system / system_file] for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" + assert os.path.isfile(file) or os.path.isdir(file), \ + f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1m", "--system", system, "-f", ','.join(str(p) for p in file_list), diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py index e8e64e9..3ea2a9c 100644 --- a/tests/systems/test_multi_part_sim_basic_run.py +++ b/tests/systems/test_multi_part_sim_basic_run.py @@ -18,10 +18,9 @@ def test_multi_part_sim_basic_run(system, system_config): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "multi-part-sim.py", + "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", - #"--noui" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" del result diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py index 3f53e99..ccbadaa 100644 --- a/tests/systems/test_multi_part_sim_network_run.py +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -21,16 +21,15 @@ def test_multi_part_sim_network_run(system, system_config, random_id): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "multi-part-sim.py", + "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", "--net", - #"--noui" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - #TODO: - #Cleanup files after test! + # TODO: + # Cleanup files after test! 
del result gc.collect() diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index f862aca..2b18305 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -22,15 +22,15 @@ def test_multi_part_sim_withdata_run(system, system_config, system_file): else: file_list = [DATA_PATH / system / system_file] for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" + assert os.path.isfile(file) or os.path.isdir(file), \ + f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "multi-part-sim.py", + "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", "-f", *file_list, - #"--noui" ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" del result diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py index 415fbfe..e9685f7 100644 --- a/tests/systems/test_telemetry_withdata_run.py +++ b/tests/systems/test_telemetry_withdata_run.py @@ -22,10 +22,11 @@ def test_telemetry_main_withdata_run(system, system_config, system_file, random_ else: file_list = [DATA_PATH / system / system_file] for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" + assert os.path.isfile(file) or os.path.isdir(file), \ + f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "raps/telemetry.py", + "python", "main.py", "telemetry", "--system", system, "-f", *file_list, "-o", random_id diff --git a/tests/systems/test_workload_synthetic.py b/tests/systems/test_workload_synthetic.py index dd5f8cf..107b015 100644 --- a/tests/systems/test_workload_synthetic.py +++ b/tests/systems/test_workload_synthetic.py @@ -13,29 +13,26 @@ def flatten(dist): name, args = dist return [name, *args] -def _build_args(dist_name, params): - return [dist_name, *params] - jobdist_case = [ ("weibull", ["--jobsize-weibull-shape", "0.75", "--jobsize-weibull-scale", "16"]), ("normal", ["--jobsize-normal-stddev", "100", "--jobsize-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] cpudist_case = [ ("weibull", ["--cpuutil-weibull-shape", "0.75", "--cpuutil-weibull-scale", "16"]), ("normal", ["--cpuutil-normal-stddev", "100", "--cpuutil-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] gpudist_case = [ ("weibull", ["--gpuutil-weibull-shape", "0.75", "--gpuutil-weibull-scale", "16"]), ("normal", ["--gpuutil-normal-stddev", "100", "--gpuutil-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] wtimedist_case = [ ("weibull", ["--walltime-weibull-shape", "0.75", "--walltime-weibull-scale", "16"]), ("normal", ["--walltime-normal-stddev", "100", "--walltime-normal-mean", "16"]), - ("uniform",[]), + ("uniform", []), ] additional_params_cases = [ "", # nothing @@ -47,16 +44,16 @@ additional_params_cases = [ @pytest.mark.parametrize( - "jobdist", jobdist_case, ids=lambda d:d[0] + "jobdist", jobdist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( - "cpudist", cpudist_case, ids=lambda d:d[0] + "cpudist", cpudist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( - "gpudist", gpudist_case, ids=lambda d:d[0] + "gpudist", 
gpudist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( - "wtimedist", wtimedist_case, ids=lambda d:d[0] + "wtimedist", wtimedist_case, ids=lambda d: d[0] ) @pytest.mark.parametrize( "additional_params", additional_params_cases, ids=lambda p: (p or "none") @@ -75,7 +72,7 @@ def test_workload_synthetic_run( # Build the command line. Each distribution tuple expands into: # dist_name, , , ... cmd = [ - "python", "raps/workload.py", + "python", "main.py", "workload", "--system", system, "-w", "synthetic", "--jobsize-distribution", *flatten(jobdist), @@ -90,7 +87,7 @@ def test_workload_synthetic_run( cmd.extend(additional_params) cmd1 = ["python", "-c \"exit()\""] - result = subprocess.run(cmd1,capture_output=True,text=True,stdin=subprocess.DEVNULL) + result = subprocess.run(cmd1, capture_output=True, text=True, stdin=subprocess.DEVNULL) try: result = subprocess.run( cmd, diff --git a/tests/test_main.py b/tests/test_main.py index 76f48a3..5c08182 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -13,10 +13,10 @@ PROJECT_ROOT = Path(__file__).resolve().parent.parent # adjust if needed def test_main_withui(): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1h", ], capture_output=True, - text=True + text=True ) assert result.returncode == 0 @@ -25,11 +25,11 @@ def test_main_withui(): def test_main_noui(): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", "--time", "1h", "--noui" ], capture_output=True, - text=True + text=True ) assert result.returncode == 0 @@ -39,8 +39,8 @@ def test_main_noui(): def test_main_long(): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", + "python", "main.py", "run", ], capture_output=True, - text=True + text=True ) assert result.returncode == 0 diff --git a/tests/util.py b/tests/util.py index 96609c7..6ee1df7 100644 --- a/tests/util.py +++ b/tests/util.py @@ -13,10 +13,13 @@ def find_project_root(): PROJECT_ROOT = find_project_root() CONFIG_PATH = PROJECT_ROOT / "config" -DATA_PATH = Path(os.getenv("RAPS_DATA_DIR",PROJECT_ROOT / "data")).resolve() +DATA_PATH = Path(os.getenv("RAPS_DATA_DIR", PROJECT_ROOT / "data")).resolve() -#Maybe usefull but now all systems are listed explicitly! -system_list = [entry for entry in os.listdir(CONFIG_PATH) if os.path.isfile(os.path.join(CONFIG_PATH,entry,'system.json'))] +# Maybe usefull but now all systems are listed explicitly! 
+system_list = [
+    entry for entry in os.listdir(CONFIG_PATH)
+    if os.path.isfile(os.path.join(CONFIG_PATH, entry, 'system.json'))
+]
 
 
 def requires_all_markers(request, required_markers):
-- 
GitLab


From 7c5f9996f0ba39b0e0badc7ef6398f4212cf30d9 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Thu, 28 Aug 2025 10:05:13 -0400
Subject: [PATCH 265/388] Better test clean up

Make sure that test output gets deleted after tests
---
 tests/conftest.py                             | 23 ++++++++++++++++---
 tests/systems/test_engine.py                  |  5 +---
 tests/systems/test_main_basic_run.py          | 14 ++---------
 tests/systems/test_main_cooling_run.py        | 14 ++---------
 .../test_main_cooling_uncertainty_run.py      | 14 ++---------
 tests/systems/test_main_fastforward_run.py    | 14 ++---------
 tests/systems/test_main_help.py               |  6 +----
 tests/systems/test_main_network_run.py        | 14 ++---------
 .../systems/test_main_network_withdata_run.py | 14 ++---------
 tests/systems/test_main_noui_run.py           | 14 ++---------
 tests/systems/test_main_time_delta_run.py     | 14 ++---------
 .../test_main_time_delta_sub_second_run.py    |  6 ++---
 tests/systems/test_main_time_ff_delta_run.py  | 14 ++---------
 tests/systems/test_main_time_run.py           | 14 ++---------
 tests/systems/test_main_uncertainty_run.py    | 14 ++---------
 tests/systems/test_main_withdata_run.py       | 14 ++---------
 .../test_multi_part_sim_network_run.py        | 11 ++-------
 tests/systems/test_telemetry_withdata_run.py  | 14 ++---------
 18 files changed, 53 insertions(+), 180 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 477588a..855f969 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,9 @@
 import pytest
 import uuid
+import shutil
+from glob import glob
+from pathlib import Path
+import gc
 
 
 def pytest_addoption(parser):
@@ -15,6 +19,19 @@ def pytest_runtest_setup(item):
         pytest.skip(reason)
 
 
-@pytest.fixture
-def random_id():
-    return f"test-{str(uuid.uuid4())[:8]}"
+@pytest.fixture()
+def sim_output():
+    """
+    Handles cleaning up output from the sim.
+    Can also be used even if you aren't outputting anything, to run garbage collection after the sim.
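+    Yields a unique output path prefix to pass as the -o/--output argument; any files or
+    directories matching "<prefix>*" are removed once the test finishes.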
+ """ + out = f"test-output/test-{str(uuid.uuid4())[:8]}" + yield out + for file in glob(f"{out}*"): + if Path(file).is_dir(): + shutil.rmtree(file) + else: + Path(file).unlink() + + # Also force a garbage collection to clean up memory after running a simulation + gc.collect() diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py index 974ed2e..ce40878 100644 --- a/tests/systems/test_engine.py +++ b/tests/systems/test_engine.py @@ -1,4 +1,3 @@ -import gc import pytest from raps.engine import Engine from raps.sim_config import SimConfig @@ -15,7 +14,7 @@ pytestmark = [ ] -def test_engine(system, system_config): +def test_engine(system, system_config, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -35,5 +34,3 @@ def test_engine(system, system_config): assert engine_stats['time simulated'] == '0:02:00' # TODO: More specific tests of values - - gc.collect() diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index c420b59..0cc9b69 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -11,7 +10,7 @@ pytestmark = [ ] -def test_main_basic_run(system, system_config, random_id): +def test_main_basic_run(system, system_config, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -20,15 +19,6 @@ def test_main_basic_run(system, system_config, random_id): "python", "main.py", "run", "--time", "1m", "--system", system, - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_cooling_run.py b/tests/systems/test_main_cooling_run.py index 62d8621..da0c3a3 100644 --- a/tests/systems/test_main_cooling_run.py +++ b/tests/systems/test_main_cooling_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -12,7 +11,7 @@ pytestmark = [ ] -def test_main_cooling_run(system, system_config, random_id): +def test_main_cooling_run(system, system_config, sim_output): if not system_config.get("cooling", False): pytest.skip(f"{system} does not support cooling.") @@ -23,15 +22,6 @@ def test_main_cooling_run(system, system_config, random_id): "--system", system, "-c", "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_cooling_uncertainty_run.py b/tests/systems/test_main_cooling_uncertainty_run.py index 742fe87..472771d 100644 --- a/tests/systems/test_main_cooling_uncertainty_run.py +++ b/tests/systems/test_main_cooling_uncertainty_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -12,7 +11,7 @@ pytestmark = [ ] -def test_main_cooling_uncertainty_run(request, system, system_config, random_id): +def test_main_cooling_uncertainty_run(request, system, system_config, sim_output): 
print(f"Markexpr: {request.config.option.markexpr}") if not system_config.get("uncertainty", False) or not system_config.get("cooling", False): pytest.skip(f"{system} does not support cooling or uncertainty.") @@ -25,15 +24,6 @@ def test_main_cooling_uncertainty_run(request, system, system_config, random_id) "-c", "-u", "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 3eb567c..ab19b24 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -18,7 +17,7 @@ pytestmark = [ "0m", "1m", "60m", "0h", "1h", "6h", ]) -def test_main_fastforward_run(system, system_config, ff_arg, random_id): +def test_main_fastforward_run(system, system_config, ff_arg, sim_output): if not system_config.get("fastforward", False): pytest.skip(f"{system} does not support basic main run.") @@ -29,15 +28,6 @@ def test_main_fastforward_run(system, system_config, ff_arg, random_id): "--fastforward", ff_arg, "--system", system, "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_help.py b/tests/systems/test_main_help.py index a651a38..97fabef 100644 --- a/tests/systems/test_main_help.py +++ b/tests/systems/test_main_help.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -11,7 +10,7 @@ pytestmark = [ ] -def test_main_help(system, system_config, random_id): +def test_main_help(system, system_config): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -23,6 +22,3 @@ def test_main_help(system, system_config, random_id): assert result.returncode == 0, f"Failed on {system}: {result.stderr}" assert "usage:" in result.stdout - - del result - gc.collect() diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index 8c7db1e..ea693b4 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -12,7 +11,7 @@ pytestmark = [ ] -def test_main_network_run(system, system_config, random_id): +def test_main_network_run(system, system_config, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -25,15 +24,6 @@ def test_main_network_run(system, system_config, random_id): "--time", "1m", "--system", system, "--net", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git 
a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index 1dcfee0..a4a8b4f 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT, DATA_PATH @@ -14,7 +13,7 @@ pytestmark = [ ] -def test_main_network_withdata_run(system, system_config, system_file, random_id): +def test_main_network_withdata_run(system, system_config, system_file, sim_output): if not system_config.get("net", False): pytest.skip(f"{system} does not support basic net run.") @@ -33,15 +32,6 @@ def test_main_network_withdata_run(system, system_config, system_file, random_id "--system", system, "-f", *file_list, "--net", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_noui_run.py b/tests/systems/test_main_noui_run.py index af8bea8..08e7189 100644 --- a/tests/systems/test_main_noui_run.py +++ b/tests/systems/test_main_noui_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -11,7 +10,7 @@ pytestmark = [ ] -def test_main_noui_run(system, system_config, random_id): +def test_main_noui_run(system, system_config, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -21,15 +20,6 @@ def test_main_noui_run(system, system_config, random_id): "--time", "1m", "--system", system, "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_time_delta_run.py b/tests/systems/test_main_time_delta_run.py index 8808052..2ca8477 100644 --- a/tests/systems/test_main_time_delta_run.py +++ b/tests/systems/test_main_time_delta_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT from raps.utils import convert_to_time_unit, convert_seconds_to_hhmmss @@ -22,7 +21,7 @@ pytestmark = [ ("10h", "3h"), ("3d", "1d") ], ids=["1", "1s", "10s", "1m", "1h", "3h", "1d"]) -def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random_id): +def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, sim_output): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -33,17 +32,8 @@ def test_main_time_delta_run(system, system_config, time_arg, tdelta_arg, random "--time-delta", tdelta_arg, "--system", system, "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" time = convert_to_time_unit(time_arg) assert f"Time Simulated: {convert_seconds_to_hhmmss(time)}" in result.stdout - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git 
a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index 0bedee8..55c0e3c 100644 --- a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -23,7 +23,7 @@ pytestmark = [ ("100ms", "1ms"), ("100ms", "1s"), ], ids=["1ds", "3ds", "1cs", "1ms", "1cs-for-10ds", "1ms-for-10cs", "1ms-for-100ms", "1s-for-100ms"]) -def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_arg, random_id): +def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_arg, sim_output): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -34,14 +34,14 @@ def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_ "--time-delta", tdelta_arg, "--system", system, "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" time = parse_td(time_arg).seconds assert f"Time Simulated: {convert_seconds_to_hhmmss(time)}" in result.stdout subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", + f"rm {sim_output}.npz && rm -fr simulation_results/{sim_output}", shell=True, check=True ) diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py index a6c8763..7424758 100644 --- a/tests/systems/test_main_time_ff_delta_run.py +++ b/tests/systems/test_main_time_ff_delta_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -22,7 +21,7 @@ pytestmark = [ pytest.param("3d", "1d", "1d", marks=pytest.mark.long, id="1d (long)"), ], ids=["1", "1s", "10s", "1m", "1h", "3h", "1d"]) def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, - ff_arg, random_id): + ff_arg, sim_output): if not system_config.get("time_delta", False): pytest.skip(f"{system} does not support time_delta run.") @@ -34,15 +33,6 @@ def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, "--time-delta", tdelta_arg, "--system", system, "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_time_run.py b/tests/systems/test_main_time_run.py index c8e00b1..0faa06c 100644 --- a/tests/systems/test_main_time_run.py +++ b/tests/systems/test_main_time_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -21,7 +20,7 @@ pytestmark = [ "0h", "1h", pytest.param("6h", marks=pytest.mark.long), # mark this one as long ]) -def test_main_time_run(system, system_config, time_args, random_id): +def test_main_time_run(system, system_config, time_args, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") @@ -31,15 +30,6 @@ def test_main_time_run(system, system_config, time_args, random_id): "--time", time_args, "--system", system, "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm 
-fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_uncertainty_run.py b/tests/systems/test_main_uncertainty_run.py index effdcc6..f3d5bd0 100644 --- a/tests/systems/test_main_uncertainty_run.py +++ b/tests/systems/test_main_uncertainty_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -13,7 +12,7 @@ pytestmark = [ ] -def test_main_uncertainty_run(system, system_config, random_id): +def test_main_uncertainty_run(system, system_config, sim_output): if not system_config.get("uncertainty", False): pytest.skip(f"{system} does not support uncertainty.") @@ -24,15 +23,6 @@ def test_main_uncertainty_run(system, system_config, random_id): "--system", system, "-u", "--noui", - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index a4cbd55..68d672d 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT, DATA_PATH @@ -12,7 +11,7 @@ pytestmark = [ ] -def test_main_withdata_run(system, system_config, system_file, random_id): +def test_main_withdata_run(system, system_config, system_file, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main even without data.") if not system_config.get("withdata", False): @@ -30,15 +29,6 @@ def test_main_withdata_run(system, system_config, system_file, random_id): "--time", "1m", "--system", system, "-f", ','.join(str(p) for p in file_list), - "-o", random_id + "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz && rm -fr simulation_results/{random_id}", - shell=True, - check=True - ) - - del result - gc.collect() diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py index ccbadaa..aa90cca 100644 --- a/tests/systems/test_multi_part_sim_network_run.py +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT @@ -11,8 +10,7 @@ pytestmark = [ ] -def test_multi_part_sim_network_run(system, system_config, random_id): - +def test_multi_part_sim_network_run(system, system_config, sim_output): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run.") @@ -25,11 +23,6 @@ def test_multi_part_sim_network_run(system, system_config, random_id): "--time", "1h", "-x", f"{system}/*", "--net", + "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - # TODO: - # Cleanup files after test! 
- - del result - gc.collect() diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py index e9685f7..eef5adf 100644 --- a/tests/systems/test_telemetry_withdata_run.py +++ b/tests/systems/test_telemetry_withdata_run.py @@ -1,6 +1,5 @@ import os import subprocess -import gc import pytest from tests.util import PROJECT_ROOT, DATA_PATH @@ -11,7 +10,7 @@ pytestmark = [ ] -def test_telemetry_main_withdata_run(system, system_config, system_file, random_id): +def test_telemetry_main_withdata_run(system, system_config, system_file, sim_output): if not system_config.get("telemetry", False): pytest.skip(f"{system} does not support telemetry run.") if not system_config.get("withdata", False): @@ -29,15 +28,6 @@ def test_telemetry_main_withdata_run(system, system_config, system_file, random_ "python", "main.py", "telemetry", "--system", system, "-f", *file_list, - "-o", random_id + "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - - subprocess.run( - f"rm {random_id}.npz ; rm {random_id}.png", - shell=True, - check=True - ) - - del result - gc.collect() -- GitLab From 5c931588e93da1d1de4044442cb79068c64579aa Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 28 Aug 2025 10:14:23 -0400 Subject: [PATCH 266/388] Allow passing cli_args explicitly --- main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index cd7e162..737cc8d 100644 --- a/main.py +++ b/main.py @@ -31,7 +31,7 @@ CLI_CONFIG = SettingsConfigDict( ) -def main(): +def main(cli_args: list[str] | None = None): parser = argparse.ArgumentParser( description=""" ExaDigiT Resource Allocator & Power Simulator (RAPS) @@ -153,7 +153,7 @@ def main(): # TODO: move telemetry and other misc scripts into here - args = parser.parse_args() + args = parser.parse_args(cli_args) args.func(args) -- GitLab From 20bf758bae6a621ac7f9b8be400a4c9e6ee04ae4 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 28 Aug 2025 15:14:29 -0400 Subject: [PATCH 267/388] Refactor system_files fixture --- tests/systems/conftest.py | 27 ++++++++++++------- .../systems/test_main_network_withdata_run.py | 12 ++------- tests/systems/test_main_withdata_run.py | 11 ++------ .../test_multi_part_sim_withdata_run.py | 13 ++------- tests/systems/test_telemetry_withdata_run.py | 11 ++------ 5 files changed, 25 insertions(+), 49 deletions(-) diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index 8e361e9..269d101 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -1,4 +1,5 @@ import pytest +from tests.util import DATA_PATH @pytest.fixture(params=[ @@ -180,18 +181,24 @@ def system_config(system): @pytest.fixture -def system_file(system): +def system_files(system): files = { "40frontiers": [], - "adastraMI250": ["AdastaJobsMI250_15days.parquet"], - "frontier": ["slurm/joblive/date=2024-01-18/", "jobprofile/date=2024-01-18/"], - "fugaku": ["21_04.parquet"], - "gcloudv2": ["/v2/google_cluster_data_2011_sample"], - "lassen": ["Lassen-Supercomputer-Job-Dataset"], - "marconi100": ["job_table.parquet"], - "mit_supercloud": ["202201"], - "setonix": [""], + "adastraMI250": ["adastraMI250/AdastaJobsMI250_15days.parquet"], + "frontier": ["frontier/slurm/joblive/date=2024-01-18/", "frontier/jobprofile/date=2024-01-18/"], + "fugaku": ["fugaku/21_04.parquet"], + "gcloudv2": ["gcloud/v2/google_cluster_data_2011_sample"], + "lassen": ["lassen/Lassen-Supercomputer-Job-Dataset"], + 
"marconi100": ["marconi100/job_table.parquet"], + "mit_supercloud": ["mit_supercloud/202201"], + "setonix": [], "summit": [], "lumi": [] } - return files.get(system, files) + + file_list = [DATA_PATH / f for f in files.get(system, [])] + for file in file_list: + assert file.exists(), \ + f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" + + return [str(f) for f in file_list] diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index a4a8b4f..62b679a 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -13,24 +13,16 @@ pytestmark = [ ] -def test_main_network_withdata_run(system, system_config, system_file, sim_output): +def test_main_network_withdata_run(system, system_config, system_files, sim_output): if not system_config.get("net", False): pytest.skip(f"{system} does not support basic net run.") - if isinstance(system_file, list): - file_list = [DATA_PATH / system / x for x in system_file] - else: - file_list = [DATA_PATH / system / system_file] - for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), \ - "File does not exist. does ./data exist or is RAPS_DATA_DIR set?" - os.chdir(PROJECT_ROOT) result = subprocess.run([ "python", "main.py", "run", "--time", "1m", "--system", system, - "-f", *file_list, + "-f", *system_files, "--net", "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 68d672d..eb996a3 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -11,24 +11,17 @@ pytestmark = [ ] -def test_main_withdata_run(system, system_config, system_file, sim_output): +def test_main_withdata_run(system, system_config, system_files, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main even without data.") if not system_config.get("withdata", False): pytest.skip(f"{system} does not support basic main with data.") - if isinstance(system_file, list): - file_list = [DATA_PATH / system / x for x in system_file] - else: - file_list = [DATA_PATH / system / system_file] - for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), \ - f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" 
os.chdir(PROJECT_ROOT) result = subprocess.run([ "python", "main.py", "run", "--time", "1m", "--system", system, - "-f", ','.join(str(p) for p in file_list), + "-f", ','.join(system_files), "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index 2b18305..caaf9e8 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -12,26 +12,17 @@ pytestmark = [ ] -def test_multi_part_sim_withdata_run(system, system_config, system_file): +def test_multi_part_sim_withdata_run(system, system_config, system_files): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run even without data.") if not system_config.get("withdata", False): pytest.skip(f"{system} does not support multi-part-sim run with data.") - if isinstance(system_file, list): - file_list = [DATA_PATH / system / x for x in system_file] - else: - file_list = [DATA_PATH / system / system_file] - for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), \ - f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" os.chdir(PROJECT_ROOT) result = subprocess.run([ "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", - "-f", *file_list, + "-f", *system_files, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" - del result - gc.collect() diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py index eef5adf..ab6f93c 100644 --- a/tests/systems/test_telemetry_withdata_run.py +++ b/tests/systems/test_telemetry_withdata_run.py @@ -10,24 +10,17 @@ pytestmark = [ ] -def test_telemetry_main_withdata_run(system, system_config, system_file, sim_output): +def test_telemetry_main_withdata_run(system, system_config, system_files, sim_output): if not system_config.get("telemetry", False): pytest.skip(f"{system} does not support telemetry run.") if not system_config.get("withdata", False): pytest.skip(f"{system} does not support telemetry run with data.") - if isinstance(system_file, list): - file_list = [DATA_PATH / system / x for x in system_file] - else: - file_list = [DATA_PATH / system / system_file] - for file in file_list: - assert os.path.isfile(file) or os.path.isdir(file), \ - f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" 
os.chdir(PROJECT_ROOT) result = subprocess.run([ "python", "main.py", "telemetry", "--system", system, - "-f", *file_list, + "-f", *system_files, "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" -- GitLab From e59b2410387e4c6bee0c9341820644e6f96df18c Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 2 Sep 2025 11:28:09 -0400 Subject: [PATCH 268/388] More refactoring to main --- main.py | 147 ++---------------- raps/run_sim.py | 96 +++++++++++- raps/telemetry.py | 75 +++++---- raps/utils.py | 8 +- raps/workload.py | 18 ++- .../systems/test_main_network_withdata_run.py | 2 +- .../test_multi_part_sim_withdata_run.py | 2 +- tests/systems/test_telemetry_withdata_run.py | 2 +- 8 files changed, 182 insertions(+), 168 deletions(-) diff --git a/main.py b/main.py index 737cc8d..1e52395 100644 --- a/main.py +++ b/main.py @@ -1,36 +1,15 @@ """ ExaDigiT Resource Allocator & Power Simulator (RAPS) """ -import yaml import argparse -import sys -from pathlib import Path from raps.helpers import check_python_version -from raps.sim_config import SimConfig -from raps.run_sim import run_sim, run_multi_part_sim -from raps.workload import run_workload -from raps.telemetry import run_telemetry, run_telemetry_add_args -from raps.utils import pydantic_add_args, yaml_dump -from pydantic_settings import SettingsConfigDict +from raps.run_sim import run_sim_add_parser, run_multi_part_sim_add_parser, show_add_parser +from raps.workload import run_workload_add_parser +from raps.telemetry import run_telemetry_add_parser check_python_version() -def read_sim_yaml(config_file: str): - if config_file == "-": - return yaml.safe_load(sys.stdin.read()) - elif config_file: - return yaml.safe_load(Path(config_file).read_text()) - else: - return {} - - -CLI_CONFIG = SettingsConfigDict( - cli_implicit_flags=True, - cli_kebab_case=True, -) - - def main(cli_args: list[str] | None = None): parser = argparse.ArgumentParser( description=""" @@ -40,121 +19,17 @@ def main(cli_args: list[str] | None = None): ) subparsers = parser.add_subparsers(required=True) - # Shortcut for common sim args - sim_shortcuts = { - "partitions": "x", - "cooling": "c", - "simulate-network": "net", - "fastforward": "ff", - "time": "t", - "debug": "d", - "numjobs": "n", - "verbose": "v", - "output": "o", - "uncertainties": "u", - "plot": "p", - "replay": "f", - "workload": "w", - } - - # ==== raps run ==== - cmd_run = subparsers.add_parser("run", description=""" - Run single-partition (homogeneous) systems. Supports synthetic workload generation or - telemetry replay, dynamic power modeling (including conversion losses), and optional - coupling to a thermo-fluids cooling model. Produces performance, utilization, and - energy metrics, with optional plots and output files for analysis and validation. - """) - cmd_run.add_argument("config_file", nargs="?", default=None, help=""" - YAML sim config file, can be used to configure an experiment instead of using CLI - flags. Pass "-" to read from stdin. - """) - cmd_run_validate = pydantic_add_args(cmd_run, SimConfig, model_config={ - **CLI_CONFIG, - "cli_shortcuts": sim_shortcuts, - }) - - def cmd_run_func(args): - sim_config = cmd_run_validate(args, read_sim_yaml(args.config_file)) - run_sim(sim_config) - cmd_run.set_defaults(func=cmd_run_func) - - # ==== raps run-multi-part ==== - # It might make sense to combine these into a single entrypoint. 
Though the multi-part run - # #doesn't support UI or the same output options. - cmd_run_multi_part = subparsers.add_parser("run-multi-part", description=""" - Simulates multi-partition (heterogeneous) systems. Supports replaying telemetry or - generating synthetic workloads across CPU-only, GPU, and mixed partitions. Initializes - per-partition power, FLOPS, and scheduling models, then advances simulations in lockstep. - Outputs per-partition performance, utilization, and energy statistics for systems such as - MIT Supercloud, Setonix, Adastra, and LUMI. - """) - cmd_run_multi_part.add_argument("config_file", nargs="?", default=None, help=""" - YAML sim config file, can be used to configure an experiment instead of using CLI - flags. Pass "-" to read from stdin. - """) - cmd_run_multi_part_validate = pydantic_add_args(cmd_run_multi_part, SimConfig, model_config={ - **CLI_CONFIG, - "cli_shortcuts": sim_shortcuts, - }) - - def cmd_run_multi_part_func(args): - sim_config = cmd_run_multi_part_validate(args, read_sim_yaml(args.config_file)) - run_multi_part_sim(sim_config) - cmd_run_multi_part.set_defaults(func=cmd_run_multi_part_func) - - # ==== raps show ==== - cmd_show = subparsers.add_parser("show", description=""" - Outputs the given CLI args as a YAML config file that can be used to re-run the same - simulation. - """) - cmd_show.add_argument("config_file", nargs="?", default=None, help=""" - Input YAML sim config file. Can be used to slightly modify an existing sim config. - """) - cmd_show.add_argument("--show-defaults", default=False, help=""" - If true, include defaults in the output YAML - """) - cmd_show_validate = pydantic_add_args(cmd_show, SimConfig, model_config={ - **CLI_CONFIG, - "cli_shortcuts": sim_shortcuts, - }) - - def cmd_show_func(args): - sim_config = cmd_show_validate(args, read_sim_yaml(args.config_file)) - sim_config = sim_config.model_dump(mode="json", - exclude_defaults=not args.show_defaults) - print(yaml_dump(sim_config), end="") - cmd_show.set_defaults(func=cmd_show_func) - - # ==== raps workload ==== - # TODO: Separate the arguments for this command - cmd_workload = subparsers.add_parser("workload", description=""" - Saves workload as a snapshot. - """) - cmd_workload.add_argument("config_file", nargs="?", default=None, help=""" - YAML sim config file, can be used to configure an experiment instead of using CLI - flags. Pass "-" to read from stdin. 
- """) - cmd_workload_validate = pydantic_add_args(cmd_workload, SimConfig, model_config={ - **CLI_CONFIG, - "cli_shortcuts": sim_shortcuts, - }) - - def cmd_workload_func(args): - sim_config = cmd_workload_validate(args, read_sim_yaml(args.config_file)) - run_workload(sim_config) - cmd_show.set_defaults(func=cmd_workload_func) - - # ==== raps telemetry ==== - cmd_telemetry = subparsers.add_parser("telemetry", description=""" - Telemetry data validator - """) - run_telemetry_add_args(cmd_telemetry) - cmd_telemetry.set_defaults(func=run_telemetry) + run_sim_add_parser(subparsers) + run_multi_part_sim_add_parser(subparsers) + show_add_parser(subparsers) + run_workload_add_parser(subparsers) + run_telemetry_add_parser(subparsers) - # TODO: move telemetry and other misc scripts into here + # TODO: move other misc scripts into here args = parser.parse_args(cli_args) - args.func(args) + assert args.impl, "subparsers should add an impl function to args" + args.impl(args) if __name__ == "__main__": diff --git a/raps/run_sim.py b/raps/run_sim.py index 68229ad..7587dbb 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -6,11 +6,13 @@ These functions just handle rendering the terminal UI and outputting results to import json import pandas as pd import sys +import yaml +from pathlib import Path from raps.ui import LayoutManager from raps.plotting import Plotter from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine -from raps.utils import write_dict_to_file +from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, yaml_dump from raps.stats import ( get_engine_stats, get_job_stats, @@ -22,6 +24,51 @@ from raps.stats import ( from raps.sim_config import SimConfig +def read_yaml(config_file: str): + if config_file == "-": + return yaml.safe_load(sys.stdin.read()) + elif config_file: + return yaml.safe_load(Path(config_file).read_text()) + else: + return {} + + +shortcuts = { + "partitions": "x", + "cooling": "c", + "simulate-network": "net", + "fastforward": "ff", + "time": "t", + "debug": "d", + "numjobs": "n", + "verbose": "v", + "output": "o", + "uncertainties": "u", + "plot": "p", + "replay": "f", + "workload": "w", +} + + +def run_sim_add_parser(subparsers: SubParsers): + parser = subparsers.add_parser("run", description=""" + Run single-partition (homogeneous) systems. Supports synthetic workload generation or + telemetry replay, dynamic power modeling (including conversion losses), and optional + coupling to a thermo-fluids cooling model. Produces performance, utilization, and + energy metrics, with optional plots and output files for analysis and validation. + """) + parser.add_argument("config_file", nargs="?", default=None, help=""" + YAML sim config file, can be used to configure an experiment instead of using CLI + flags. Pass "-" to read from stdin. + """) + model_validate = pydantic_add_args(parser, SimConfig, model_config={ + "cli_shortcuts": shortcuts, + }) + parser.set_defaults( + impl=lambda args: run_sim(model_validate(args, read_yaml(args.config_file))) + ) + + def run_sim(sim_config: SimConfig): if sim_config.verbose or sim_config.debug: print(f"SimConfig: {sim_config.model_dump_json(indent=4)}") @@ -174,6 +221,26 @@ def run_sim(sim_config: SimConfig): print("Output directory is: ", out) # If output is enabled, the user wants this information as last output +def run_multi_part_sim_add_parser(subparsers: SubParsers): + parser = subparsers.add_parser("run-multi-part", description=""" + Simulates multi-partition (heterogeneous) systems. 
Supports replaying telemetry or + generating synthetic workloads across CPU-only, GPU, and mixed partitions. Initializes + per-partition power, FLOPS, and scheduling models, then advances simulations in lockstep. + Outputs per-partition performance, utilization, and energy statistics for systems such as + MIT Supercloud, Setonix, Adastra, and LUMI. + """) + parser.add_argument("config_file", nargs="?", default=None, help=""" + YAML sim config file, can be used to configure an experiment instead of using CLI + flags. Pass "-" to read from stdin. + """) + model_validate = pydantic_add_args(parser, SimConfig, model_config={ + "cli_shortcuts": shortcuts, + }) + parser.set_defaults( + impl=lambda args: run_multi_part_sim(model_validate(args, read_yaml(args.config_file))) + ) + + def run_multi_part_sim(sim_config: SimConfig): multi_engine, jobs, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config) @@ -234,3 +301,30 @@ def run_multi_part_sim(sim_config: SimConfig): scheduler_stats=scheduler_stats, network_stats=network_stats, ) + + +def show_add_parser(subparsers: SubParsers): + parser = subparsers.add_parser("show", description=""" + Outputs the given CLI args as a YAML config file that can be used to re-run the same + simulation. + """) + parser.add_argument("config_file", nargs="?", default=None, help=""" + Input YAML sim config file. Can be used to slightly modify an existing sim config. + """) + parser.add_argument("--show-defaults", default=False, help=""" + If true, include defaults in the output YAML + """) + model_validate = pydantic_add_args(parser, SimConfig, model_config={ + "cli_shortcuts": shortcuts, + }) + + def impl(args): + sim_config = model_validate(args, read_yaml(args.config_file)) + show(sim_config, show_defaults=args.show_defaults) + + parser.set_defaults(impl=impl) + + +def show(sim_config: SimConfig, show_defaults=False): + data = sim_config.model_dump(mode="json", exclude_defaults=not show_defaults) + print(yaml_dump(data), end="") diff --git a/raps/telemetry.py b/raps/telemetry.py index 5a09eb9..3f883de 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -6,9 +6,9 @@ parsing parquet files, and generating job state information. The module defines a `Telemetry` class for managing telemetry data and several helper functions for data encryption and conversion between node name and index formats. 
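+
+The `telemetry` subcommand is registered via `run_telemetry_add_parser` below;
+its options are declared on the pydantic `TelemetryArgs` model and parsed with
+`pydantic_add_args`. A programmatic call might look like this sketch (the .npz
+path is a placeholder):
+
+    run_telemetry(TelemetryArgs(system="frontier", replay=["jobs.npz"]))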
""" +from typing import Literal import sys import random -import argparse from pathlib import Path # import json from typing import Optional @@ -18,6 +18,7 @@ import importlib import numpy as np import pandas as pd from tqdm import tqdm +from pydantic import BaseModel # from rich.progress import track from raps.system_config import get_system_config @@ -28,7 +29,9 @@ from raps.plotting import ( plot_nodes_gantt, plot_network_histogram ) -from raps.utils import next_arrival_byconfargs, convert_to_time_unit +from raps.utils import ( + next_arrival_byconfargs, convert_to_time_unit, pydantic_add_args, SubParsers, ExpandedPath, +) class Telemetry: @@ -266,30 +269,50 @@ class Telemetry: return jobs, timestep_start, timestep_end, args -def run_telemetry_add_args(parser: argparse.ArgumentParser): - parser.add_argument('--jid', type=str, default='*', help='Replay job id') - parser.add_argument('-f', '--replay', nargs='+', type=str, - help='Either: path/to/joblive path/to/jobprofile' - ' -or- filename.npz (overrides --workload option)') - parser.add_argument('-p', '--plot', type=str, default=None, choices=['jobs', 'nodes'], help='Output plots') - parser.add_argument("--is-results-file", action='store_true', default=False, help='Output plots') - parser.add_argument("--gantt-nodes", default=False, action='store_true', required=False, - # duplicate in workload! - help="Print Gannt with nodes required as line thickness (default false)") - parser.add_argument('-t', '--time', type=str, default=None, - help='Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d') - parser.add_argument('--system', type=str, default='frontier', help='System config to use') - choices = ['prescribed', 'poisson'] - parser.add_argument('--arrival', default=choices[0], type=str, choices=choices, - help=f"Modify arrival distribution ({choices[1]}) " - f"or use the original submit times ({choices[0]})") - parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') - parser.add_argument('-o', '--output', type=str, default=None, help='Store output in --output file.') - parser.add_argument("--live", action="store_true", help="Grab data from live system.") - - -def run_telemetry(args): - args_dict = vars(args) +class TelemetryArgs(BaseModel): + jid: str = '*' + """ Replay job id """ + replay: list[ExpandedPath] | None = None + """ path/to/joblive path/to/jobprofile -or- filename.npz (overrides --workload option) """ + plot: list[Literal["jobs", "nodes"]] | None = None + """ Output plots """ + is_results_file: bool = False + gantt_nodes: bool = False + """ Print Gannt with nodes required as line thickness (default false) """ + time: str | None = None + """ Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d """ + system: str = 'frontier' + """ System config to use """ + arrival: Literal['prescribed', 'poisson'] = "prescribed" + """ Modify arrival distribution ({choices[1]}) or use the original submit times """ + verbose: bool = False + output: str | None = None + """ Store output in --output file. """ + live: bool = False + """ Grab data from live system. 
""" + + +shortcuts = { + "replay": "f", + "plot": "p", + "time": "t", + "verbose": "v", + "output": "o", +} + + +def run_telemetry_add_parser(subparsers: SubParsers): + parser = subparsers.add_parser("telemetry", description=""" + Telemetry data validator + """) + model_validate = pydantic_add_args(parser, TelemetryArgs, { + "cli_shortcuts": shortcuts, + }) + parser.set_defaults(impl=lambda args: run_telemetry(model_validate(args, {}))) + + +def run_telemetry(args: TelemetryArgs): + args_dict = args.model_dump() config = get_system_config(args.system).get_legacy() args_dict['config'] = config td = Telemetry(**args_dict) diff --git a/raps/utils.py b/raps/utils.py index f54cc71..fe7af8f 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -20,7 +20,7 @@ import uuid import json import argparse from pathlib import Path -from typing import Annotated as A, TypeVar, Callable +from typing import Annotated as A, TypeVar, Callable, TypeAlias from pydantic import BaseModel, TypeAdapter, AfterValidator from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource import yaml @@ -650,6 +650,8 @@ def pydantic_add_args( some hacks to apply the args manually. """ model_config_dict = SettingsConfigDict({ + "cli_implicit_flags": True, + "cli_kebab_case": True, **(model_config or {}), "cli_parse_args": False, # Don't automatically parse args }) @@ -676,6 +678,10 @@ def pydantic_add_args( return model_validate_args +SubParsers: TypeAlias = "argparse._SubParsersAction[argparse.ArgumentParser]" +""" Alias for the result of argparse parser.add_subparsers """ + + def yaml_dump(data): """ Dumps yaml with pretty formatting """ class IndentDumper(yaml.Dumper): diff --git a/raps/workload.py b/raps/workload.py index 563071d..256e2b2 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -38,7 +38,7 @@ import numpy as np import matplotlib.pyplot as plt from raps.telemetry import Telemetry from raps.job import job_dict, Job -from raps.utils import create_file_indexed +from raps.utils import create_file_indexed, SubParsers, pydantic_add_args from raps.sim_config import SimConfig @@ -800,6 +800,22 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): plt.show() +def run_workload_add_parser(subparsers: SubParsers): + from raps.run_sim import shortcuts + # TODO: Separate the arguments for this command + parser = subparsers.add_parser("workload", description=""" + Saves workload as a snapshot. + """) + parser.add_argument("config_file", nargs="?", default=None, help=""" + YAML sim config file, can be used to configure an experiment instead of using CLI + flags. Pass "-" to read from stdin. 
+ """) + model_validate = pydantic_add_args(parser, SimConfig, model_config={ + "cli_shortcuts": shortcuts, + }) + parser.set_defaults(impl=lambda args: run_workload(model_validate(args, {}))) + + def run_workload(sim_config: SimConfig): args = sim_config.get_legacy_args() args_dict = sim_config.get_legacy_args() diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index 62b679a..58d14f9 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -22,7 +22,7 @@ def test_main_network_withdata_run(system, system_config, system_files, sim_outp "python", "main.py", "run", "--time", "1m", "--system", system, - "-f", *system_files, + "-f", ','.join(system_files), "--net", "-o", sim_output ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index caaf9e8..9694969 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -23,6 +23,6 @@ def test_multi_part_sim_withdata_run(system, system_config, system_files): "python", "main.py", "run-multi-part", "--time", "1h", "-x", f"{system}/*", - "-f", *system_files, + "-f", ','.join(system_files), ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py index ab6f93c..2729c7c 100644 --- a/tests/systems/test_telemetry_withdata_run.py +++ b/tests/systems/test_telemetry_withdata_run.py @@ -20,7 +20,7 @@ def test_telemetry_main_withdata_run(system, system_config, system_files, sim_ou result = subprocess.run([ "python", "main.py", "telemetry", "--system", system, - "-f", *system_files, + "-f", ','.join(system_files), "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" -- GitLab From dad7a59da28bd0887664a9475f111ed6c1cfbdf9 Mon Sep 17 00:00:00 2001 From: "Maiterth, Matthias" Date: Tue, 2 Sep 2025 18:02:05 +0000 Subject: [PATCH 269/388] Fastsim parallel integration --- config/kestrel.yaml | 53 +++++++++++ raps/dataloaders/kestrel.py | 170 ++++++++++++++++++++++++++++++++++++ raps/engine.py | 28 ++++-- raps/resmgr/default.py | 3 +- raps/schedulers/fastsim.py | 163 ++++++++++++++++++++++++++++++++++ raps/sim_config.py | 2 +- raps/ui.py | 5 +- 7 files changed, 411 insertions(+), 13 deletions(-) create mode 100644 config/kestrel.yaml create mode 100644 raps/dataloaders/kestrel.py create mode 100644 raps/schedulers/fastsim.py diff --git a/config/kestrel.yaml b/config/kestrel.yaml new file mode 100644 index 0000000..15e3ece --- /dev/null +++ b/config/kestrel.yaml @@ -0,0 +1,53 @@ +system: + num_cdus: 6 + racks_per_cdu: 6 + nodes_per_rack: 80 + rectifiers_per_rack: 6 + chassis_per_rack: 1 + nodes_per_blade: 1 + switches_per_chassis: 5 + nics_per_node: 2 + rectifiers_per_chassis: 5 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 1 + gpus_per_node: 4 + cpu_peak_flops: 396800000000.0 + gpu_peak_flops: 7800000000000.0 + cpu_fp_ratio: 0.69 + gpu_fp_ratio: 0.69 + +power: + power_gpu_idle: 75 + power_gpu_max: 300 + power_cpu_idle: 100 + power_cpu_max: 800 + power_mem: 74.26 + power_nic: 21 + power_nvme: 45 + power_switch: 250 + power_cdu: 0 + power_update_freq: 20 + 
rectifier_peak_threshold: 13670 + sivoc_loss_constant: 0 + sivoc_efficiency: 1 + rectifier_loss_constant: 0 + rectifier_efficiency: 1 + power_cost: 0.094 + +scheduler: + seed: 42 + job_arrival_time: 20 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 3600 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 \ No newline at end of file diff --git a/raps/dataloaders/kestrel.py b/raps/dataloaders/kestrel.py new file mode 100644 index 0000000..8b8470a --- /dev/null +++ b/raps/dataloaders/kestrel.py @@ -0,0 +1,170 @@ +""" + Load data for NREL's Kestrel cluster. +""" +import uuid +import pandas as pd +from tqdm import tqdm + +from ..job import job_dict, Job +from ..utils import power_to_utilization, next_arrival + + +def load_data(jobs_path, **kwargs): + """ + Reads job and job profile data from parquet files and parses them. + + Parameters + ---------- + jobs_path : str + The path to the jobs parquet file. + + Returns + ------- + list + The list of parsed jobs. + """ + jobs_df = pd.read_parquet(jobs_path, engine='pyarrow') + return load_data_from_df(jobs_df, **kwargs) + + +def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): + """ + Reads job and job profile data from parquet files and parses them. + + Requires the following fields in the DataFrame: + - start_time (timestamp): Time execution begins (actual or expected) + - job_id (int): Job ID + - node_power_consumption (List[int]): Power consumption of the job, recorded at Node level + - nodes_required (int): Number of nodes allocated to the job + - cpu_power_consumption (List[int]): Power consumption of the job, recorded at CPU level (don't have this) + - mem_power_consumption (List[int]): Power consumption of the job, recorded at Memory level (don't have this) + - priority (int): Relative priority of the job, 0=held, 1=required nodes DOWN/DRAINED + - job_state (string): State of the job, see enum job_states for possible values + - wall_time (int): Actual runtime of job, in seconds + - nodes (string): List of nodes allocated to job + + Returns + ------- + list + The list of parsed jobs. + """ + config = kwargs.get('config') + min_time = kwargs.get('min_time', None) + reschedule = kwargs.get('reschedule') + fastforward = kwargs.get('fastforward') + validate = kwargs.get('validate') + jid = kwargs.get('jid', '*') + + if fastforward: print(f"fast-forwarding {fastforward} seconds") + + # Sort jobs dataframe based on values in time_start column, adjust indices after sorting + jobs_df = jobs_df.sort_values(by='submit_time') + jobs_df = jobs_df[(jobs_df.start_time.between(pd.to_datetime('2024-09-01T00:00:00'), + pd.to_datetime('2024-09-16T00:00:00'), inclusive='right') | + jobs_df.end_time.between(pd.to_datetime('2024-09-01T00:00:00'), + pd.to_datetime('2024-09-16T00:00:00'), inclusive='right') + )].copy() + jobs_df = jobs_df.reset_index(drop=True) + + telemetry_start_timestamp = jobs_df['start_time'].min() + telemetry_end_timestamp = jobs_df['end_time'].max() + telemetry_start = 0 + telemetry_end = int((telemetry_end_timestamp - telemetry_start_timestamp).total_seconds()) + + # Take earliest time as baseline reference + # We can use the start time of the first job. + if min_time: + time_zero = min_time + else: + time_zero = jobs_df['submit_time'].min() + + num_jobs = len(jobs_df) + print("time_zero:", time_zero, "num_jobs", num_jobs) + + jobs = [] + + # Map dataframe to job state. 
Add results to jobs list + for jidx in tqdm(range(num_jobs - 1), total=num_jobs, desc="Processing Kestrel Jobs"): + + job_id = jobs_df.loc[jidx, 'job_id'] + account = jobs_df.loc[jidx, 'account'] + + if not jid == '*': + if int(jid) == int(job_id): + print(f'Extracting {job_id} profile') + else: + continue + nodes_required = jobs_df.loc[jidx, 'nodes_required'] + + name = str(uuid.uuid4())[:6] + + if validate: + cpu_power = jobs_df.loc[jidx, 'power_per_node'] + cpu_trace = cpu_power + + else: + cpu_power = jobs_df.loc[jidx, 'power_per_node'] + cpu_power_array = [600] if (pd.isna(cpu_power) or cpu_power == 0) else cpu_power.tolist() + cpu_min_power = nodes_required * config['POWER_CPU_IDLE'] * config['CPUS_PER_NODE'] + cpu_max_power = nodes_required * config['POWER_CPU_MAX'] * config['CPUS_PER_NODE'] + cpu_util = power_to_utilization(cpu_power_array, cpu_min_power, cpu_max_power) + cpu_trace = cpu_util * config['CPUS_PER_NODE'] + gpu_trace = 0 + + # Priority sorting doesn't seem to be implemented at the moment + priority = 0 + + wall_time = jobs_df.loc[jidx, 'wall_time'] + end_state = jobs_df.loc[jidx, 'job_state'] + time_submit = jobs_df.loc[jidx+1, 'submit_time'] + diff = time_submit - time_zero + + if jid == '*': + time_offset = max(diff.total_seconds(), 0) + else: + # When extracting out a single job, run one iteration past the end of the job + time_offset = config['UI_UPDATE_FREQ'] + + if fastforward: time_offset -= fastforward + + if reschedule: # Let the scheduler reschedule the jobs + scheduled_nodes = None + time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) + else: # Prescribed replay + scheduled_nodes = None + time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) + + trace_quanta = config['TRACE_QUANTA'] + + if cpu_trace.size > 0 and time_offset >= 0: + job_info = job_dict(nodes_required = nodes_required, + name = name, + account = account, + cpu_trace = cpu_trace, + gpu_trace = gpu_trace, + ntx_trace = [], + nrx_trace = [], + end_state = end_state, + scheduled_nodes = scheduled_nodes, + id = job_id, + priority = priority, + submit_time = time_offset, + time_limit = wall_time, + trace_quanta=trace_quanta) + jobs.append(Job(job_info)) + + return jobs, telemetry_start, telemetry_end + + +def node_index_to_name(index: int, config: dict): + """ Converts an index value back to an name string based on system configuration. 
""" + return f"node{index:04d}" + + +def cdu_index_to_name(index: int, config: dict): + return f"cdu{index:02d}" + + +def cdu_pos(index: int, config: dict) -> tuple[int, int]: + """ Return (row, col) tuple for a cdu index """ + return (0, index) # TODO \ No newline at end of file diff --git a/raps/engine.py b/raps/engine.py index f6da02c..64bf218 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -42,6 +42,7 @@ from raps.weather import Weather from raps.sim_config import SimConfig from raps.system_config import SystemConfig +from bisect import bisect_right @dataclasses.dataclass class TickData: @@ -413,7 +414,6 @@ class Engine: self.power_manager.set_idle(job.scheduled_nodes) job.current_state = JobState.COMPLETED job.end_time = self.current_timestep - self.running.remove(job) self.jobs_completed += 1 job_stats = job.statistics() @@ -424,7 +424,7 @@ class Engine: self.resource_manager.free_nodes_from_job(job) killed_jobs = [job for job in self.running if - job.start_time + job.time_limit <= self.current_timestep] + job.end_time is not None and job.start_time + job.time_limit <= self.current_timestep] for job in killed_jobs: self.power_manager.set_idle(job.scheduled_nodes) @@ -489,7 +489,8 @@ class Engine: actively_considered_jobs: List, all_jobs: List, replay: bool, - autoshutdown: bool): + autoshutdown: bool, + cursor: int): # 1 update running time of all running jobs # 2 update the current_timestep of the engine (this serves as reference for most computations) # 3 Check if simulation should shutdown @@ -504,7 +505,7 @@ class Engine: len(self.queue) == 0 and \ len(self.running) == 0 and \ not replay and \ - len(all_jobs) == 0 and \ + len(all_jobs) == cursor and \ len(actively_considered_jobs) == 0: if self.debug: print(f"Simulaiton completed early: {self.config['system_name']} - " @@ -555,10 +556,10 @@ class Engine: job.running_time = self.current_timestep - job.start_time if job.current_state != JobState.RUNNING: - raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.currentstate}") + raise ValueError(f"Job {job.id} is in running list, but state is not RUNNING: job.state == {job.current_state}") else: # if job.state == JobState.RUNNING: # Error checks - if job.running_time > job.time_limit: + if job.running_time > job.time_limit and job.end_time is not None: raise Exception(f"Job exceded time limit! " f"{job.running_time} > {job.time_limit}" f"\n{job}" @@ -754,6 +755,9 @@ class Engine: # Process jobs in batches for better performance of timestep loop all_jobs = jobs.copy() + submit_times = [j.submit_time for j in all_jobs] + cursor = 0 + jobs = [] # Batch Jobs into 6h windows based on submit_time or twice the time_delta if larger batch_window = max(60 * 60 * 6, 2 * time_delta) # at least 6h @@ -772,8 +776,13 @@ class Engine: if (self.current_timestep % batch_window == 0) or (self.current_timestep == timestep_start): # Add jobs that are within the batching window and remove them from all jobs - jobs += [job for job in all_jobs if job.submit_time <= self.current_timestep + batch_window] - all_jobs[:] = [job for job in all_jobs if job.submit_time > self.current_timestep + batch_window] + # jobs += [job for job in all_jobs if job.submit_time <= self.current_timestep + batch_window] + # all_jobs[:] = [job for job in all_jobs if job.submit_time > self.current_timestep + batch_window] + cutoff = self.current_timestep + batch_window + r = bisect_right(submit_times, cutoff, lo=cursor) + if r > cursor: + jobs.extend(all_jobs[cursor:r]) + cursor = r # 1. 
Prepare Timestep:
             completed_jobs, killed_jobs, newly_downed_nodes, need_reschedule = \
@@ -812,7 +821,8 @@
             simulation_done = self.complete_timestep(actively_considered_jobs=jobs,
                                                      all_jobs=all_jobs,
                                                      replay=replay,
-                                                     autoshutdown=autoshutdown)
+                                                     autoshutdown=autoshutdown,
+                                                     cursor=cursor)
             if simulation_done:
                 break
             yield tick_data
diff --git a/raps/resmgr/default.py b/raps/resmgr/default.py
index c7791f5..ab1e774 100644
--- a/raps/resmgr/default.py
+++ b/raps/resmgr/default.py
@@ -65,7 +65,8 @@ class ExclusiveNodeResourceManager:
             if n not in self.available_nodes:
                 self.available_nodes.append(n)
             else:
-                raise KeyError(f"node was free but already in available nodes: {n.id}")
+                raise KeyError((f"Attempting to free node {n} after completion of job {job.id}. "
+                                + "Node is already free (in available nodes)!"))
         self.available_nodes = sorted(self.available_nodes)
 
     def update_system_utilization(self, current_time, running_jobs):
diff --git a/raps/schedulers/fastsim.py b/raps/schedulers/fastsim.py
new file mode 100644
index 0000000..855dcbd
--- /dev/null
+++ b/raps/schedulers/fastsim.py
@@ -0,0 +1,163 @@
+import pandas as pd
+import sys
+import os
+import zmq
+
+from ..policy import PolicyType, BackfillType
+from raps.telemetry import Telemetry
+from ..job import JobState
+from raps.sim_config import args
+from raps.system_config import get_system_config
+
+# Run with this command:
+# python main.py --system kestrel -f ../data/fastsim_jobs_output.parquet --scheduler fastsim --policy priority --start 2024-09-01T00:00 --end 2024-09-15T00:00
+
+class Scheduler():
+    """
+    FastSim-backed scheduler (strict lockstep via ZeroMQ).
+
+    Protocol (server side is FastSim --serve):
+      - INIT       -> { init_time }
+      - GET { t }  -> { t, running_ids }   (server acks t after reply)
+      - END (on shutdown) -> { ok: true }
+
+    Semantics at engine second t:
+      - R_t := authoritative running IDs from FastSim for t
+      - started  = R_t - prev_R
+          -> stamp start_time=t (once), assign nodes once, mark RUNNING
+      - finished = prev_R - R_t
+          -> stamp end_time=t (engine will finalize next tick in prepare_timestep)
+
+    running list for this tick = R_t | finished (union; those finishing at t remain
+    visible for one more scheduler call; engine completes them on next second).
+    """
+
+    def __init__(self, config, resource_manager, **kwargs):
+        self.config = config
+        self.policy = PolicyType(kwargs.get('policy'))
+        self.bfpolicy = BackfillType(kwargs.get('backfill'))
+        self.debug = bool(kwargs.get('debug', False))
+
+        # ---- ZeroMQ client ----
+        self.endpoint = kwargs.get('plugin_endpoint', 'ipc:///tmp/fastsim.sock')
+        self._ctx = zmq.Context.instance()
+        self._sock = self._ctx.socket(zmq.REQ)
+        self._sock.setsockopt(zmq.LINGER, 0)
+        self._sock.connect(self.endpoint)
+
+        # INIT handshake: fetch FastSim's init_time (ISO string). 
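+        # Illustrative REQ/REP message shapes (assumed from the protocol notes
+        # above; the exact JSON schema is defined by the FastSim server):
+        #   -> {"op": "INIT"}           <- {"init_time": "2024-09-01T00:00:00"}
+        #   -> {"op": "GET", "t": 42}   <- {"t": 42, "running_ids": [1001, 1002]}
+        #   -> {"op": "END"}            <- {"ok": true}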
+ self.init_time_iso = self._rpc('INIT').get('init_time') + + self.resource_manager = resource_manager + + # Job metadata: id -> Job + self.jobids_to_jobs = {} + self.allocated_jobs = set() # job_ids we have assigned nodes for + self.prev_running_ids = set() # R_{t-1} + + # Build the Job objects from RAPS Telemetry (needed so ExaDigiT subsystems have objects) + args_dict = vars(args) + config = get_system_config(args.system).get_legacy() + args_dict['config'] = config + td = Telemetry(**args_dict) + + print("...Now loading jobs to FastSim scheduler.") + jobs, _, _ = td.load_data(args.replay) + for job in jobs: + self.jobids_to_jobs[job.id] = job + + if self.debug: + print(f"[RAPS-FastSim] Connected to {self.endpoint}; init_time={self.init_time_iso}", file=sys.stderr) + + def _rpc(self, op, **payload): + """Send a JSON request and return the JSON reply (dict).""" + try: + msg = {'op': op} + msg.update(payload) + self._sock.send_json(msg) + rep = self._sock.recv_json() + except Exception as e: + raise RuntimeError(f"[RAPS-FastSim] RPC {op} failed: {e}") from e + if isinstance(rep, dict) and 'error' in rep: + raise RuntimeError(f"[RAPS-FastSim] RPC {op} error: {rep['error']}") + return rep + + def _fastsim_running_ids(self, t: int): + """Blocking call: get authoritative running job IDs for second t.""" + rep = self._rpc('GET', t=int(t)) + rids = rep.get('running_ids', []) + return set(rids) + + def schedule(self, queue=None, running=None, current_time=None, accounts=None, sorted=False): + """ + Called by Engine when RAPS detects an event. + """ + running = running if running is not None else [] + + t = int(current_time) + + # Get authoritative running set for second t (blocks until available) + R_t = self._fastsim_running_ids(t) + + # Diff vs previous second + started_ids = R_t - self.prev_running_ids + finished_ids = self.prev_running_ids - R_t # these end at t; engine finalizes next tick + + # Handle starts: stamp start_time, assign nodes, mark RUNNING + for jid in started_ids: + job = self.jobids_to_jobs.get(jid) + if job is None: + if self.debug: + print(f"[RAPS-FastSim][WARN] Unknown job id from FastSim: {jid}", file=sys.stderr) + continue + + # Assign nodes exactly once + if jid not in self.allocated_jobs: + self.resource_manager.assign_nodes_to_job(job, t, self.policy) + self.allocated_jobs.add(jid) + + # FastSim is authoritative + job.start_time = t + # IMPORTANT: prevent premature completion by RM’s default behavior + job.end_time = None # Prevents RAPS from removing job + job.state = JobState.RUNNING + + # Handle finishes: stamp end_time=t (engine.prepare_timestep next tick completes) + running.clear() + for jid in finished_ids: + job = self.jobids_to_jobs.get(jid) + if job is not None: + # overwrite any prior value; FastSim is the source of truth + # job.end_time = t + if job.start_time is not None: + observed = t - job.start_time + if (job.time_limit is None) or (job.time_limit < observed): + # This is necessary since RAPS is handling finishing jobs, but schedule is not always + # called at every tick, even though the job may have finished in FastSim during that tick. + # TODO: Deal with this, because it messes up the end time of some jobs. + # print(f"Extending {job.id} runtime {job.time_limit} to match observed {observed} at finish.") + job.time_limit = observed + # print((f"Job {job.id} is finished, start time: {job.start_time}, wall time: {job.time_limit}," + # f"end time: {job.end_time}, at time {t}. 
With nodes {job.scheduled_nodes}.")) + job.end_time = t + job.time_limit = t - job.start_time + running.append(job) + + # Running list reflects exactly FastSim’s R_t + for jid in R_t: + job = self.jobids_to_jobs.get(jid) + if job is not None: + # defensively ensure state isn’t stuck at COMPLETED + if job.state != JobState.RUNNING: + job.state = JobState.RUNNING + running.append(job) + + # Update prev + self.prev_running_ids = R_t + + def end_sim(self): + # Ask server to stop + try: + self._rpc('END') + except Exception: + pass \ No newline at end of file diff --git a/raps/sim_config.py b/raps/sim_config.py index 036ae8b..26a328a 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -158,7 +158,7 @@ class SimConfig(BaseModel): # Synthetic workloads scheduler: Literal[ - "default", "scheduleflow", "nrel", "anl", "flux", "experimental", "multitenant", + "default", "scheduleflow", "fastsim", "anl", "flux", "experimental", "multitenant", ] = "default" """ Scheduler name """ policy: PolicyType | None = None diff --git a/raps/ui.py b/raps/ui.py index 5be3523..b4234bc 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -24,6 +24,7 @@ from raps.utils import summarize_ranges, convert_seconds_to_hhmmss, convert_seco from raps.constants import ELLIPSES from raps.engine import TickData, Engine +MAX_ROWS = 30 class LayoutManager: def __init__(self, layout_type, engine: Engine, total_timesteps=0, debug=None, args_dict=None, **config): @@ -153,7 +154,7 @@ class LayoutManager: table.add_column(col, justify="center") # Add data rows - for job in jobs: + for job in jobs[:MAX_ROWS]: # Number of requested nodes as a string # n_nodes = str(job.nodes_required) # Unused @@ -271,7 +272,7 @@ class LayoutManager: else: # For the curious: If the simulation time in seconds is large than # unix timestamp for Jan 2000 this is a unix timestamp, - time_str = f"{datetime.fromtimestamp(time_in_s).strftime("%Y-%m-%d %H:%M")}" + time_str = f"{datetime.fromtimestamp(time_in_s).strftime('%Y-%m-%d %H:%M')}" if timestep_start != 0: # append time simulated time_str += f"\nSim: {convert_seconds_to_hhmm(time_in_s - timestep_start)}" -- GitLab From 3df68802ab8ef27357a63eeec0a4e4de876fd1d9 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 2 Sep 2025 15:24:03 -0400 Subject: [PATCH 270/388] Get multitenant workload working again (for mit_supercloud) --- raps/workload.py | 149 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/raps/workload.py b/raps/workload.py index 563071d..00a572e 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -643,6 +643,155 @@ class Workload: return jobs + def multitenant(self, **kwargs): + """ + Generate deterministic jobs to validate multitenant scheduling & power. + + usage example: + + python main.py run-multi-part -x mit_supercloud -w multitenant + + Parameters + ---------- + mode : str + One of: + - 'ONE_JOB_PER_NODE_ALL_CORES' + - 'TWO_JOBS_PER_NODE_SPLIT' + - 'STAGGERED_JOBS_PER_NODE' + wall_time : int + Duration (seconds) of each job (default: 3600) + trace_quanta : int + Sampling interval for traces; defaults to config['TRACE_QUANTA'] + + Returns + ------- + list[dict] + List of job_dict entries. 
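+
+        Notes
+        -----
+        Illustrative sizing (hypothetical 48-core, 4-GPU node): the split mode
+        yields two jobs of 24 cores / 2 GPUs each per node, while the staggered
+        mode yields three 16-core / 1-GPU jobs per node starting at 0,
+        wall_time/3, and 2*wall_time/3.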
+ """ + mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') + wall_time = kwargs.get('wall_time', 3600) + + jobs = [] + + for partition in self.partitions: + cfg = self.config_map[partition] + trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) + + cores_per_cpu = cfg.get('CORES_PER_CPU', 1) + cpus_per_node = cfg.get('CPUS_PER_NODE', 1) + cores_per_node = cores_per_cpu * cpus_per_node + gpus_per_node = cfg.get('GPUS_PER_NODE', 0) + + n_nodes = cfg['AVAILABLE_NODES'] + + def make_trace(cpu_util, gpu_util): + return self.compute_traces(cpu_util, gpu_util, wall_time, trace_quanta) + + job_id_ctr = 0 + + if mode == 'ONE_JOB_PER_NODE_ALL_CORES': + # Each node runs one job that consumes all cores/GPUs + for nid in range(n_nodes): + cpu_trace, gpu_trace = make_trace(cores_per_node, gpus_per_node) + jobs.append(Job(job_dict( + nodes_required=1, + cpu_cores_required=cores_per_node, + gpu_units_required=gpus_per_node, + name=f"MT_full_node_{partition}_{nid}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + + elif mode == 'TWO_JOBS_PER_NODE_SPLIT': + # Two jobs per node: split CPU/GPU roughly in half + for nid in range(n_nodes): + cpu_a = cores_per_node // 2 + cpu_b = cores_per_node - cpu_a + gpu_a = gpus_per_node // 2 + gpu_b = gpus_per_node - gpu_a + + for idx, (c_req, g_req, tag) in enumerate([(cpu_a, gpu_a, 'A'), + (cpu_b, gpu_b, 'B')]): + cpu_trace, gpu_trace = make_trace(c_req, g_req) + jobs.append(Job(job_dict( + nodes_required=1, # still one node; multitenant RM packs cores + cpu_cores_required=c_req, + gpu_units_required=g_req, + name=f"MT_split_node_{partition}_{nid}_{tag}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + + elif mode == 'STAGGERED_JOBS_PER_NODE': + # Three jobs per node, staggered starts: 0, wall_time/3, 2*wall_time/3 + offsets = [0, wall_time // 3, 2 * wall_time // 3] + cpu_each = cores_per_node // 3 or 1 + gpu_each = max(1, gpus_per_node // 3) if gpus_per_node else 0 + + for nid in range(n_nodes): + for k, offset in enumerate(offsets): + cpu_trace, gpu_trace = make_trace(cpu_each, gpu_each) + jobs.append(Job(job_dict( + nodes_required=1, + cpu_cores_required=cpu_each, + gpu_units_required=gpu_each, + name=f"MT_stagger_node_{partition}_{nid}_{k}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=offset, + time_limit=wall_time, + start_time=offset, + end_time=offset + wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + else: + raise 
ValueError(f"Unknown multitenant mode: {mode}") + + return jobs + def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): # put args.multimodal in dist_split! -- GitLab From ac3588d93fbde2144dc2f2aa6a256fe5420dbc78 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 2 Sep 2025 15:28:03 -0400 Subject: [PATCH 271/388] Delete old multitenant function in workload.py --- raps/workload.py | 145 ----------------------------------------------- 1 file changed, 145 deletions(-) diff --git a/raps/workload.py b/raps/workload.py index 00a572e..d2a98e7 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -973,151 +973,6 @@ def run_workload(sim_config: SimConfig): np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) print(filename + ".npz") # To std-out to show which npz was created. - def multitenant(self, **kwargs): - """ - Generate deterministic jobs to validate multitenant scheduling & power. - - Parameters - ---------- - mode : str - One of: - - 'ONE_JOB_PER_NODE_ALL_CORES' - - 'TWO_JOBS_PER_NODE_SPLIT' - - 'STAGGERED_JOBS_PER_NODE' - wall_time : int - Duration (seconds) of each job (default: 3600) - trace_quanta : int - Sampling interval for traces; defaults to config['TRACE_QUANTA'] - - Returns - ------- - list[dict] - List of job_dict entries. - """ - mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') - wall_time = kwargs.get('wall_time', 3600) - - jobs = [] - - for partition in self.partitions: - cfg = self.config_map[partition] - trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) - - cores_per_cpu = cfg.get('CORES_PER_CPU', 1) - cpus_per_node = cfg.get('CPUS_PER_NODE', 1) - cores_per_node = cores_per_cpu * cpus_per_node - gpus_per_node = cfg.get('GPUS_PER_NODE', 0) - - n_nodes = cfg['AVAILABLE_NODES'] - - def make_trace(cpu_util, gpu_util): - return self.compute_traces(cpu_util, gpu_util, wall_time, trace_quanta) - - job_id_ctr = 0 - - if mode == 'ONE_JOB_PER_NODE_ALL_CORES': - # Each node runs one job that consumes all cores/GPUs - for nid in range(n_nodes): - cpu_trace, gpu_trace = make_trace(cores_per_node, gpus_per_node) - jobs.append(job_dict( - nodes_required=1, - cpu_cores_required=cores_per_node, - gpu_units_required=gpus_per_node, - name=f"MT_full_node_{partition}_{nid}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=0, - time_limit=wall_time, - start_time=0, - end_time=wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] - )) - job_id_ctr += 1 - - elif mode == 'TWO_JOBS_PER_NODE_SPLIT': - # Two jobs per node: split CPU/GPU roughly in half - for nid in range(n_nodes): - cpu_a = cores_per_node // 2 - cpu_b = cores_per_node - cpu_a - gpu_a = gpus_per_node // 2 - gpu_b = gpus_per_node - gpu_a - - for idx, (c_req, g_req, tag) in enumerate([(cpu_a, gpu_a, 'A'), - (cpu_b, gpu_b, 'B')]): - cpu_trace, gpu_trace = make_trace(c_req, g_req) - jobs.append(job_dict( - nodes_required=1, # still one node; multitenant RM packs cores - cpu_cores_required=c_req, - gpu_units_required=g_req, - name=f"MT_split_node_{partition}_{nid}_{tag}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, 
MAX_PRIORITY), - partition=partition, - submit_time=0, - time_limit=wall_time, - start_time=0, - end_time=wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] - )) - job_id_ctr += 1 - - elif mode == 'STAGGERED_JOBS_PER_NODE': - # Three jobs per node, staggered starts: 0, wall_time/3, 2*wall_time/3 - offsets = [0, wall_time // 3, 2 * wall_time // 3] - cpu_each = cores_per_node // 3 or 1 - gpu_each = max(1, gpus_per_node // 3) if gpus_per_node else 0 - - for nid in range(n_nodes): - for k, offset in enumerate(offsets): - cpu_trace, gpu_trace = make_trace(cpu_each, gpu_each) - jobs.append(job_dict( - nodes_required=1, - cpu_cores_required=cpu_each, - gpu_units_required=gpu_each, - name=f"MT_stagger_node_{partition}_{nid}_{k}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=offset, - time_limit=wall_time, - start_time=offset, - end_time=offset + wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] - )) - job_id_ctr += 1 - else: - raise ValueError(f"Unknown multitenant mode: {mode}") - - return jobs - def continuous_job_generation(*, engine, timestep, jobs): # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") -- GitLab From 88bd91650233a0f623a24687fd83be863385b42a Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 28 Aug 2025 15:21:44 -0400 Subject: [PATCH 272/388] Add missing tests --- tests/systems/conftest.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index 269d101..2703755 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -63,6 +63,18 @@ def system_config(system): "time_delta": True, "net": False, }, + "bluewaters": { + "main": True, + "telemetry": True, + "multi-part-sim": False, + "withdata": True, + "cooling": False, + "uncertainty": False, + "time": True, + "fastforward": True, + "time_delta": True, + "net": False, + }, "frontier": { "main": True, "telemetry": True, @@ -89,10 +101,10 @@ def system_config(system): "net": False, }, "gcloudv2": { - "main": False, - "telemetry": False, + "main": True, + "telemetry": True, "multi-part-sim": False, - "withdata": False, + "withdata": True, "cooling": False, "uncertainty": False, "time": True, @@ -185,6 +197,7 @@ def system_files(system): files = { "40frontiers": [], "adastraMI250": ["adastraMI250/AdastaJobsMI250_15days.parquet"], + "bluewaters": ["bluewaters"], "frontier": ["frontier/slurm/joblive/date=2024-01-18/", "frontier/jobprofile/date=2024-01-18/"], "fugaku": ["fugaku/21_04.parquet"], "gcloudv2": ["gcloud/v2/google_cluster_data_2011_sample"], -- GitLab From 47f27d768fb3b2792b50db2a2744ab27d5fffae6 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 3 Sep 2025 12:04:29 -0400 Subject: [PATCH 273/388] Return start_date in data loaders --- raps/dataloaders/adastraMI250.py | 8 +- raps/dataloaders/bluewaters.py | 14 +++- raps/dataloaders/frontier.py | 17 +++- raps/dataloaders/fugaku.py | 7 +- raps/dataloaders/gcloudv2.py | 18 +++-- raps/dataloaders/kestrel.py | 8 +- raps/dataloaders/lassen.py | 8 +- raps/dataloaders/marconi100.py | 8 +- raps/dataloaders/mit_supercloud/loader.py | 22 ++--- raps/utils.py | 97 
++++++++++++++++++++++- 10 files changed, 169 insertions(+), 38 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 90201c8..9cb53d5 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -24,7 +24,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import next_arrival_byconfkwargs +from ..utils import next_arrival_byconfkwargs, DataLoaderResult def load_data(jobs_path, **kwargs): @@ -205,7 +205,11 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): count_jobs_notOK += 1 print("jobs not added: ", count_jobs_notOK) - return jobs, telemetry_start_time, telemetry_end_time + return DataLoaderResult( + jobs = jobs, + telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, + start_date=telemetry_start_timestamp, + ) def xname_to_index(xname: str, config: dict): diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py index 46fa462..b7f1c10 100644 --- a/raps/dataloaders/bluewaters.py +++ b/raps/dataloaders/bluewaters.py @@ -32,7 +32,9 @@ import math import re import pandas as pd from pathlib import Path +from datetime import datetime, timezone from raps.telemetry import Job, job_dict +from raps.utils import DataLoaderResult def throughput_traces(total_tx, total_rx, intervals): @@ -325,7 +327,11 @@ def load_data(local_dataset_path, **kwargs): j.trace_end_time -= t0 # pprint(jobs) - simulation_start = 0 - simulation_end = max((j.end_time for j in jobs), default=0) - - return jobs, simulation_start, simulation_end + telemetry_start = 0 + telemetry_end = max((j.end_time for j in jobs), default=0) + + return DataLoaderResult( + jobs=jobs, + telemetry_start=telemetry_start, telemetry_end=telemetry_end, + start_date=datetime.fromtimestamp(t0, timezone.utc), + ) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 8491617..15621d1 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -10,12 +10,13 @@ python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR """ import time +from datetime import datetime, timezone import numpy as np import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, encrypt +from ..utils import power_to_utilization, next_arrival_byconfkwargs, encrypt, DataLoaderResult def aging_boost(nnodes): @@ -325,7 +326,12 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar job = Job(job_info) jobs.append(job) - return jobs, telemetry_start, telemetry_end + return DataLoaderResult( + jobs = jobs, + telemetry_start = telemetry_start, + telemetry_end = telemetry_end, + start_date = telemetry_start_timestamp, + ) def load_live_data(**kwargs): @@ -537,7 +543,12 @@ def load_live_data(**kwargs): job = Job(job_info) jobs.append(job) - return jobs, telemetry_start, telemetry_end + return DataLoaderResult( + jobs = jobs, + telemetry_start = telemetry_start, + telemetry_end = telemetry_end, + start_date = datetime.fromtimestamp(telemetry_start, timezone.utc), + ) def xname_to_index(xname: str, config: dict): diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 734fa61..0dd2c3b 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -17,6 +17,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job +from ..utils import DataLoaderResult def load_data(path, **kwargs): @@ -167,7 +168,11 @@ 
def load_data_from_df(df, **kwargs): job = Job(job_info) job_list.append(job) - return job_list, telemetry_start, telemetry_end + return DataLoaderResult( + jobs=job_list, + telemetry_start = telemetry_start, telemetry_end = telemetry_end, + start_date = telemetry_start_timestamp, + ) def node_index_to_name(index: int, config: dict): diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index e19b0e8..73f918b 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -1,13 +1,14 @@ import os import re +from datetime import datetime from tqdm import tqdm from typing import List, Optional, Generator, Tuple, Any, Union import numpy as np import pandas as pd -from raps.job import job_dict -from raps.job import Job +from raps.job import job_dict, Job +from raps.utils import DataLoaderResult """ Official instructions are here: @@ -200,7 +201,7 @@ class GoogleClusterV2DataLoader: yield pd.concat(dfs, ignore_index=True) -def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any], float, float]: +def load_data(data_path: Union[str, List[str]], **kwargs: Any): config = kwargs.get('config') # Unpack list if isinstance(data_path, list): @@ -331,6 +332,11 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any) -> Tuple[List[Any jobs.append(Job(job_d)) # Compute simulation span: start at t=0, end at the latest job finish - simulation_start = 0 - simulation_end = int(max(usage_map_end.values()) - t0) - return jobs, simulation_start, simulation_end + telemetry_start = 0 + telemetry_end = int(max(usage_map_end.values()) - t0) + return DataLoaderResult( + jobs = jobs, + telemetry_start=telemetry_start, telemetry_end=telemetry_end, + # gcloud dataset timestamps are already relative, and it doesn't list a start exact date. 
+ start_date=datetime.fromisoformat("2011-05-02T00:00:00Z"), + ) diff --git a/raps/dataloaders/kestrel.py b/raps/dataloaders/kestrel.py index 8b8470a..c82b957 100644 --- a/raps/dataloaders/kestrel.py +++ b/raps/dataloaders/kestrel.py @@ -6,7 +6,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival +from ..utils import power_to_utilization, next_arrival, DataLoaderResult def load_data(jobs_path, **kwargs): @@ -153,7 +153,11 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): trace_quanta=trace_quanta) jobs.append(Job(job_info)) - return jobs, telemetry_start, telemetry_end + return DataLoaderResult( + jobs=jobs, + telemetry_start=telemetry_start, telemetry_end=telemetry_end, + start_date=telemetry_start_timestamp, + ) def node_index_to_name(index: int, config: dict): diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index c9aae0d..51292fc 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -38,7 +38,7 @@ from tqdm import tqdm from datetime import timedelta from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, parse_td +from ..utils import power_to_utilization, next_arrival_byconfkwargs, parse_td, DataLoaderResult def load_data(path, **kwargs): @@ -249,7 +249,11 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): job = Job(job_info) job_list.append(job) - return job_list, telemetry_start_time, telemetry_end_time + return DataLoaderResult( + jobs=job_list, + telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, + start_date=telemetry_start_timestamp, + ) def get_scheduled_nodes(allocation_id, node_df): diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index fef8ec0..9a236f2 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -28,7 +28,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs +from ..utils import power_to_utilization, next_arrival_byconfkwargs, DataLoaderResult def load_data(jobs_path, **kwargs): @@ -233,7 +233,11 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): job = Job(job_info) jobs.append(job) - return jobs, telemetry_start, telemetry_end + return DataLoaderResult( + jobs = jobs, + telemetry_start=telemetry_start, telemetry_end=telemetry_end, + start_date=telemetry_start_timestamp, + ) def node_index_to_name(index: int, config: dict): diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 6057210..7c29f31 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -116,9 +116,9 @@ import re from tqdm import tqdm from typing import Dict, Union, Optional from collections import Counter - +from datetime import datetime, timezone from raps.job import job_dict, Job -from raps.utils import summarize_ranges +from raps.utils import summarize_ranges, DataLoaderResult from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END @@ -602,20 +602,12 @@ def load_data(local_dataset_path, **kwargs): job = Job(current_job_dict) jobs_list.append(job) - # Calculate min_overall_utime and max_overall_utime - telemetry_start = int(sl.time_start.min()) - telemetry_end = int(sl.time_end.max()) - # min_overall_utime = int(sl.time_submit.min()) - # max_overall_utime = 
int(sl.time_submit.max()) - - # args_namespace = SimpleNamespace( - # fastforward=min_overall_utime, - # system='mit_supercloud', - # time=max_overall_utime - # ) - print("\nSkipped jobs summary:") for reason, count in skip_counts.items(): print(f"- {reason}: {count}") - return jobs_list, telemetry_start, telemetry_end # min_overall_utime, max_overall_utime, args_namespace + return DataLoaderResult( + jobs = jobs_list, + telemetry_start=0, telemetry_end=int(end_ts - start_ts), + start_date=datetime.fromtimestamp(start_ts, timezone.utc), + ) diff --git a/raps/utils.py b/raps/utils.py index fe7af8f..e0c9acf 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -21,7 +21,7 @@ import json import argparse from pathlib import Path from typing import Annotated as A, TypeVar, Callable, TypeAlias -from pydantic import BaseModel, TypeAdapter, AfterValidator +from pydantic import BaseModel, TypeAdapter, AfterValidator, ConfigDict, AwareDatetime from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource import yaml from raps.job import Job @@ -633,6 +633,9 @@ ExpandedPath = A[Path, AfterValidator(lambda v: Path(v).expanduser().resolve())] """ Type that that expands ~ and environment variables in a path string """ +SmartTimedelta = A[timedelta, AfterValidator(parse_td)] +""" Can be passed as ISO 8601 format like PT5M, or a string like 9s, or a number of seconds """ + T = TypeVar("T", bound=BaseModel) @@ -706,3 +709,95 @@ def yaml_dump(data): indent=2, allow_unicode=True, ) + + +class DataLoaderResult(BaseModel): + """ + Result of a dataloader load_data() function. + + jobs: + The list of parsed jobs. + + telemetry_start + the first timestep in which the simulation be executed. + + telemetry_end + the last timestep in which the simulation can be executed. + + start_date + The actual date that telemetry_start represents. + ---- + Explanation regarding times: + + The loaded dataframe contains + a first timestamp with associated data + and a last timestamp with associated data + + These form the maximum extent of the simuluation time. + telemetry_start and telemetry_end. + + [ ] + ^ ^ + telemetry_start telemetry_end + + These values form the maximum extent of the simulation. + telemetry_start is typically 0, but any int can be used as long as all the times in the + jobs are relative to the telemetry_start. + + Next is the actual extent of the simulation: + + [ ] + ^ ^ + simulation_start simulation_end + + The simulation will start at telemetry_start by default, but the user can specify an explicit + simulation start time. + + Additionally, jobs can have started before telemetry_start, + And can have a recorded ending after simulation_end, + [ ] + ^ ^ + first_start_timestamp last_end_timestamp + + This means that the time between first_start_timestamp and telemetry_start + has no associated values in the traces! + The missing values after simulation_end can be ignored, as the simulatuion + will have stoped before. + + However, the times before telemetry_start have to be padded to generate + correct offsets within their data! + Within the simulation a job's current time is specified as the difference + between its start_time and the current timestep of the simulation. 
+ + With this each job's + - submit_time + - time_limit + - start_time # Maybe Null + - end_time # Maybe Null + - expected_run_time (end_time - start_time) # Maybe Null + - current_run_time (How long did the job run already, when loading) # Maybe zero + - trace_time (length of each trace in seconds) # Maybe Null + - trace_start_time (time offset in seconds after which the trace starts) # Maybe Null + - trace_end_time (time offset in seconds after which the trace ends) # Maybe Null + - trace_quanta (job's associated trace quanta, to correctly replay with different trace quanta) # Maybe Null + have to be set for use within the simulation. + + The values trace_start_time and trace_end_time are similar to telemetry_start and + telemetry_end but may differ for each job due to missing data. + + The returned values are these: + - The list of parsed jobs (as Job objects) + - telemetry_start: int (in seconds) + - telemetry_end: int (in seconds) + - start_date: datetime + """ + jobs: list[Job] + telemetry_start: int + telemetry_end: int + # TODO: It might make more sense to make start_timestep/end_timestep always unix time, then we + # wouldn't need this extra start_date field. + start_date: AwareDatetime + + model_config = ConfigDict( + arbitrary_types_allowed = True, + ) -- GitLab From 485dd6ad4c6d890db7698afcc297375845606784 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 10:22:07 -0400 Subject: [PATCH 274/388] Refactor snapshots to save new dataloader result --- raps/engine.py | 9 +- raps/telemetry.py | 331 ++++++++++++++-------------------------- raps/workload.py | 2 +- 3 files changed, 109 insertions(+), 233 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 64bf218..36e2559 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -266,8 +266,9 @@ class Engine: if sim_config.live and not sim_config.replay: td = Telemetry(**sim_config_dict) - jobs, timestep_start, timestep_end = \ - td.load_jobs_times_args_from_live_system() + result = td.load_from_live_system() + jobs = result.jobs + timestep_start, timestep_end = result.telemetry_start, result.telemetry_end elif sim_config.replay: # TODO: this will have issues if running separate systems or custom systems partition_short = partition.split("/")[-1] if partition else None @@ -286,10 +287,12 @@ class Engine: else: replay_files = sim_config.replay - jobs, timestep_start, timestep_end, args_from_file = td.load_jobs_times_args_from_files( + result = td.load_from_files( files=replay_files, args=sim_config_args, config=system_config_dict, ) + jobs = result.jobs + timestep_start, timestep_end = result.telemetry_start, result.telemetry_end else: # Synthetic jobs wl = Workload(sim_config_args, system_config_dict) jobs = wl.generate_jobs() diff --git a/raps/telemetry.py b/raps/telemetry.py index 3f883de..4bececc 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -13,14 +13,14 @@ from pathlib import Path # import json from typing import Optional from types import ModuleType - import importlib import numpy as np import pandas as pd from tqdm import tqdm -from pydantic import BaseModel +from pydantic import BaseModel, model_validator # from rich.progress import track +from raps.sim_config import SimConfig from raps.system_config import get_system_config from raps.job import Job, job_dict import matplotlib.pyplot as plt @@ -31,9 +31,48 @@ from raps.plotting import ( ) from raps.utils import ( next_arrival_byconfargs, convert_to_time_unit, pydantic_add_args, SubParsers, ExpandedPath, + DataLoaderResult, yaml_dump, ) +# TODO: should
reuse this model in SimConfig +class TelemetryArgs(BaseModel): + jid: str = '*' + """ Replay job id """ + replay: list[ExpandedPath] | None = None + """ path/to/joblive path/to/jobprofile -or- filename.npz (overrides --workload option) """ + plot: list[Literal["jobs", "nodes"]] | None = None + """ Output plots """ + gantt_nodes: bool = False + """ Print Gantt with nodes required as line thickness (default false) """ + time: str | None = None + """ Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d """ + system: str = 'frontier' + """ System config to use """ + arrival: Literal['prescribed', 'poisson'] = "prescribed" + """ Modify arrival distribution ({choices[1]}) or use the original submit times """ + verbose: bool = False + output: str | None = None + """ Store output in --output file. """ + live: bool = False + """ Grab data from live system. """ + + @model_validator(mode="after") + def _validate_after(self): + if not self.live and not self.replay: + raise ValueError("Either --live or --replay is required") + return self + + +shortcuts = { + "replay": "f", + "plot": "p", + "time": "t", + "verbose": "v", + "output": "o", +} + + class Telemetry: """A class for handling telemetry data, including reading/parsing job data, and loading/saving snapshots.""" dataloader: Optional[ModuleType] @@ -49,16 +88,18 @@ class Telemetry: print(f"WARNING: Failed to load dataloader: {e}") self.dataloader = None - def save_snapshot(self, *, jobs: list, timestep_start: int, timestep_end: int, args: dict, filename: str): + def save_snapshot(self, *, dest: str, result: DataLoaderResult, args: SimConfig|TelemetryArgs): """Saves a snapshot of the jobs to a compressed file. """ - list_of_job_dicts = [] - for job in jobs: - list_of_job_dicts.append(job.__dict__) - np.savez_compressed(filename, jobs=list_of_job_dicts, timestep_start=timestep_start, - timestep_end=timestep_end, args=args) + np.savez_compressed(dest, + jobs=[vars(j) for j in result.jobs], + telemetry_start=result.telemetry_start, + telemetry_end=result.telemetry_end, + start_date=result.start_date, + args=args, + ) - def load_snapshot(self, snapshot: str, downscale=1) -> list: - """Reads a snapshot from a compressed file and return 4 values: joblist, timestep_start, timestep_end and args.
+ def load_snapshot(self, snapshot: str | Path) -> tuple[DataLoaderResult, SimConfig|TelemetryArgs]: + """Reads a snapshot from a compressed file :param str snapshot: Filename :returns: @@ -68,75 +109,19 @@ class Telemetry: - args, which were used to generate the loaded snapshot """ data = np.load(snapshot, allow_pickle=True, mmap_mode='r') - jobs = [] - list_of_job_dicts = data['jobs'].tolist() - for job_info in list_of_job_dicts: - jobs.append(Job(job_info)) - if 'timestep_start' in data: - timestep_start = int(data['timestep_start']) - else: - timestep_start = 0 - if 'timestep_end' in data: - timestep_end = int(data['timestep_end']) - else: - timestep_end = np.inf - raise ValueError("Invalid timestep_end in snapshot") - if 'args' in data: - args_from_file = data['args'].tolist() - else: - args_from_file = None - - return jobs, \ - timestep_start, \ - timestep_end, \ - args_from_file - - def load_csv_results(self, file): - jobs = [] - time_start = 0 - time_end = 0 - for line in pd.read_csv(file, chunksize=1): - job_info = job_dict(nodes_required=line.get('num_nodes').item(), - name=line.get('name').item(), - account=line.get('account').item(), - current_state=line.get('current_state').item(), - end_state=line.get('end_state').item(), - scheduled_nodes=line.get('scheduled_nodes').item(), - id=line.get('id').item(), - priority=line.get('priority').item(), - partition=line.get('partition').item(), - cpu_cores_required=line.get('cpu_cores_required').item(), - gpu_units_required=line.get('gpu_units_required').item(), - allocated_cpu_cores=line.get('allocated_cpu_cores').item(), - allocated_gpu_units=line.get('allocated_gpu_units').item(), - - cpu_trace=line.get('cpu_trace'), - gpu_trace=line.get('cpu_trace'), - ntx_trace=line.get('cpu_trace'), - nrx_trace=line.get('cpu_trace'), - submit_time=line.get('submit_time').item(), - time_limit=line.get('time_limit').item(), - start_time=line.get('start_time').item(), - end_time=line.get('end_time').item(), - expected_run_time=line.get('expected_run_time').item(), - current_run_time=line.get('current_run_time').item(), - trace_time=line.get('trace_time'), - # trace_start_time=line.get('trace_start_time').item(), - trace_start_time=line.get('trace_start_time'), - # trace_end_time=line.get('trace_end_time').item(), - trace_end_time=line.get('trace_end_time'), - trace_quanta=line.get('trace_quanta').item(), - trace_missing_values=line.get('trace_missing_values'), - downscale=line.get('downscale'), - ) - job = Job(job_info) - jobs.append(job) - # if hasattr(data,'args'): - # args_from_file = data["args"].item() # This should be empty as csv contains no args. 
- # else: - # args_from_file = None - - return jobs, time_start, time_end, None + jobs = [Job(j) for j in data['jobs']] + telemetry_start = data['telemetry_start'].item() + telemetry_end = data['telemetry_end'].item() + start_date = data['start_date'].item() + args = data['args'].item() + + result = DataLoaderResult( + jobs=jobs, + telemetry_start=telemetry_start, telemetry_end=telemetry_end, + start_date=start_date, + ) + + return result, args def load_data(self, files): """Load telemetry data using custom data loaders.""" @@ -148,43 +133,6 @@ class Telemetry: assert self.dataloader return self.dataloader.load_live_data(**self.kwargs) - def load_data_from_df(self, *args, **kwargs): - """Load telemetry data using custom data loaders.""" - assert self.dataloader - return self.dataloader.load_data_from_df(*args, **kwargs) - - def load_data_from_csv(self, file, *args, **kwargs): - jobs = [] - df = pd.read_csv(file, chunksize=1, header='infer') - for d in df: - # print(d['name'].astype(str)) - job_info = job_dict(nodes_required=None, - name=d['name'].astype(str).item(), - account=d['account'].astype(str).item(), - cpu_trace=None, - gpu_trace=None, - ntx_trace=None, - nrx_trace=None, - end_state=d['state'].astype(str).item(), - scheduled_nodes=d['scheduled_nodes'].item(), - id=d['id'].astype(int).item(), - priority=None, - partition=None, - submit_time=d['submit_time'].astype(int).item(), - time_limit=None, - start_time=d['start_time'].astype(int).item(), - end_time=d['end_time'].astype(int).item(), - wall_time=d['end_time'].astype(int).item() - d['start_time'].astype(int).item(), - trace_time=None, - trace_start_time=None, - trace_end_time=None, - trace_missing_values=None - ) - jobs.append(job_info) - minstarttime = min([x['start_time'] for x in jobs]) - maxendtime = max([x['end_time'] for x in jobs]) - return jobs, minstarttime, maxendtime, None - def node_index_to_name(self, index: int): """ Convert node index into a name""" assert self.dataloader @@ -200,105 +148,39 @@ class Telemetry: assert self.dataloader return self.dataloader.cdu_pos(index, config=self.config) - def load_jobs_times_args_from_live_system(self): - jobs, timestep_start, timestep_end = self.load_live_data() - # data_args = None - return jobs, timestep_start, timestep_end + def load_from_live_system(self): + result = self.load_live_data() + return result - def load_jobs_times_args_from_files(self, *, files, args, config, downscale=1): + def load_from_files(self, *, files, args, config): """ Load all files as combined jobs """ - # Read telemetry data (either npz file or via custom data loader) - # TODO: Merge args? 
See main.py:79 - timestep_end = 0 - timestep_start = sys.maxsize - jobs = [] - trigger_custom_dataloader = False - for i, file in enumerate(files): - file = str(Path(file)) - if hasattr(args, 'is_results_file') and args.is_results_file: - if file.endswith(".csv"): - jobs, timestep_start, timestep, _ = self.load_csv_results(file) - - elif file.endswith(".npz"): # Replay .npz file - print(f"Loading {file}...") - jobs_from_file, timestep_start_from_file, timestep_end_from_file, args_from_file = self.load_snapshot( - file) - if args_from_file is not None: - print(f"File was generated with:" - f"\n--system {args_from_file.system} ") - if hasattr(args_from_file, 'fastforward'): - print(f"--ff {args_from_file.fastforward} ") - if hasattr(args_from_file, 'time'): - print(f"-t {args_from_file.time}") - print(f"All Args:\n{args_from_file}" - "\nTo use these set them from the commandline!") - else: - print("No generation arguments extracted from input file!") - # Args are usually extracted to tell the users how to reporduce results. - # They are not processed and re-set to said arguments automatily - jobs.extend(jobs_from_file) - timestep_start = min(timestep_start, timestep_start_from_file) - timestep_end = max(timestep_end, timestep_end_from_file) - - if hasattr(args, 'scale') and args.scale: - for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"): - job['nodes_required'] = random.randint(1, args.scale) - job['scheduled_nodes'] = None # Setting to None triggers scheduler to assign nodes - - if hasattr(args, 'arrival') and args.arrival == 'poisson': - print("available nodes:", config['AVAILABLE_NODES']) - for job in tqdm(jobs, desc="Rescheduling jobs"): - job['scheduled_nodes'] = None - job['submit_time'] = next_arrival_byconfargs(config, args) - else: - trigger_custom_dataloader = True - break - - if trigger_custom_dataloader: # custom data loader - try: - jobs, timestep_start_from_data, timestep_end_from_data = self.load_data(args.replay) - except AssertionError: - raise ValueError("Forgot --is-results-file ?") - timestep_start = min(timestep_start, timestep_start_from_data) - timestep_end = max(timestep_end, timestep_end_from_data) + assert len(files) >= 1 + files = [Path(f) for f in files] + + if str(files[0]).endswith(".npz"): + file = files[0] + print(f"Loading {file}") + result, args = self.load_snapshot(file) + print(f"File was generated with: --system {args.system}") + + # TODO: should move this logic into a separate method and out of the individual dataloaders + if hasattr(args, 'scale') and args.scale: + for job in tqdm(result.jobs, desc=f"Scaling jobs to {args.scale} nodes"): + job.nodes_required = random.randint(1, args.scale) + job.scheduled_nodes = None # Setting to None triggers scheduler to assign nodes + + if hasattr(args, 'arrival') and args.arrival == 'poisson': + print("available nodes:", config['AVAILABLE_NODES']) + for job in tqdm(result.jobs, desc="Rescheduling jobs"): + job.scheduled_nodes = None + job.submit_time = next_arrival_byconfargs(config, args) + job.start_time = None + job.end_time = None + else: # custom data loader + result = self.load_data(args.replay) if args.time: - timestep_end = timestep_start + convert_to_time_unit(args.time) - elif not timestep_end: - timestep_end = int(max(job.wall_time + job.start_time for job in jobs)) + 1 - - return jobs, timestep_start, timestep_end, args - - -class TelemetryArgs(BaseModel): - jid: str = '*' - """ Replay job id """ - replay: list[ExpandedPath] | None = None - """ path/to/joblive path/to/jobprofile -or- 
filename.npz (overrides --workload option) """ - plot: list[Literal["jobs", "nodes"]] | None = None - """ Output plots """ - is_results_file: bool = False - gantt_nodes: bool = False - """ Print Gannt with nodes required as line thickness (default false) """ - time: str | None = None - """ Length of time to simulate, e.g., 123, 123s, 27m, 3h, 7d """ - system: str = 'frontier' - """ System config to use """ - arrival: Literal['prescribed', 'poisson'] = "prescribed" - """ Modify arrival distribution ({choices[1]}) or use the original submit times """ - verbose: bool = False - output: str | None = None - """ Store output in --output file. """ - live: bool = False - """ Grab data from live system. """ - - -shortcuts = { - "replay": "f", - "plot": "p", - "time": "t", - "verbose": "v", - "output": "o", -} + result.telemetry_end = result.telemetry_start + convert_to_time_unit(args.time) + return result def run_telemetry_add_parser(subparsers: SubParsers): @@ -318,24 +200,15 @@ def run_telemetry(args: TelemetryArgs): td = Telemetry(**args_dict) if args.live and not args.replay: - td = Telemetry(**args_dict) - jobs, timestep_start, timestep_end = \ - td.load_jobs_times_args_from_live_system() - if args.output: - td.save_snapshot( - jobs=jobs, timestep_start=timestep_start, - timestep_end=timestep_end, args=args, filename=args.output, - ) - - elif args.replay: - jobs, timestep_start, timestep_end, _ = \ - td.load_jobs_times_args_from_files(files=args.replay, - args=args, - config=config) - + result = td.load_from_live_system() else: - print("Either --live or --replay is required") - sys.exit(1) + result = td.load_from_files(files=args.replay, args=args, config=config) + jobs = result.jobs + timestep_start = result.telemetry_start + timestep_end = result.telemetry_end + + if args.output: + td.save_snapshot(dest = args.output, result = result, args = args) timesteps = timestep_end - timestep_start diff --git a/raps/workload.py b/raps/workload.py index f8ac77b..f5b2e55 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -972,7 +972,7 @@ def run_workload(sim_config: SimConfig): if sim_config.replay: td = Telemetry(**args_dict) - jobs, _, _, _ = td.load_jobs_times_args_from_files(files=sim_config.replay, args=args, config=config) + jobs = td.load_from_files(files=sim_config.replay, args=args, config=config).jobs else: workload = Workload(args, config) jobs = getattr(workload, sim_config.workload)(args=sim_config.get_legacy_args) -- GitLab From 873ac16cca103a4e24bcfd3b0176a0200a091de3 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 10:48:54 -0400 Subject: [PATCH 275/388] Use same format in Workload --- raps/dataloaders/adastraMI250.py | 4 ++-- raps/dataloaders/bluewaters.py | 4 ++-- raps/dataloaders/frontier.py | 6 +++--- raps/dataloaders/fugaku.py | 4 ++-- raps/dataloaders/gcloudv2.py | 4 ++-- raps/dataloaders/kestrel.py | 4 ++-- raps/dataloaders/lassen.py | 4 ++-- raps/dataloaders/marconi100.py | 4 ++-- raps/dataloaders/mit_supercloud/loader.py | 4 ++-- raps/telemetry.py | 8 ++++---- raps/utils.py | 5 +++-- raps/workload.py | 11 ++++++++--- 12 files changed, 34 insertions(+), 28 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 9cb53d5..60ad8d1 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -24,7 +24,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import next_arrival_byconfkwargs, DataLoaderResult +from ..utils import next_arrival_byconfkwargs, 
WorkloadResult def load_data(jobs_path, **kwargs): @@ -205,7 +205,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): count_jobs_notOK += 1 print("jobs not added: ", count_jobs_notOK) - return DataLoaderResult( + return WorkloadResult( jobs = jobs, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py index b7f1c10..c2d328f 100644 --- a/raps/dataloaders/bluewaters.py +++ b/raps/dataloaders/bluewaters.py @@ -34,7 +34,7 @@ import pandas as pd from pathlib import Path from datetime import datetime, timezone from raps.telemetry import Job, job_dict -from raps.utils import DataLoaderResult +from raps.utils import WorkloadResult def throughput_traces(total_tx, total_rx, intervals): @@ -330,7 +330,7 @@ def load_data(local_dataset_path, **kwargs): telemetry_start = 0 telemetry_end = max((j.end_time for j in jobs), default=0) - return DataLoaderResult( + return WorkloadResult( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=datetime.fromtimestamp(t0, timezone.utc), diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 15621d1..d1968a6 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -16,7 +16,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, encrypt, DataLoaderResult +from ..utils import power_to_utilization, next_arrival_byconfkwargs, encrypt, WorkloadResult def aging_boost(nnodes): @@ -326,7 +326,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar job = Job(job_info) jobs.append(job) - return DataLoaderResult( + return WorkloadResult( jobs = jobs, telemetry_start = telemetry_start, telemetry_end = telemetry_end, @@ -543,7 +543,7 @@ def load_live_data(**kwargs): job = Job(job_info) jobs.append(job) - return DataLoaderResult( + return WorkloadResult( jobs = jobs, telemetry_start = telemetry_start, telemetry_end = telemetry_end, diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 0dd2c3b..703d28e 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -17,7 +17,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import DataLoaderResult +from ..utils import WorkloadResult def load_data(path, **kwargs): @@ -168,7 +168,7 @@ def load_data_from_df(df, **kwargs): job = Job(job_info) job_list.append(job) - return DataLoaderResult( + return WorkloadResult( jobs=job_list, telemetry_start = telemetry_start, telemetry_end = telemetry_end, start_date = telemetry_start_timestamp, diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 73f918b..60426cc 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd from raps.job import job_dict, Job -from raps.utils import DataLoaderResult +from raps.utils import WorkloadResult """ Official instructions are here: @@ -334,7 +334,7 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any): # Compute simulation span: start at t=0, end at the latest job finish telemetry_start = 0 telemetry_end = int(max(usage_map_end.values()) - t0) - return DataLoaderResult( + return WorkloadResult( jobs = jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, # gcloud dataset timestamps are already relative, and it doesn't list a 
start exact date. diff --git a/raps/dataloaders/kestrel.py b/raps/dataloaders/kestrel.py index c82b957..04adaa8 100644 --- a/raps/dataloaders/kestrel.py +++ b/raps/dataloaders/kestrel.py @@ -6,7 +6,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival, DataLoaderResult +from ..utils import power_to_utilization, next_arrival, WorkloadResult def load_data(jobs_path, **kwargs): @@ -153,7 +153,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): trace_quanta=trace_quanta) jobs.append(Job(job_info)) - return DataLoaderResult( + return WorkloadResult( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 51292fc..88b36e7 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -38,7 +38,7 @@ from tqdm import tqdm from datetime import timedelta from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, parse_td, DataLoaderResult +from ..utils import power_to_utilization, next_arrival_byconfkwargs, parse_td, WorkloadResult def load_data(path, **kwargs): @@ -249,7 +249,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): job = Job(job_info) job_list.append(job) - return DataLoaderResult( + return WorkloadResult( jobs=job_list, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 9a236f2..c4648b3 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -28,7 +28,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, DataLoaderResult +from ..utils import power_to_utilization, next_arrival_byconfkwargs, WorkloadResult def load_data(jobs_path, **kwargs): @@ -233,7 +233,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): job = Job(job_info) jobs.append(job) - return DataLoaderResult( + return WorkloadResult( jobs = jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 7c29f31..282f0b5 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -118,7 +118,7 @@ from typing import Dict, Union, Optional from collections import Counter from datetime import datetime, timezone from raps.job import job_dict, Job -from raps.utils import summarize_ranges, DataLoaderResult +from raps.utils import summarize_ranges, WorkloadResult from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END @@ -606,7 +606,7 @@ def load_data(local_dataset_path, **kwargs): for reason, count in skip_counts.items(): print(f"- {reason}: {count}") - return DataLoaderResult( + return WorkloadResult( jobs = jobs_list, telemetry_start=0, telemetry_end=int(end_ts - start_ts), start_date=datetime.fromtimestamp(start_ts, timezone.utc), diff --git a/raps/telemetry.py b/raps/telemetry.py index 4bececc..f55d1bc 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -31,7 +31,7 @@ from raps.plotting import ( ) from raps.utils import ( next_arrival_byconfargs, convert_to_time_unit, pydantic_add_args, SubParsers, ExpandedPath, - 
DataLoaderResult, yaml_dump, + WorkloadResult, ) @@ -88,7 +88,7 @@ class Telemetry: print(f"WARNING: Failed to load dataloader: {e}") self.dataloader = None - def save_snapshot(self, *, dest: str, result: DataLoaderResult, args: SimConfig|TelemetryArgs): + def save_snapshot(self, *, dest: str, result: WorkloadResult, args: SimConfig|TelemetryArgs): """Saves a snapshot of the jobs to a compressed file. """ np.savez_compressed(dest, jobs=[vars(j) for j in result.jobs], @@ -98,7 +98,7 @@ class Telemetry: args=args, ) - def load_snapshot(self, snapshot: str | Path) -> tuple[DataLoaderResult, SimConfig|TelemetryArgs]: + def load_snapshot(self, snapshot: str | Path) -> tuple[WorkloadResult, SimConfig|TelemetryArgs]: """Reads a snapshot from a compressed file :param str snapshot: Filename @@ -115,7 +115,7 @@ class Telemetry: start_date = data['start_date'].item() args = data['args'].item() - result = DataLoaderResult( + result = WorkloadResult( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=start_date, diff --git a/raps/utils.py b/raps/utils.py index e0c9acf..f67c533 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -711,9 +711,10 @@ def yaml_dump(data): ) -class DataLoaderResult(BaseModel): +class WorkloadResult(BaseModel): """ - Result of a dataloader load_data() function. + Represents a workload, a list of jobs with some metadata. Returned by dataloaders load_data() + function, and by Workload.generate_jobs(). jobs: The list of parsed jobs. diff --git a/raps/workload.py b/raps/workload.py index f5b2e55..48c7af3 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -30,7 +30,8 @@ from raps.utils import ( determine_state, next_arrival, next_arrival_byconfargs, truncated_weibull, - truncated_weibull_float + truncated_weibull_float, + WorkloadResult, ) import math import random @@ -66,7 +67,11 @@ class Workload: # This function calls the job generation function as specified by the workload keyword. # The respective funciton of this class is called. 
jobs = getattr(self, self.args.workload)(args=self.args) - return jobs + return WorkloadResult( + jobs = jobs, + telemetry_start=0, telemetry_end=self.args.time, + start_date=self.args.start, + ) def compute_traces(self, cpu_util: float, @@ -994,5 +999,5 @@ def continuous_job_generation(*, engine, timestep, jobs): # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") # print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") if len(engine.queue) <= engine.continuous_workload.args.maxqueue: - new_jobs = engine.continuous_workload.generate_jobs() + new_jobs = engine.continuous_workload.generate_jobs().jobs jobs.extend(new_jobs) -- GitLab From 9ae197cce44c504423a852aa94acb4c2b7e5b0a9 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 11:07:39 -0400 Subject: [PATCH 276/388] More fixes --- raps/dataloaders/adastraMI250.py | 2 +- raps/engine.py | 24 ++++++++---------------- raps/multi_part_engine.py | 15 ++++++++------- raps/run_sim.py | 22 +++++++++++----------- raps/sim_config.py | 4 ++-- raps/telemetry.py | 4 ++-- tests/systems/test_engine.py | 5 ++++- 7 files changed, 36 insertions(+), 40 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 60ad8d1..7522097 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -208,7 +208,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): return WorkloadResult( jobs = jobs, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, - start_date=telemetry_start_timestamp, + start_date=telemetry_start_timestamp.tz_localize("UTC"), ) diff --git a/raps/engine.py b/raps/engine.py index 36e2559..f371b3e 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -10,12 +10,11 @@ import os import select import time import random -import math from raps.job import Job, JobState from raps.policy import PolicyType from raps.utils import ( summarize_ranges, - get_current_utilization + get_current_utilization, ) from raps.resmgr import ResourceManager from raps.schedulers import load_scheduler @@ -266,9 +265,7 @@ class Engine: if sim_config.live and not sim_config.replay: td = Telemetry(**sim_config_dict) - result = td.load_from_live_system() - jobs = result.jobs - timestep_start, timestep_end = result.telemetry_start, result.telemetry_end + workload_result = td.load_from_live_system() elif sim_config.replay: # TODO: this will have issues if running separate systems or custom systems partition_short = partition.split("/")[-1] if partition else None @@ -287,22 +284,17 @@ class Engine: else: replay_files = sim_config.replay - result = td.load_from_files( + workload_result = td.load_from_files( files=replay_files, args=sim_config_args, config=system_config_dict, ) - jobs = result.jobs - timestep_start, timestep_end = result.telemetry_start, result.telemetry_end else: # Synthetic jobs wl = Workload(sim_config_args, system_config_dict) - jobs = wl.generate_jobs() - timestep_start = 0 - if hasattr(jobs[0], 'end_time'): - timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) - else: - timestep_end = 88200 # 24 hours - + workload_result = wl.generate_jobs() td = Telemetry(**sim_config_dict) + + jobs = workload_result.jobs + timestep_start, timestep_end = workload_result.telemetry_start, workload_result.telemetry_end # TODO refactor how stat/end/fastforward/time work if sim_config.fastforward is not None: @@ -342,7 +334,7 @@ class Engine: system_config=system_config, ) - return engine, jobs, timestep_start, 
timestep_end, time_delta + return engine, workload_result, time_delta def add_running_jobs_to_queue(self, jobs_to_submit: List): """ diff --git a/raps/multi_part_engine.py b/raps/multi_part_engine.py index 461425b..bebf47b 100644 --- a/raps/multi_part_engine.py +++ b/raps/multi_part_engine.py @@ -1,6 +1,7 @@ from collections.abc import Iterable from raps.engine import Engine, TickData from raps.sim_config import SimConfig +from raps.utils import WorkloadResult class MultiPartEngine: @@ -17,29 +18,29 @@ class MultiPartEngine: if len(root_systems) > 1: raise ValueError("Replay for multi-system runs is not supported") - jobs_by_partition = {} + workloads_by_partition: dict[str, WorkloadResult] = {} engines: dict[str, Engine] = {} timestep_start, timestep_end, time_delta = 0, 0, 0 for partition in sim_config.system_configs: name = partition.system_name - engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config( + engine, workload_result, time_delta = Engine.from_sim_config( sim_config, partition=name, ) - for job in jobs: + for job in workload_result.jobs: job.partition = name - jobs_by_partition[name] = jobs + workloads_by_partition[name] = workload_result engines[name] = engine - total_initial_jobs = sum(len(j) for j in jobs_by_partition.values()) + total_initial_jobs = sum(len(j.jobs) for j in workloads_by_partition.values()) for engine in engines.values(): engine.total_initial_jobs = total_initial_jobs multi_engine = MultiPartEngine( engines=engines, - jobs=jobs_by_partition, + jobs={p: w.jobs for p, w in workloads_by_partition.items()}, ) - return multi_engine, jobs_by_partition, timestep_start, timestep_end, time_delta + return multi_engine, workloads_by_partition, timestep_start, timestep_end, time_delta def run_simulation(self, jobs: dict, timestep_start, timestep_end, time_delta=1 ) -> Iterable[dict[str, TickData | None]]: diff --git a/raps/run_sim.py b/raps/run_sim.py index 7587dbb..b41eb80 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -76,18 +76,18 @@ def run_sim(sim_config: SimConfig): print("Use run-multi-part to run multi-partition simulations") sys.exit(1) - engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config) + engine, workload_result, time_delta = Engine.from_sim_config(sim_config) out = sim_config.output if out: out.mkdir(parents=True) engine.telemetry.save_snapshot( - jobs=jobs, - timestep_start=timestep_start, - timestep_end=timestep_end, - args=sim_config.get_legacy_args(), filename=str(out), + dest = str(out), + result = workload_result, + args=sim_config, ) - + jobs = workload_result.jobs + timestep_start, timestep_end = workload_result.telemetry_start, workload_result.telemetry_end total_timesteps = timestep_end - timestep_start downscale = sim_config.downscale @@ -242,7 +242,7 @@ def run_multi_part_sim_add_parser(subparsers: SubParsers): def run_multi_part_sim(sim_config: SimConfig): - multi_engine, jobs, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config) + multi_engine, workload_results, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config) # TODO: The mit_supercloud dataloader seems to be outputting the wrong timesteps? 
mit_supercloud # is the only multi-partition system with replay, so just manually overriding the timesteps here @@ -253,11 +253,11 @@ def run_multi_part_sim(sim_config: SimConfig): if sim_config.output: for part, engine in multi_engine.engines.items(): engine.telemetry.save_snapshot( - jobs=jobs[part], - timestep_start=timestep_start, timestep_end=timestep_end, - filename=part.split('/')[-1], - args=sim_config.get_legacy_args(), + dest=str(sim_config.output / part.split('/')[-1]), + result=workload_results[part], + args=sim_config, ) + jobs = {p: w.jobs for p, w in workload_results.items()} ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) diff --git a/raps/sim_config.py b/raps/sim_config.py index 26a328a..92a5f5f 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -48,9 +48,9 @@ class SimConfig(BaseModel): def downscale(self) -> int: return int(timedelta(seconds=1) / self.time_unit) - start: str = "2021-05-21T13:00" + start: str = "2021-05-21T13:00:00-04:00" """ ISO8601 start of simulation """ - end: str = "2021-05-21T14:00" + end: str = "2021-05-21T14:00:00-04:00" """ ISO8601 end of simulation """ numjobs: int = 100 diff --git a/raps/telemetry.py b/raps/telemetry.py index f55d1bc..83b09a2 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -160,8 +160,8 @@ class Telemetry: if str(files[0]).endswith(".npz"): file = files[0] print(f"Loading {file}") - result, args = self.load_snapshot(file) - print(f"File was generated with: --system {args.system}") + result, args_from_file = self.load_snapshot(file) + print(f"File was generated with: --system {args_from_file.system}") # TODO: should move this logic into a separate method and out of the individual dataloaders if hasattr(args, 'scale') and args.scale: diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py index ce40878..4d57752 100644 --- a/tests/systems/test_engine.py +++ b/tests/systems/test_engine.py @@ -22,7 +22,10 @@ def test_engine(system, system_config, sim_output): "system": system, "time": "2m", }) - engine, jobs, timestep_start, timestep_end, time_delta = Engine.from_sim_config(sim_config) + engine, workload_result, time_delta = Engine.from_sim_config(sim_config) + jobs = workload_result.jobs + timestep_start = workload_result.telemetry_start + timestep_end = workload_result.telemetry_end ticks = list(engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)) assert len(ticks) == 120 -- GitLab From 445b49360138746a86f99e368223f557c5095f18 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 12:47:49 -0400 Subject: [PATCH 277/388] Move arrival time logic into one place --- raps/dataloaders/adastraMI250.py | 14 ++++-------- raps/dataloaders/frontier.py | 20 +++++----------- raps/dataloaders/fugaku.py | 7 ------ raps/dataloaders/lassen.py | 14 ++++-------- raps/dataloaders/marconi100.py | 16 ++++--------- raps/engine.py | 10 +++----- raps/telemetry.py | 39 ++++++++++++++++---------------- raps/workload.py | 2 +- 8 files changed, 43 insertions(+), 79 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 7522097..4d96b02 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -146,15 +146,11 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): priority = int(jobs_df.loc[jidx, 'priority']) - if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution - 
scheduled_nodes = None - submit_time = next_arrival_byconfkwargs(config, kwargs) - else: # Prescribed replay - scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() - - submit_timestamp = jobs_df.loc[jidx, 'submit_time'] - diff = submit_timestamp - telemetry_start_timestamp - submit_time = int(diff.total_seconds()) + scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() + + submit_timestamp = jobs_df.loc[jidx, 'submit_time'] + diff = submit_timestamp - telemetry_start_timestamp + submit_time = int(diff.total_seconds()) time_limit = jobs_df.loc[jidx, 'time_limit'] # in seconds diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index d1968a6..de84770 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -267,20 +267,12 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar if '' in xnames: continue - if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution - scheduled_nodes = None - submit_time = next_arrival_byconfkwargs(config, kwargs) - start_time = None # ? - end_time = None # ? - priority = aging_boost(nodes_required) - - else: # Prescribed replay - scheduled_nodes = [] - # priority = 0 # not used for replay - priority = aging_boost(nodes_required) - for xname in xnames: - indices = xname_to_index(xname, config) - scheduled_nodes.append(indices) + scheduled_nodes = [] + # priority = 0 # not used for replay + priority = aging_boost(nodes_required) + for xname in xnames: + indices = xname_to_index(xname, config) + scheduled_nodes.append(indices) # Throw out jobs that are not valid! if gpu_trace.size == 0: diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 703d28e..1515915 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -135,13 +135,6 @@ def load_data_from_df(df, **kwargs): trace_missing_values = False # Sane Choice? trace_quanta = config['TRACE_QUANTA'] - # Should we still have this? 
- # if arrival == 'poisson': # Modify the arrival times of according to Poisson distribution - # time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME']) - # else: - # time_offset = (submit_time - min_time).total_seconds() # Compute time offset in seconds - # Removed from job_dict: time_offset=time_offset, - # Create job dictionary job_info = job_dict( nodes_required=nodes_required, diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 88b36e7..a0d1ca7 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -198,16 +198,10 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): priority = row.get('priority', 0) partition = row.get('partition', "0") - if arrival == 'poisson': # Modify the submit times according to Poisson process - scheduled_nodes = None - submit_time = fastforward + next_arrival_byconfkwargs(config, kwargs) - start_time = submit_time # Pretend Job could start immediately # Alternative: None - end_time = submit_time + wall_time # Alternative: None - else: # Prescribed replay - scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df) - submit_time = compute_time_offset(row['job_submit_timestamp'], telemetry_start_timestamp) - start_time = compute_time_offset(row['begin_timestamp'], telemetry_start_timestamp) - end_time = compute_time_offset(row['end_timestamp'], telemetry_start_timestamp) + scheduled_nodes = get_scheduled_nodes(row['allocation_id'], node_df) + submit_time = compute_time_offset(row['job_submit_timestamp'], telemetry_start_timestamp) + start_time = compute_time_offset(row['begin_timestamp'], telemetry_start_timestamp) + end_time = compute_time_offset(row['end_timestamp'], telemetry_start_timestamp) time_limit = row['time_limit'] diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index c4648b3..fd08b91 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -165,17 +165,11 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): print("wall_time != (end_time - start_time)") print(f"{wall_time} != {(end_time - start_time)}") - if arrival == 'poisson': # Modify the arrival times according to Poisson distribution - scheduled_nodes = None - submit_time = next_arrival_byconfkwargs(config, kwargs) - start_time = None - end_time = None - else: # Prescribed replay - scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() - - submit_timestamp = jobs_df.loc[jidx, 'submit_time'] - diff = submit_timestamp - telemetry_start_timestamp - submit_time = int(diff.total_seconds()) + scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist() + + submit_timestamp = jobs_df.loc[jidx, 'submit_time'] + diff = submit_timestamp - telemetry_start_timestamp + submit_time = int(diff.total_seconds()) trace_time = gpu_trace.size * config['TRACE_QUANTA'] # seconds trace_start_time = 0 diff --git a/raps/engine.py b/raps/engine.py index f371b3e..3760254 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -284,24 +284,20 @@ class Engine: else: replay_files = sim_config.replay - workload_result = td.load_from_files( - files=replay_files, - args=sim_config_args, config=system_config_dict, - ) + workload_result = td.load_from_files(replay_files) else: # Synthetic jobs wl = Workload(sim_config_args, system_config_dict) workload_result = wl.generate_jobs() td = Telemetry(**sim_config_dict) jobs = workload_result.jobs - timestep_start, timestep_end = workload_result.telemetry_start, workload_result.telemetry_end # TODO refactor how stat/end/fastforward/time work if 
sim_config.fastforward is not None: - timestep_start = timestep_start + sim_config.fastforward + workload_result.telemetry_start = workload_result.telemetry_start + sim_config.fastforward if sim_config.time is not None: - timestep_end = timestep_start + sim_config.time + workload_result.telemetry_end = workload_result.telemetry_end + sim_config.time if sim_config.time_delta is not None: time_delta = sim_config.time_delta diff --git a/raps/telemetry.py b/raps/telemetry.py index 83b09a2..0d848fc 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -148,11 +148,11 @@ class Telemetry: assert self.dataloader return self.dataloader.cdu_pos(index, config=self.config) - def load_from_live_system(self): + def load_from_live_system(self) -> WorkloadResult: result = self.load_live_data() return result - def load_from_files(self, *, files, args, config): + def load_from_files(self, files) -> WorkloadResult: """ Load all files as combined jobs """ assert len(files) >= 1 files = [Path(f) for f in files] @@ -162,26 +162,25 @@ class Telemetry: print(f"Loading {file}") result, args_from_file = self.load_snapshot(file) print(f"File was generated with: --system {args_from_file.system}") - - # TODO: should move this logic into a separate method and out of the individual dataloaders - if hasattr(args, 'scale') and args.scale: - for job in tqdm(result.jobs, desc=f"Scaling jobs to {args.scale} nodes"): - job.nodes_required = random.randint(1, args.scale) - job.scheduled_nodes = None # Setting to None triggers scheduler to assign nodes - - if hasattr(args, 'arrival') and args.arrival == 'poisson': - print("available nodes:", config['AVAILABLE_NODES']) - for job in tqdm(result.jobs, desc="Rescheduling jobs"): - job.scheduled_nodes = None - job.submit_time = next_arrival_byconfargs(config, args) - job.start_time = None - job.end_time = None else: # custom data loader - result = self.load_data(args.replay) - if args.time: - result.telemetry_end = result.telemetry_start + convert_to_time_unit(args.time) + result = self.load_data(files) + self.update_jobs(result.jobs) return result + def update_jobs(self, jobs: list[Job]): + """ Updates jobs with new scale or random start times """ + if self.kwargs.get("scale") is not None: + for job in jobs: + job.nodes_required = random.randint(1, self.kwargs['scale']) + job.scheduled_nodes = None # Setting to None triggers scheduler to assign nodes + + if self.kwargs['arrival'] == "poisson": + for job in jobs: + job.scheduled_nodes = None + job.submit_time = next_arrival_byconfargs(self.config, self.kwargs) + job.start_time = None + job.end_time = None + def run_telemetry_add_parser(subparsers: SubParsers): parser = subparsers.add_parser("telemetry", description=""" @@ -202,7 +201,7 @@ def run_telemetry(args: TelemetryArgs): if args.live and not args.replay: result = td.load_from_live_system() else: - result = td.load_from_files(files=args.replay, args=args, config=config) + result = td.load_from_files(args.replay) jobs = result.jobs timestep_start = result.telemetry_start timestep_end = result.telemetry_end diff --git a/raps/workload.py b/raps/workload.py index 48c7af3..015678f 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -977,7 +977,7 @@ def run_workload(sim_config: SimConfig): if sim_config.replay: td = Telemetry(**args_dict) - jobs = td.load_from_files(files=sim_config.replay, args=args, config=config).jobs + jobs = td.load_from_files(sim_config.replay).jobs else: workload = Workload(args, config) jobs = getattr(workload, 
sim_config.workload)(args=sim_config.get_legacy_args) -- GitLab From 31be11d3830327beb62f0047318476bbeb05d556 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 12:54:55 -0400 Subject: [PATCH 278/388] Add result file back --- raps/telemetry.py | 58 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 0d848fc..8ec4c9a 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -42,6 +42,7 @@ class TelemetryArgs(BaseModel): replay: list[ExpandedPath] | None = None """ path/to/joblive path/to/jobprofile -or- filename.npz (overrides --workload option) """ plot: list[Literal["jobs", "nodes"]] | None = None + is_results_file: bool = False """ Output plots """ gantt_nodes: bool = False """ Print Gannt with nodes required as line thickness (default false) """ @@ -123,6 +124,53 @@ class Telemetry: return result, args + def load_csv_results(self, file): + jobs = [] + time_start = 0 + time_end = 0 + for line in pd.read_csv(file, chunksize=1): + job_info = job_dict(nodes_required=line.get('num_nodes').item(), + name=line.get('name').item(), + account=line.get('account').item(), + current_state=line.get('current_state').item(), + end_state=line.get('end_state').item(), + scheduled_nodes=line.get('scheduled_nodes').item(), + id=line.get('id').item(), + priority=line.get('priority').item(), + partition=line.get('partition').item(), + cpu_cores_required=line.get('cpu_cores_required').item(), + gpu_units_required=line.get('gpu_units_required').item(), + allocated_cpu_cores=line.get('allocated_cpu_cores').item(), + allocated_gpu_units=line.get('allocated_gpu_units').item(), + + cpu_trace=line.get('cpu_trace'), + gpu_trace=line.get('cpu_trace'), + ntx_trace=line.get('cpu_trace'), + nrx_trace=line.get('cpu_trace'), + submit_time=line.get('submit_time').item(), + time_limit=line.get('time_limit').item(), + start_time=line.get('start_time').item(), + end_time=line.get('end_time').item(), + expected_run_time=line.get('expected_run_time').item(), + current_run_time=line.get('current_run_time').item(), + trace_time=line.get('trace_time'), + # trace_start_time=line.get('trace_start_time').item(), + trace_start_time=line.get('trace_start_time'), + # trace_end_time=line.get('trace_end_time').item(), + trace_end_time=line.get('trace_end_time'), + trace_quanta=line.get('trace_quanta').item(), + trace_missing_values=line.get('trace_missing_values'), + downscale=line.get('downscale'), + ) + job = Job(job_info) + jobs.append(job) + # if hasattr(data,'args'): + # args_from_file = data["args"].item() # This should be empty as csv contains no args. 
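# For reference, a minimal sketch of the row-at-a-time pattern used above:
# pd.read_csv(chunksize=1) yields single-row DataFrames, and .item() unwraps
# each scalar cell (the data here is illustrative):
import io
import pandas as pd

_csv = "id,num_nodes\njob-1,4\n"
for _row in pd.read_csv(io.StringIO(_csv), chunksize=1):
    assert _row["num_nodes"].item() == 4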
+ # else: + # args_from_file = None + + return jobs, time_start, time_end + def load_data(self, files): """Load telemetry data using custom data loaders.""" assert self.dataloader @@ -198,13 +246,17 @@ def run_telemetry(args: TelemetryArgs): args_dict['config'] = config td = Telemetry(**args_dict) + if args.is_results_file and args.replay: + file = str(args.replay[0]) + jobs, timestep_start, timestep_end = td.load_csv_results(file) if args.live and not args.replay: result = td.load_from_live_system() + jobs = result.jobs + timestep_start, timestep_end = result.telemetry_start, result.telemetry_end else: result = td.load_from_files(args.replay) - jobs = result.jobs - timestep_start = result.telemetry_start - timestep_end = result.telemetry_end + jobs = result.jobs + timestep_start, timestep_end = result.telemetry_start, result.telemetry_end if args.output: td.save_snapshot(dest = args.output, result = result, args = args) -- GitLab From 3b43f122781a8e0a8b5be65c7cff38496b14b3b7 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 14:19:18 -0400 Subject: [PATCH 279/388] Formatting --- raps/dataloaders/adastraMI250.py | 5 ++--- raps/dataloaders/frontier.py | 19 ++++++++-------- raps/dataloaders/fugaku.py | 4 ++-- raps/dataloaders/gcloudv2.py | 4 ++-- raps/dataloaders/lassen.py | 3 +-- raps/dataloaders/marconi100.py | 5 ++--- raps/dataloaders/mit_supercloud/loader.py | 2 +- raps/engine.py | 9 +++++--- raps/run_sim.py | 7 +++--- raps/telemetry.py | 27 ++++++++++------------- raps/utils.py | 4 ++-- raps/workload.py | 2 +- 12 files changed, 44 insertions(+), 47 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 4d96b02..1ce7689 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -24,7 +24,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import next_arrival_byconfkwargs, WorkloadResult +from ..utils import WorkloadResult def load_data(jobs_path, **kwargs): @@ -58,7 +58,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): """ count_jobs_notOK = 0 config = kwargs.get('config') - arrival = kwargs.get('arrival') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') @@ -202,7 +201,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): print("jobs not added: ", count_jobs_notOK) return WorkloadResult( - jobs = jobs, + jobs=jobs, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, start_date=telemetry_start_timestamp.tz_localize("UTC"), ) diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index de84770..9ef6b72 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -16,7 +16,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, encrypt, WorkloadResult +from ..utils import power_to_utilization, encrypt, WorkloadResult def aging_boost(nnodes): @@ -137,7 +137,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar """ config = kwargs.get('config') encrypt_bool = kwargs.get('encrypt') - arrival = kwargs.get('arrival') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') debug = kwargs.get('debug') @@ -319,10 +318,10 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar job = Job(job_info) jobs.append(job) return WorkloadResult( - jobs = jobs, - telemetry_start = telemetry_start, - telemetry_end = telemetry_end, - 
start_date = telemetry_start_timestamp, + jobs=jobs, + telemetry_start=telemetry_start, + telemetry_end=telemetry_end, + start_date=telemetry_start_timestamp, ) @@ -536,10 +535,10 @@ def load_live_data(**kwargs): jobs.append(job) return WorkloadResult( - jobs = jobs, - telemetry_start = telemetry_start, - telemetry_end = telemetry_end, - start_date = datetime.fromtimestamp(telemetry_start, timezone.utc), + jobs=jobs, + telemetry_start=telemetry_start, + telemetry_end=telemetry_end, + start_date=datetime.fromtimestamp(telemetry_start, timezone.utc), ) diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 1515915..70b6cb8 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -163,8 +163,8 @@ def load_data_from_df(df, **kwargs): return WorkloadResult( jobs=job_list, - telemetry_start = telemetry_start, telemetry_end = telemetry_end, - start_date = telemetry_start_timestamp, + telemetry_start=telemetry_start, telemetry_end=telemetry_end, + start_date=telemetry_start_timestamp, ) diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 60426cc..4dba4fb 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -2,7 +2,7 @@ import os import re from datetime import datetime from tqdm import tqdm -from typing import List, Optional, Generator, Tuple, Any, Union +from typing import List, Optional, Generator, Any, Union import numpy as np import pandas as pd @@ -335,7 +335,7 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any): telemetry_start = 0 telemetry_end = int(max(usage_map_end.values()) - t0) return WorkloadResult( - jobs = jobs, + jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, # gcloud dataset timestamps are already relative, and it doesn't list a start exact date. 
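# For reference: WorkloadResult.start_date is expected to be timezone-aware, and
# both construction styles used by the dataloaders satisfy that. A minimal check
# (the "Z"-suffix form used above parses with fromisoformat on Python >= 3.11;
# the explicit "+00:00" offset works on older versions too):
from datetime import datetime, timezone

assert datetime.fromisoformat("2011-05-02T00:00:00+00:00").tzinfo is not None
assert datetime.fromtimestamp(0, timezone.utc).tzinfo is not None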
start_date=datetime.fromisoformat("2011-05-02T00:00:00Z"), diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index a0d1ca7..c9c8378 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -38,7 +38,7 @@ from tqdm import tqdm from datetime import timedelta from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, parse_td, WorkloadResult +from ..utils import power_to_utilization, parse_td, WorkloadResult def load_data(path, **kwargs): @@ -60,7 +60,6 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): config = kwargs.get('config') jid = kwargs.get('jid', '*') validate = kwargs.get('validate') - arrival = kwargs.get('arrival') verbose = kwargs.get('verbose') fastforward = kwargs.get('fastforward') # int in seconds diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index fd08b91..961eca3 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -28,7 +28,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival_byconfkwargs, WorkloadResult +from ..utils import power_to_utilization, WorkloadResult def load_data(jobs_path, **kwargs): @@ -60,7 +60,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): """ config = kwargs.get('config') # min_time = kwargs.get('min_time', None) # Unused - arrival = kwargs.get('arrival') validate = kwargs.get('validate') jid = kwargs.get('jid', '*') debug = kwargs.get('debug') @@ -228,7 +227,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): jobs.append(job) return WorkloadResult( - jobs = jobs, + jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=telemetry_start_timestamp, ) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 282f0b5..3642936 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -607,7 +607,7 @@ def load_data(local_dataset_path, **kwargs): print(f"- {reason}: {count}") return WorkloadResult( - jobs = jobs_list, + jobs=jobs_list, telemetry_start=0, telemetry_end=int(end_ts - start_ts), start_date=datetime.fromtimestamp(start_ts, timezone.utc), ) diff --git a/raps/engine.py b/raps/engine.py index 3760254..dbc5e76 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -40,9 +40,9 @@ from raps.downtime import Downtime from raps.weather import Weather from raps.sim_config import SimConfig from raps.system_config import SystemConfig - from bisect import bisect_right + @dataclasses.dataclass class TickData: """ Represents the state output from the simulation each tick """ @@ -289,7 +289,7 @@ class Engine: wl = Workload(sim_config_args, system_config_dict) workload_result = wl.generate_jobs() td = Telemetry(**sim_config_dict) - + jobs = workload_result.jobs # TODO refactor how stat/end/fastforward/time work @@ -547,7 +547,10 @@ class Engine: job.running_time = self.current_timestep - job.start_time if job.current_state != JobState.RUNNING: - raise ValueError(f"Job {job.id} is in running list, but state is not RUNNING: job.state == {job.current_state}") + raise ValueError( + f"Job {job.id} is in running list, " + + "but state is not RUNNING: job.state == {job.current_state}" + ) else: # if job.state == JobState.RUNNING: # Error checks if job.running_time > job.time_limit and job.end_time is not None: diff --git a/raps/run_sim.py b/raps/run_sim.py index b41eb80..f1834c7 100644 --- 
a/raps/run_sim.py +++ b/raps/run_sim.py @@ -82,8 +82,8 @@ def run_sim(sim_config: SimConfig): if out: out.mkdir(parents=True) engine.telemetry.save_snapshot( - dest = str(out), - result = workload_result, + dest=str(out), + result=workload_result, args=sim_config, ) jobs = workload_result.jobs @@ -242,7 +242,8 @@ def run_multi_part_sim_add_parser(subparsers: SubParsers): def run_multi_part_sim(sim_config: SimConfig): - multi_engine, workload_results, timestep_start, timestep_end, time_delta = MultiPartEngine.from_sim_config(sim_config) + multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ + MultiPartEngine.from_sim_config(sim_config) # TODO: The mit_supercloud dataloader seems to be outputting the wrong timesteps? mit_supercloud # is the only multi-partition system with replay, so just manually overriding the timesteps here diff --git a/raps/telemetry.py b/raps/telemetry.py index 8ec4c9a..93d7992 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -7,7 +7,6 @@ The module defines a `Telemetry` class for managing telemetry data and several helper functions for data encryption and conversion between node name and index formats. """ from typing import Literal -import sys import random from pathlib import Path # import json @@ -16,7 +15,6 @@ from types import ModuleType import importlib import numpy as np import pandas as pd -from tqdm import tqdm from pydantic import BaseModel, model_validator # from rich.progress import track @@ -30,8 +28,7 @@ from raps.plotting import ( plot_network_histogram ) from raps.utils import ( - next_arrival_byconfargs, convert_to_time_unit, pydantic_add_args, SubParsers, ExpandedPath, - WorkloadResult, + next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadResult, ) @@ -89,17 +86,17 @@ class Telemetry: print(f"WARNING: Failed to load dataloader: {e}") self.dataloader = None - def save_snapshot(self, *, dest: str, result: WorkloadResult, args: SimConfig|TelemetryArgs): + def save_snapshot(self, *, dest: str, result: WorkloadResult, args: SimConfig | TelemetryArgs): """Saves a snapshot of the jobs to a compressed file. 
""" np.savez_compressed(dest, - jobs=[vars(j) for j in result.jobs], - telemetry_start=result.telemetry_start, - telemetry_end=result.telemetry_end, - start_date=result.start_date, - args=args, - ) - - def load_snapshot(self, snapshot: str | Path) -> tuple[WorkloadResult, SimConfig|TelemetryArgs]: + jobs=[vars(j) for j in result.jobs], + telemetry_start=result.telemetry_start, + telemetry_end=result.telemetry_end, + start_date=result.start_date, + args=args, + ) + + def load_snapshot(self, snapshot: str | Path) -> tuple[WorkloadResult, SimConfig | TelemetryArgs]: """Reads a snapshot from a compressed file :param str snapshot: Filename @@ -210,7 +207,7 @@ class Telemetry: print(f"Loading {file}") result, args_from_file = self.load_snapshot(file) print(f"File was generated with: --system {args_from_file.system}") - else: # custom data loader + else: # custom data loader result = self.load_data(files) self.update_jobs(result.jobs) return result @@ -259,7 +256,7 @@ def run_telemetry(args: TelemetryArgs): timestep_start, timestep_end = result.telemetry_start, result.telemetry_end if args.output: - td.save_snapshot(dest = args.output, result = result, args = args) + td.save_snapshot(dest=args.output, result=result, args=args) timesteps = timestep_end - timestep_start diff --git a/raps/utils.py b/raps/utils.py index f67c533..0e636f7 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -798,7 +798,7 @@ class WorkloadResult(BaseModel): # TODO: It might make more sense to make start_timestep/end_timestep always unix time, then we # wouldn't need this extra start_date field. start_date: AwareDatetime - + model_config = ConfigDict( - arbitrary_types_allowed = True, + arbitrary_types_allowed=True, ) diff --git a/raps/workload.py b/raps/workload.py index 015678f..cc1ba09 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -68,7 +68,7 @@ class Workload: # The respective funciton of this class is called. 
jobs = getattr(self, self.args.workload)(args=self.args) return WorkloadResult( - jobs = jobs, + jobs=jobs, telemetry_start=0, telemetry_end=self.args.time, start_date=self.args.start, ) -- GitLab From d67a220ea37eab39e7911b3b1ddab6f5156a2282 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 14:56:03 -0400 Subject: [PATCH 280/388] Renaming --- raps/dataloaders/adastraMI250.py | 4 ++-- raps/dataloaders/bluewaters.py | 4 ++-- raps/dataloaders/frontier.py | 6 +++--- raps/dataloaders/fugaku.py | 4 ++-- raps/dataloaders/gcloudv2.py | 4 ++-- raps/dataloaders/kestrel.py | 4 ++-- raps/dataloaders/lassen.py | 4 ++-- raps/dataloaders/marconi100.py | 4 ++-- raps/dataloaders/mit_supercloud/loader.py | 4 ++-- raps/engine.py | 14 +++++++------- raps/multi_part_engine.py | 10 +++++----- raps/run_sim.py | 8 ++++---- raps/telemetry.py | 12 ++++++------ raps/utils.py | 2 +- raps/workload.py | 4 ++-- tests/systems/test_engine.py | 8 ++++---- 16 files changed, 48 insertions(+), 48 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 1ce7689..6eec26e 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -24,7 +24,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import WorkloadResult +from ..utils import WorkloadData def load_data(jobs_path, **kwargs): @@ -200,7 +200,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): count_jobs_notOK += 1 print("jobs not added: ", count_jobs_notOK) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, start_date=telemetry_start_timestamp.tz_localize("UTC"), diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py index c2d328f..272db1c 100644 --- a/raps/dataloaders/bluewaters.py +++ b/raps/dataloaders/bluewaters.py @@ -34,7 +34,7 @@ import pandas as pd from pathlib import Path from datetime import datetime, timezone from raps.telemetry import Job, job_dict -from raps.utils import WorkloadResult +from raps.utils import WorkloadData def throughput_traces(total_tx, total_rx, intervals): @@ -330,7 +330,7 @@ def load_data(local_dataset_path, **kwargs): telemetry_start = 0 telemetry_end = max((j.end_time for j in jobs), default=0) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=datetime.fromtimestamp(t0, timezone.utc), diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 9ef6b72..0e1bdcd 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -16,7 +16,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, encrypt, WorkloadResult +from ..utils import power_to_utilization, encrypt, WorkloadData def aging_boost(nnodes): @@ -317,7 +317,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar job = Job(job_info) jobs.append(job) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, @@ -534,7 +534,7 @@ def load_live_data(**kwargs): job = Job(job_info) jobs.append(job) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 70b6cb8..4ccc885 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ 
-17,7 +17,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import WorkloadResult +from ..utils import WorkloadData def load_data(path, **kwargs): @@ -161,7 +161,7 @@ def load_data_from_df(df, **kwargs): job = Job(job_info) job_list.append(job) - return WorkloadResult( + return WorkloadData( jobs=job_list, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/gcloudv2.py b/raps/dataloaders/gcloudv2.py index 4dba4fb..6f05a87 100644 --- a/raps/dataloaders/gcloudv2.py +++ b/raps/dataloaders/gcloudv2.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd from raps.job import job_dict, Job -from raps.utils import WorkloadResult +from raps.utils import WorkloadData """ Official instructions are here: @@ -334,7 +334,7 @@ def load_data(data_path: Union[str, List[str]], **kwargs: Any): # Compute simulation span: start at t=0, end at the latest job finish telemetry_start = 0 telemetry_end = int(max(usage_map_end.values()) - t0) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, # gcloud dataset timestamps are already relative, and it doesn't list a start exact date. diff --git a/raps/dataloaders/kestrel.py b/raps/dataloaders/kestrel.py index 04adaa8..c9efd70 100644 --- a/raps/dataloaders/kestrel.py +++ b/raps/dataloaders/kestrel.py @@ -6,7 +6,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, next_arrival, WorkloadResult +from ..utils import power_to_utilization, next_arrival, WorkloadData def load_data(jobs_path, **kwargs): @@ -153,7 +153,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): trace_quanta=trace_quanta) jobs.append(Job(job_info)) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index c9c8378..fd0e364 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -38,7 +38,7 @@ from tqdm import tqdm from datetime import timedelta from ..job import job_dict, Job -from ..utils import power_to_utilization, parse_td, WorkloadResult +from ..utils import power_to_utilization, parse_td, WorkloadData def load_data(path, **kwargs): @@ -242,7 +242,7 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): job = Job(job_info) job_list.append(job) - return WorkloadResult( + return WorkloadData( jobs=job_list, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 961eca3..a10e1e8 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -28,7 +28,7 @@ import pandas as pd from tqdm import tqdm from ..job import job_dict, Job -from ..utils import power_to_utilization, WorkloadResult +from ..utils import power_to_utilization, WorkloadData def load_data(jobs_path, **kwargs): @@ -226,7 +226,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs): job = Job(job_info) jobs.append(job) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=telemetry_start_timestamp, diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 3642936..e3103ba 100644 --- 
a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -118,7 +118,7 @@ from typing import Dict, Union, Optional from collections import Counter from datetime import datetime, timezone from raps.job import job_dict, Job -from raps.utils import summarize_ranges, WorkloadResult +from raps.utils import summarize_ranges, WorkloadData from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END @@ -606,7 +606,7 @@ def load_data(local_dataset_path, **kwargs): for reason, count in skip_counts.items(): print(f"- {reason}: {count}") - return WorkloadResult( + return WorkloadData( jobs=jobs_list, telemetry_start=0, telemetry_end=int(end_ts - start_ts), start_date=datetime.fromtimestamp(start_ts, timezone.utc), diff --git a/raps/engine.py b/raps/engine.py index dbc5e76..3ef0d36 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -265,7 +265,7 @@ class Engine: if sim_config.live and not sim_config.replay: td = Telemetry(**sim_config_dict) - workload_result = td.load_from_live_system() + workload_data = td.load_from_live_system() elif sim_config.replay: # TODO: this will have issues if running separate systems or custom systems partition_short = partition.split("/")[-1] if partition else None @@ -284,20 +284,20 @@ class Engine: else: replay_files = sim_config.replay - workload_result = td.load_from_files(replay_files) + workload_data = td.load_from_files(replay_files) else: # Synthetic jobs wl = Workload(sim_config_args, system_config_dict) - workload_result = wl.generate_jobs() + workload_data = wl.generate_jobs() td = Telemetry(**sim_config_dict) - jobs = workload_result.jobs + jobs = workload_data.jobs # TODO refactor how stat/end/fastforward/time work if sim_config.fastforward is not None: - workload_result.telemetry_start = workload_result.telemetry_start + sim_config.fastforward + workload_data.telemetry_start = workload_data.telemetry_start + sim_config.fastforward if sim_config.time is not None: - workload_result.telemetry_end = workload_result.telemetry_end + sim_config.time + workload_data.telemetry_end = workload_data.telemetry_end + sim_config.time if sim_config.time_delta is not None: time_delta = sim_config.time_delta @@ -330,7 +330,7 @@ class Engine: system_config=system_config, ) - return engine, workload_result, time_delta + return engine, workload_data, time_delta def add_running_jobs_to_queue(self, jobs_to_submit: List): """ diff --git a/raps/multi_part_engine.py b/raps/multi_part_engine.py index bebf47b..f211b85 100644 --- a/raps/multi_part_engine.py +++ b/raps/multi_part_engine.py @@ -1,7 +1,7 @@ from collections.abc import Iterable from raps.engine import Engine, TickData from raps.sim_config import SimConfig -from raps.utils import WorkloadResult +from raps.utils import WorkloadData class MultiPartEngine: @@ -18,18 +18,18 @@ class MultiPartEngine: if len(root_systems) > 1: raise ValueError("Replay for multi-system runs is not supported") - workloads_by_partition: dict[str, WorkloadResult] = {} + workloads_by_partition: dict[str, WorkloadData] = {} engines: dict[str, Engine] = {} timestep_start, timestep_end, time_delta = 0, 0, 0 for partition in sim_config.system_configs: name = partition.system_name - engine, workload_result, time_delta = Engine.from_sim_config( + engine, workload_data, time_delta = Engine.from_sim_config( sim_config, partition=name, ) - for job in workload_result.jobs: + for job in workload_data.jobs: job.partition = name - workloads_by_partition[name] = workload_result + 
workloads_by_partition[name] = workload_data engines[name] = engine total_initial_jobs = sum(len(j.jobs) for j in workloads_by_partition.values()) for engine in engines.values(): diff --git a/raps/run_sim.py b/raps/run_sim.py index f1834c7..ceb80a9 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -76,18 +76,18 @@ def run_sim(sim_config: SimConfig): print("Use run-multi-part to run multi-partition simulations") sys.exit(1) - engine, workload_result, time_delta = Engine.from_sim_config(sim_config) + engine, workload_data, time_delta = Engine.from_sim_config(sim_config) out = sim_config.output if out: out.mkdir(parents=True) engine.telemetry.save_snapshot( dest=str(out), - result=workload_result, + result=workload_data, args=sim_config, ) - jobs = workload_result.jobs - timestep_start, timestep_end = workload_result.telemetry_start, workload_result.telemetry_end + jobs = workload_data.jobs + timestep_start, timestep_end = workload_data.telemetry_start, workload_data.telemetry_end total_timesteps = timestep_end - timestep_start downscale = sim_config.downscale diff --git a/raps/telemetry.py b/raps/telemetry.py index 93d7992..d82a6c5 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -28,7 +28,7 @@ from raps.plotting import ( plot_network_histogram ) from raps.utils import ( - next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadResult, + next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadData, ) @@ -86,7 +86,7 @@ class Telemetry: print(f"WARNING: Failed to load dataloader: {e}") self.dataloader = None - def save_snapshot(self, *, dest: str, result: WorkloadResult, args: SimConfig | TelemetryArgs): + def save_snapshot(self, *, dest: str, result: WorkloadData, args: SimConfig | TelemetryArgs): """Saves a snapshot of the jobs to a compressed file. """ np.savez_compressed(dest, jobs=[vars(j) for j in result.jobs], @@ -96,7 +96,7 @@ class Telemetry: args=args, ) - def load_snapshot(self, snapshot: str | Path) -> tuple[WorkloadResult, SimConfig | TelemetryArgs]: + def load_snapshot(self, snapshot: str | Path) -> tuple[WorkloadData, SimConfig | TelemetryArgs]: """Reads a snapshot from a compressed file :param str snapshot: Filename @@ -113,7 +113,7 @@ class Telemetry: start_date = data['start_date'].item() args = data['args'].item() - result = WorkloadResult( + result = WorkloadData( jobs=jobs, telemetry_start=telemetry_start, telemetry_end=telemetry_end, start_date=start_date, @@ -193,11 +193,11 @@ class Telemetry: assert self.dataloader return self.dataloader.cdu_pos(index, config=self.config) - def load_from_live_system(self) -> WorkloadResult: + def load_from_live_system(self) -> WorkloadData: result = self.load_live_data() return result - def load_from_files(self, files) -> WorkloadResult: + def load_from_files(self, files) -> WorkloadData: """ Load all files as combined jobs """ assert len(files) >= 1 files = [Path(f) for f in files] diff --git a/raps/utils.py b/raps/utils.py index 0e636f7..323ac8a 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -711,7 +711,7 @@ def yaml_dump(data): ) -class WorkloadResult(BaseModel): +class WorkloadData(BaseModel): """ Represents a workload, a list of jobs with some metadata. Returned by dataloaders load_data() function, and by Workload.generate_jobs(). 
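# For reference, a minimal stand-in showing the shape of the renamed container
# (illustrative only; the real WorkloadData lives in raps/utils.py and is shown
# in the hunk above, with an AwareDatetime start_date and arbitrary types allowed):
from datetime import datetime, timezone
from pydantic import AwareDatetime, BaseModel, ConfigDict

class _WorkloadDataSketch(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    jobs: list
    telemetry_start: int
    telemetry_end: int
    start_date: AwareDatetime  # must be timezone-aware

_wd = _WorkloadDataSketch(jobs=[], telemetry_start=0, telemetry_end=3600,
                          start_date=datetime.fromtimestamp(0, timezone.utc))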
diff --git a/raps/workload.py b/raps/workload.py index cc1ba09..338976d 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -31,7 +31,7 @@ from raps.utils import ( next_arrival_byconfargs, truncated_weibull, truncated_weibull_float, - WorkloadResult, + WorkloadData, ) import math import random @@ -67,7 +67,7 @@ class Workload: # This function calls the job generation function as specified by the workload keyword. # The respective funciton of this class is called. jobs = getattr(self, self.args.workload)(args=self.args) - return WorkloadResult( + return WorkloadData( jobs=jobs, telemetry_start=0, telemetry_end=self.args.time, start_date=self.args.start, diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py index 4d57752..e483b18 100644 --- a/tests/systems/test_engine.py +++ b/tests/systems/test_engine.py @@ -22,10 +22,10 @@ def test_engine(system, system_config, sim_output): "system": system, "time": "2m", }) - engine, workload_result, time_delta = Engine.from_sim_config(sim_config) - jobs = workload_result.jobs - timestep_start = workload_result.telemetry_start - timestep_end = workload_result.telemetry_end + engine, workload_data, time_delta = Engine.from_sim_config(sim_config) + jobs = workload_data.jobs + timestep_start = workload_data.telemetry_start + timestep_end = workload_data.telemetry_end ticks = list(engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)) assert len(ticks) == 120 -- GitLab From a38a3ad4be5947df595d4cde68a3f356834fb612 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 15:10:42 -0400 Subject: [PATCH 281/388] Allow multiple npz files --- raps/telemetry.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index d82a6c5..1b5c256 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -203,14 +203,22 @@ class Telemetry: files = [Path(f) for f in files] if str(files[0]).endswith(".npz"): - file = files[0] - print(f"Loading {file}") - result, args_from_file = self.load_snapshot(file) - print(f"File was generated with: --system {args_from_file.system}") + data: WorkloadData | None = None + for file in files: + print(f"Loading {file}") + new_data, args_from_file = self.load_snapshot(file) + print(f"File was generated with: --system {args_from_file.system}") + if not data: + data = new_data + else: + data.jobs.extend(new_data.jobs) + data.telemetry_start = min(data.telemetry_start, new_data.telemetry_start) + data.telemetry_end = min(data.telemetry_end, new_data.telemetry_end) + data.start_date = min(data.start_date, new_data.start_date) else: # custom data loader - result = self.load_data(files) - self.update_jobs(result.jobs) - return result + data = self.load_data(files) + self.update_jobs(data.jobs) + return data def update_jobs(self, jobs: list[Job]): """ Updates jobs with new scale or random start times """ -- GitLab From b84a805c43d8536eb5d910b97a66de71533485b7 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 15:36:27 -0400 Subject: [PATCH 282/388] Fix bug --- raps/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/engine.py b/raps/engine.py index 3ef0d36..25a9b55 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -297,7 +297,7 @@ class Engine: workload_data.telemetry_start = workload_data.telemetry_start + sim_config.fastforward if sim_config.time is not None: - workload_data.telemetry_end = workload_data.telemetry_end + sim_config.time + workload_data.telemetry_end = 
workload_data.telemetry_start + sim_config.time if sim_config.time_delta is not None: time_delta = sim_config.time_delta -- GitLab From e1ad247aa24fd587d91a2d2100f67d5a56f1e5ca Mon Sep 17 00:00:00 2001 From: "Maiterth, Matthias" Date: Thu, 4 Sep 2025 20:46:28 +0000 Subject: [PATCH 283/388] Rebase: Fix bluewaters dataloader working to demonstrate slowdown and refactor network --- README.md | 27 ++ config/bluewaters.yaml | 4 +- experiments/bluewaters.yaml | 6 + experiments/lassen.yaml | 2 +- experiments/mit.yaml | 2 +- raps/dataloaders/bluewaters.py | 68 +++-- raps/engine.py | 20 -- raps/job.py | 47 +++- raps/network.py | 498 --------------------------------- raps/network/__init__.py | 126 +++++++++ raps/network/base.py | 136 +++++++++ raps/network/dragonfly.py | 75 +++++ raps/network/fat_tree.py | 59 ++++ raps/network/torus3d.py | 152 ++++++++++ raps/sim_config.py | 3 + raps/system_config.py | 4 +- 16 files changed, 669 insertions(+), 560 deletions(-) create mode 100644 experiments/bluewaters.yaml delete mode 100644 raps/network.py create mode 100644 raps/network/__init__.py create mode 100644 raps/network/base.py create mode 100644 raps/network/dragonfly.py create mode 100644 raps/network/fat_tree.py create mode 100644 raps/network/torus3d.py diff --git a/README.md b/README.md index a561278..eeb5a5d 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,33 @@ See instructions in [server/README.md](https://code.ornl.gov/exadigit/simulation See instructions in [dashboard/README.md](https://code.ornl.gov/exadigit/simulation-dashboard) +## Running Tests + +RAPS uses [pytest](https://docs.pytest.org/) for its test suite. +Before running tests, ensure that you have a valid data directory available (e.g., `/opt/data`) and set the environment variable `RAPS_DATA_DIR` to point to it. + +### Run all tests +```bash +RAPS_DATA_DIR=/opt/data pytest -n auto -x +``` + +By default, tests are parallelized with `pytest-xdist` (`-n auto`) to speed up execution. +The `-x` flag stops execution after the first failure. Add `-v` to run in verbose mode. + +### Run only network-related tests + +```bash +RAPS_DATA_DIR=/opt/data pytest -n auto -x -m network +``` + +See `pytest.ini` for the different options for `-m`. 
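Other standard pytest selectors also apply here; for example, `-k` filters by test-name keyword:

```bash
RAPS_DATA_DIR=/opt/data pytest -n auto -k engine
```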
+ +### Run a specific test file + +```bash +RAPS_DATA_DIR=/opt/data pytest tests/systems/test_engine.py +``` + ### Contributing Code Install pre-commit hooks as set by the project: diff --git a/config/bluewaters.yaml b/config/bluewaters.yaml index 90be71d..989ca78 100644 --- a/config/bluewaters.yaml +++ b/config/bluewaters.yaml @@ -49,7 +49,9 @@ scheduler: NODE_FAIL: 0.01 network: topology: torus3d - network_max_bw: 9600000000.0 + #topology: capacity + #network_max_bw: 9.6E9 + network_max_bw: 1E7 torus_x: 24 torus_y: 24 torus_z: 24 diff --git a/experiments/bluewaters.yaml b/experiments/bluewaters.yaml new file mode 100644 index 0000000..80ab129 --- /dev/null +++ b/experiments/bluewaters.yaml @@ -0,0 +1,6 @@ +system: bluewaters +replay: + - /opt/data/bluewaters +start: "20170328" +simulate_network: True +filter: "traffic > 1e8" diff --git a/experiments/lassen.yaml b/experiments/lassen.yaml index 5434a1b..7ee04be 100644 --- a/experiments/lassen.yaml +++ b/experiments/lassen.yaml @@ -1,6 +1,6 @@ system: lassen replay: - - ~/data/lassen/Lassen-Supercomputer-Job-Dataset + - /opt/data/lassen/Lassen-Supercomputer-Job-Dataset policy: fcfs backfill: firstfit fastforward: 365d diff --git a/experiments/mit.yaml b/experiments/mit.yaml index bc718e4..77815d4 100644 --- a/experiments/mit.yaml +++ b/experiments/mit.yaml @@ -1,6 +1,6 @@ system: mit_supercloud partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] replay: - - ~/data/mit/202201 + - /opt/data/mit_supercloud start: 2021-05-21T13:00 end: 2021-05-21T14:00 diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py index 46fa462..e26c51f 100644 --- a/raps/dataloaders/bluewaters.py +++ b/raps/dataloaders/bluewaters.py @@ -17,7 +17,17 @@ To download the necessary datasets: 2. /node_metrics/cray_system_sampler - we are using the file 20170328.tgz (485MB) - Another dataset we plan to use (but not currently): + In order to speed up data loading, we have downsized these files to just + four columns using the following code: + + import csv + with open("20170328", "r") as infile, open("output.csv", "w", newline="") as outfile: + reader = csv.reader(infile, skipinitialspace=True) + writer = csv.writer(outfile) + for row in reader: + writer.writerow([row[0], row[1], row[15], row[16]]) + + Another dataset we plan to use (but not currently using) is: 3. 
Monet - Blue Waters Network Dataset (140GB) - https://databank.illinois.edu/datasets/IDB-2921318 @@ -32,6 +42,7 @@ import math import re import pandas as pd from pathlib import Path +from pprint import pprint from raps.telemetry import Job, job_dict @@ -62,20 +73,10 @@ def build_sampler_df(root, day, nodes, tmin, tmax, tx_idx, rx_idx, chunksize=Non df = df[df["nid"].isin(nodes)] if df.empty: return None - # sort & compute deltas per node + # sort values (optional, for consistency) df = df.sort_values(["nid", "ts"]) - df["ts_prev"] = df.groupby("nid")["ts"].shift(1) - df["tx_prev"] = df.groupby("nid")["tx"].shift(1) - df["rx_prev"] = df.groupby("nid")["rx"].shift(1) - # positive deltas only - df["dtx"] = df["tx"] - df["tx_prev"] - df["drx"] = df["rx"] - df["rx_prev"] - df = df[(df["dtx"] > 0) | (df["drx"] > 0)] - if df.empty: - return None - # mid-interval timestamp for window inclusion - df["mid_ts"] = 0.5 * (df["ts"] + df["ts_prev"]) - df = df[["nid", "mid_ts", "dtx", "drx"]].dropna() + # keep raw values + df = df[["nid", "ts", "tx", "rx"]].dropna() return df for fp in files: @@ -180,6 +181,8 @@ def load_data(local_dataset_path, **kwargs): root = Path(local_dataset_path[0]) day = kwargs.get("start") fp = root / "torque_logs" / day + filter_str = kwargs.get("filter") + debug = kwargs.get("debug") jobs_raw = [] @@ -254,8 +257,10 @@ def load_data(local_dataset_path, **kwargs): global_tmax = max(abs_ends) # Confirm the correct 0-based indices for ipogif0_* from the HEADER - tx_idx = 15 # kwargs.get("sampler_tx_idx", 15) # placeholder; pass real index via kwargs - rx_idx = 16 # kwargs.get("sampler_rx_idx", 16) # placeholder; pass real index via kwargs + # tx_idx = 15 # for the original file + # rx_idx = 16 + tx_idx = 2 # for a downselected file with just four columns: [timestamp, node, tx, rx] - for faster loading + rx_idx = 3 # Build once (chunk if files are huge) sampler_df = build_sampler_df(root, day, all_nodes, global_tmin, global_tmax, tx_idx, rx_idx, chunksize=None) @@ -277,16 +282,23 @@ def load_data(local_dataset_path, **kwargs): # Filter by nodes, sum positive deltas dfj = sampler_df[sampler_df["nid"].isin(nodes)] - total_tx = int(dfj["dtx"].sum()) if not dfj.empty else 0 - total_rx = int(dfj["drx"].sum()) if not dfj.empty else 0 - # total_tx and total_rx are bytes per node + + # Print first 10 rows (node, tx, rx) + if debug: + print(dfj[["nid", "tx", "rx"]].head(10)) + + total_tx = int(dfj["tx"].sum()) if not dfj.empty else 0 + total_rx = int(dfj["rx"].sum()) if not dfj.empty else 0 nodes_required = r.get("nodes_required") + avg_tx_per_node = total_tx / nodes_required if nodes_required > 0 else 0 + avg_rx_per_node = total_rx / nodes_required if nodes_required > 0 else 0 + # Smear totals evenly across bins (simple first pass) duration = max(1, et_abs - st_abs) samples = max(1, math.ceil(duration / bin_s)) - ntx, nrx = throughput_traces(total_tx, total_rx, samples) # bytes per bin + ntx, nrx = throughput_traces(avg_tx_per_node, avg_rx_per_node, samples) job_d = job_dict( nodes_required=nodes_required, @@ -312,7 +324,16 @@ def load_data(local_dataset_path, **kwargs): trace_quanta=bin_s, trace_missing_values=False, ) - jobs.append(Job(job_d)) + + if filter_str: + traffic = (avg_tx_per_node + avg_rx_per_node) / 2. 
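# For reference: the filter string from the experiment file (e.g. "traffic > 1e8")
# is evaluated against local variables such as `traffic`. A standalone sketch,
# with a namespace-restricted variant (the restriction is a hardening suggestion,
# not current behavior):
traffic = 2.5e8
assert eval("traffic > 1e8") is True  # the expression sees the variable `traffic`
assert eval("traffic > 1e8", {"__builtins__": {}}, {"traffic": traffic}) is True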
+ keep_jobs = eval(filter_str) + print(job_d["id"], filter_str, traffic, keep_jobs) + else: + keep_jobs = True + + if keep_jobs: + jobs.append(Job(job_d)) # Normalize times so first start = 0 t0 = min((j.start_time for j in jobs), default=0) @@ -324,8 +345,9 @@ def load_data(local_dataset_path, **kwargs): j.trace_start_time -= t0 j.trace_end_time -= t0 - # pprint(jobs) + if debug: + pprint(jobs) + simulation_start = 0 simulation_end = max((j.end_time for j in jobs), default=0) - return jobs, simulation_start, simulation_end diff --git a/raps/engine.py b/raps/engine.py index 64bf218..d07b9c5 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -350,24 +350,16 @@ class Engine: Adds running jobs to the queue, and removes them from the jobs_to_submit jobs_to_submit still holds the jobs that need be submitted in the future. """ - if self.debug: - print(f"[DEBUG] add_running_jobs_to_queue: current_time={self.current_timestep}") # Build a list of jobs whose start_time is <= current_time. eligible_jobs = [job for job in jobs_to_submit if job.start_time is not None and job.start_time < self.current_timestep] - if self.debug: - print(f"[DEBUG] add_running_jobs_to_queue: Found {len(eligible_jobs)} eligible jobs.") # Remove those jobs from jobs_to_submit: jobs_to_submit[:] = [job for job in jobs_to_submit if job.start_time is None or job.start_time >= self.current_timestep] - if self.debug: - print(f"[DEBUG] add_running_jobs_to_queue: {len(jobs_to_submit)} jobs remaining in jobs_to_submit.") # Convert them to Job instances and build list of eligible jobs. self.queue += eligible_jobs - if self.debug: - print(f"[DEBUG] add_running_jobs_to_queue: self.queue now has {len(self.queue)} jobs.") def add_eligible_jobs_to_queue(self, jobs_to_submit: List): """ @@ -379,20 +371,12 @@ class Engine: - true if new jobs are present - false if no new jobs are present """ - if self.debug: - print(f"[DEBUG] add_eligible_jobs_to_queue: current_time={self.current_timestep}") # Build a list of jobs whose submit_time is <= current_time. eligible_jobs = [job for job in jobs_to_submit if job.submit_time <= self.current_timestep] - if self.debug: - print(f"[DEBUG] add_eligible_jobs_to_queue: Found {len(eligible_jobs)} eligible jobs.") # Remove those jobs from jobs_to_submit: jobs_to_submit[:] = [job for job in jobs_to_submit if job.submit_time > self.current_timestep] - if self.debug: - print(f"[DEBUG] add_eligible_jobs_to_queue: {len(jobs_to_submit)} jobs remaining in jobs_to_submit.") # Convert them to Job instances and build list of eligible jobs. self.queue += eligible_jobs - if self.debug: - print(f"[DEBUG] add_eligible_jobs_to_queue: self.queue now has {len(self.queue)} jobs.") if eligible_jobs != []: return True else: @@ -544,14 +528,10 @@ class Engine: net_utils = [] net_tx_list = [] net_rx_list = [] - if self.debug: - print(f"Current Time: {self.current_timestep}") slowdown_factors = [] for job in self.running: - if self.debug: - print(f"JobID: {job.id}") job.running_time = self.current_timestep - job.start_time diff --git a/raps/job.py b/raps/job.py index ecb7fd3..05c455e 100644 --- a/raps/job.py +++ b/raps/job.py @@ -103,26 +103,45 @@ def dilate_trace(trace, factor): Scale a trace in the time dimension by the given factor. Parameters: - - trace (list of float): the original trace values. - - factor (float): the dilation factor; >1 to slow down (stretch) and <1 to speed up (compress). + - trace: list/tuple/np.ndarray of floats OR a single numeric scalar. 
+ - factor (float): >1 to slow down (stretch in time), <1 to speed up. Returns: - - list of float: the dilated trace. + - list of float for sequence inputs, or numeric for scalar inputs. """ - if trace is None or (isinstance(trace, (list, np.ndarray)) and len(trace) == 0): + if trace is None: return trace - # Traces can be list/np.array or single float values. - # In case of a single float, we adjust the value directly as it is applied to each timestep - if isinstance(trace, (np.float64, float)): - return trace / factor # Single value - original_length = len(trace) - # Compute the new length (rounding to the nearest integer) - new_length = int(np.round(original_length * factor)) - # Create arrays for the old and new indices + + if factor is None: + raise ValueError("factor must be provided") + if factor == 0: + raise ValueError("factor must be non-zero") + + # Treat any numeric scalar (int/float/np.number) as a scalar trace + if isinstance(trace, (int, float, np.integer, np.floating, np.number)): + # Keep total "area" the same when stretching/compressing in time: + return trace / factor + + # Handle common sequence types directly + if isinstance(trace, (list, tuple, np.ndarray)): + arr = np.asarray(trace, dtype=float) + else: + # Last-resort: try coercion (e.g., pandas Series) + arr = np.asarray(trace, dtype=float) + + if arr.size == 0: + # empty sequence: nothing to do + return [] if not isinstance(trace, np.ndarray) else arr + + original_length = arr.size + # at least 1 sample after dilation + new_length = max(1, int(np.round(original_length * float(factor)))) + + # If original_length == 1, interpolation just repeats the value old_indices = np.linspace(0, original_length - 1, num=original_length) new_indices = np.linspace(0, original_length - 1, num=new_length) - # Use linear interpolation to compute the new trace values - new_trace = np.interp(new_indices, old_indices, trace).tolist() + + new_trace = np.interp(new_indices, old_indices, arr).tolist() return new_trace diff --git a/raps/network.py b/raps/network.py deleted file mode 100644 index b4340e4..0000000 --- a/raps/network.py +++ /dev/null @@ -1,498 +0,0 @@ -import csv -import networkx as nx -from itertools import combinations -from raps.utils import get_current_utilization -from pathlib import Path - - -class NetworkModel: - """ """ - - def __init__(self, *, available_nodes, config): - self.topology = config.get("TOPOLOGY") - # if fat-tree, build the graph once - if self.topology == "fat-tree": - print("building fat-tree graph...") - self.fattree_k = config.get("FATTREE_K") - self.net_graph = build_fattree(self.fattree_k) - print(self.net_graph) - elif self.topology == "torus3d": - print("building torus3d graph...") - dims = (int(config["TORUS_X"]), int(config["TORUS_Y"]), int(config["TORUS_Z"])) - wrap = bool(config.get("TORUS_WRAP", True)) - link_bw = float(config.get("TORUS_LINK_BW", config.get("NETWORK_MAX_BW"))) - hpr = int(config.get("HOSTS_PER_ROUTER")) - routing = config.get("TORUS_ROUTING", "DOR_XYZ").upper() - coords_csv = config.get("NODE_COORDS_CSV") # optional - self.net_graph, self.torus_meta = build_torus3d( - dims=dims, wrap=wrap, link_bw=link_bw, hosts_per_router=hpr, routing=routing, coords_csv=coords_csv - ) - elif self.topology == "dragonfly": - print("building dragonfly graph...") - D = config["DRAGONFLY_D"] # groups - A = config["DRAGONFLY_A"] # routers per group - P = config["DRAGONFLY_P"] # hosts per router - self.net_graph = build_dragonfly(D, A, P) - print(self.net_graph) - - real_ids = available_nodes - 
real_ids.sort() - self.real_to_fat_idx = {rid: idx for idx, rid in enumerate(real_ids)} - # e.g. real_to_fat_idx[10] = 0, real_to_fat_idx[11] = 1, etc., up to 791 → 791 - self.max_link_bw = config.get("NETWORK_MAX_BW") - - def simulate_network_utilization(self, *, job, debug=False): - net_util = 0 - net_cong = 0 - net_tx = 0 - net_rx = 0 - # self.config.get('TRACE_QUANTA') # Why? What should this be? - max_throughput = self.max_link_bw * job.trace_quanta - - if job.nodes_required <= 1: - # single node, no network utilization or congestion. - pass - else: - - net_tx = get_current_utilization(job.ntx_trace, job) # Are these % or actual bytes? - net_rx = get_current_utilization(job.nrx_trace, job) - net_util = network_utilization(net_tx, net_rx, max_throughput) - - # Congestion depends on topology: - if self.topology == "fat-tree": - # Map integers to hostnames - host_list = [node_id_to_host_name(n, self.fattree_k) for n in job.scheduled_nodes] - loads = link_loads_for_job(self.net_graph, host_list, net_tx) # ? Only tx not rx or total net_util) - net_cong = worst_link_util(loads, max_throughput) - - if debug: - print(" fat-tree hosts:", host_list) - - elif self.topology == "dragonfly": - D = self.config["DRAGONFLY_D"] - A = self.config["DRAGONFLY_A"] - P = self.config["DRAGONFLY_P"] - - host_list = [] - for real_n in job.scheduled_nodes: - fat_idx = self.real_to_fat_idx[real_n] # contiguous in [0..(D*A*P−1)] - host_list.append(dragonfly_node_id_to_host_name(fat_idx, D, A, P)) - if debug: - print(" dragonfly hosts:", host_list) - # if len(host_list) <= 1: - # net_cong = 0.0 - # else: - loads = link_loads_for_job(self.net_graph, host_list, net_tx) # ? Only tx not rx or total net_util) - net_cong = worst_link_util(loads, max_throughput) - - else: # capacity model: simple α+β or normalized overload - net_cong = network_congestion(net_tx, net_rx, max_throughput) - - return net_util, net_cong, net_tx, net_rx, max_throughput - - -def apply_job_slowdown(*, job, max_throughput, net_util, net_cong, net_tx, net_rx, debug: bool = False): - # Get the maximum allowed bandwidth from the configuration. - if net_cong > 1: - if debug: - print(f"congested net_cong: {net_cong}, max_throughput: {max_throughput}") - print(f"length of {len(job.gpu_trace)} before dilation") - throughput = net_tx + net_rx - slowdown_factor = network_slowdown(throughput, max_throughput) - - if debug: - print("***", hasattr(job, "dilated"), throughput, max_throughput, slowdown_factor) - - # Only apply slowdown once per job to avoid compounding the effect. - if not job.dilated: - if debug: - print(f"Applying slowdown factor {slowdown_factor:.2f} to job {job.id} due to network congestion") - job.apply_dilation(slowdown_factor) - job.dilated = True - if debug: - print(f"length of {len(job.gpu_trace)} after dilation") - else: - slowdown_factor = 1 - job.slowdown_factor = slowdown_factor - - return slowdown_factor - - -def compute_system_network_stats(net_utils, net_tx_list, net_rx_list, slowdown_factors): - - # Compute network averages - n = len(net_utils) or 1 - avg_tx = sum(net_tx_list) / n - avg_rx = sum(net_rx_list) / n - avg_net = sum(net_utils) / n - # avg_slowdown_per_job = sum(slowdown_factors) / n - # self.avg_slowdown_history.append(avg_slowdown_per_job) - # max_slowdown_per_job = max(slowdown_factors) - # self.max_slowdown_history.append(max_slowdown_per_job) - - return avg_tx, avg_rx, avg_net - - -def network_congestion(tx, rx, max_throughput): - """ - Overload factor ≥0: average of send/recv NOT clamped. 
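# For reference, the two deleted capacity-model helpers reduce to a clamped and
# an unclamped ratio; a runnable sketch:
def _utilization(tx, rx, cap):
    return (min(tx / cap, 1.0) + min(rx / cap, 1.0)) / 2.0  # clamped to [0, 1]

def _congestion(tx, rx, cap):
    return (tx / cap + rx / cap) / 2.0  # unclamped; > 1.0 means over capacity

assert _utilization(2e9, 0.0, 1e9) == 0.5
assert _congestion(2e9, 0.0, 1e9) == 1.0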
- >1.0 means you’re pushing above capacity. - """ - tx_util = float(tx) / max_throughput - rx_util = float(rx) / max_throughput - return (tx_util + rx_util) / 2.0 - - -def network_utilization(tx, rx, max_throughput): - """ - True utilization in [0,1]: average of send/recv clamped to 100%. - """ - tx_u = min(float(tx) / max_throughput, 1.0) - rx_u = min(float(rx) / max_throughput, 1.0) - return (tx_u + rx_u) / 2.0 - - -def network_slowdown(current_throughput, max_throughput): - """ - Calculate a slowdown factor based on current network bandwidth usage. - - If current_bw is within limits, the factor is 1.0 (no slowdown). - If current_bw exceeds max_bw, the factor is current_bw/max_bw. - """ - if current_throughput <= max_throughput: - return 1.0 - else: - return current_throughput / max_throughput - - -def build_fattree(k): - """ - Build a k-ary fat-tree: - - k pods - - each pod has k/2 edge switches, k/2 agg switches - - core layer has (k/2)^2 core switches - - each edge switch connects to k/2 hosts - Returns a NetworkX Graph where: - - hosts are named "h_{pod}_{edge}_{i}" - - edge switches "e_{pod}_{edge}" - - agg switches "a_{pod}_{agg}" - - core switches "c_{i}_{j}" - """ - G = nx.Graph() - # core - # num_core = (k//2)**2 # Unused! - for i in range(k // 2): - for j in range(k // 2): - core = f"c_{i}_{j}" - G.add_node(core, type="core") - # pods - for pod in range(k): - # agg switches - for agg in range(k // 2): - a = f"a_{pod}_{agg}" - G.add_node(a, type="agg") - # connect to all core switches in column agg - for i in range(k // 2): - core = f"c_{agg}_{i}" - G.add_edge(a, core) - # edge switches + hosts - for edge in range(k // 2): - e = f"e_{pod}_{edge}" - G.add_node(e, type="edge") - # connect edge→each agg in this pod - for agg in range(k // 2): - a = f"a_{pod}_{agg}" - G.add_edge(e, a) - # connect hosts - for h in range(k // 2): - host = f"h_{pod}_{edge}_{h}" - G.add_node(host, type="host") - G.add_edge(e, host) - return G - - -def all_to_all_paths(G, hosts): - """ - Given a list of host names, return shortest‐paths for every unordered pair. - """ - paths = [] - for i in range(len(hosts)): - for j in range(i + 1, len(hosts)): - src, dst = hosts[i], hosts[j] - p = nx.shortest_path(G, src, dst) - paths.append((src, dst, p)) - return paths - - -def link_loads_for_job(G, job_hosts, tx_volume_bytes): - """ - Distribute tx_volume_bytes from each host equally to all its peers; - accumulate per-link loads and return a dict {(u,v):bytes, …}. - """ - paths = all_to_all_paths(G, job_hosts) - loads = {edge: 0.0 for edge in G.edges()} - # each host sends tx_volume_bytes to each of the (N-1) peers - for src in job_hosts: - if len(job_hosts) >= 2: - per_peer = tx_volume_bytes / (len(job_hosts) - 1) - else: - per_peer = 0 - # find paths where src is the sender - for s, d, p in paths: - if s != src: - continue - # add per_peer to every link on p - for u, v in zip(p, p[1:]): - # ensure ordering matches loads keys - edge = (u, v) if (u, v) in loads else (v, u) - loads[edge] += per_peer - return loads - - -def worst_link_util(loads, throughput): - """ - Given loads in **bytes** and capacity in **bits/sec**, convert: - util = (bytes * 8) / throughput - Return the maximum util over all links. - """ - max_util = 0.0 - for edge, byte_load in loads.items(): - util = (byte_load * 8) / throughput - if util > max_util: - max_util = util - return max_util - - -def node_id_to_host_name(node_id: int, k: int) -> str: - """ - Map a 0-based integer node_id into one of the fat-tree hosts "h_{pod}_{edge}_{h}". 
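# For reference, a runnable sketch of this flat-id -> fat-tree-host mapping:
def _node_id_to_host_name(node_id, k):
    hosts_per_pod = (k // 2) * (k // 2)
    pod, offset = divmod(node_id, hosts_per_pod)
    edge, idx = divmod(offset, k // 2)
    return f"h_{pod}_{edge}_{idx}"

assert _node_id_to_host_name(17, 8) == "h_1_0_1"  # k=8: 16 hosts per pod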
- There are (k^3/4) total hosts, assigned in ascending order across pod → edge → h. - """ - hosts_per_pod = (k // 2) * (k // 2) # e.g. for k=8, hosts_per_pod = 16 - pod = node_id // hosts_per_pod - offset = node_id % hosts_per_pod - edge = offset // (k // 2) - idx = offset % (k // 2) - return f"h_{pod}_{edge}_{idx}" - - -def build_dragonfly(D: int, A: int, P: int) -> nx.Graph: - """ - Build a “simple” k-ary Dragonfly with: - D = # of groups - A = # of routers per group - P = # of hosts (endpoints) per router - - Naming convention: - - Router nodes: "r_{g}_{r}" with g ∈ [0..D−1], r ∈ [0..A−1] - - Host nodes: "h_{g}_{r}_{p}" with p ∈ [0..P−1] - - Topology: - 1. All routers within a group form a full clique. - 2. Each router r in group g has exactly one “global link” to router r in each other group. - 3. Each router r in group g attaches to P hosts ("h_{g}_{r}_{0..P−1}"). - """ - G = nx.Graph() - - # 1) Create all router nodes - for g in range(D): - for r in range(A): - router = f"r_{g}_{r}" - G.add_node(router, type="router", group=g, index=r) - - # 2) Intra‐group full mesh of routers - for g in range(D): - routers_in_group = [f"r_{g}_{r}" for r in range(A)] - for u, v in combinations(routers_in_group, 2): - G.add_edge(u, v) - - # 3) Inter‐group “one‐to‐one” global links - # (router index r in group g → router index r in group g2) - for g1 in range(D): - for g2 in range(g1 + 1, D): - for r in range(A): - u = f"r_{g1}_{r}" - v = f"r_{g2}_{r}" - G.add_edge(u, v) - - # 4) Attach hosts to each router - for g in range(D): - for r in range(A): - router = f"r_{g}_{r}" - for p in range(P): - host = f"h_{g}_{r}_{p}" - G.add_node(host, type="host", group=g, router=r, index=p) - G.add_edge(router, host) - - return G - - -def dragonfly_node_id_to_host_name(fat_idx: int, D: int, A: int, P: int) -> str: - """ - Given a contiguous fat‐index ∈ [0..(D*A*P − 1)], return "h_{g}_{r}_{p}". - Hosts are laid out in order: - 0..(P−1) → group=0, router=0, p=0..P−1 - P..2P−1 → group=0, router=1, p=0..P−1 - … - (A*P)..(2A*P−1) → group=1, router=0, … - In general: - host_offset = fat_idx % P - router_offset = (fat_idx // P) % A - group = fat_idx // (A*P) - """ - total_hosts = D * A * P - assert 0 <= fat_idx < total_hosts, "fat_idx out of range" - - host_offset = fat_idx % P - router_group = (fat_idx // P) % A - pod = fat_idx // (A * P) - return f"h_{pod}_{router_group}_{host_offset}" - - -def build_torus3d(dims, wrap=True, link_bw=1e9, hosts_per_router=1, routing="DOR_XYZ", coords_csv=None): - """ - Build a 3D torus at router granularity, then attach host nodes to routers. - Node ids in the returned graph are host names ("h_x_y_z_i") and router names ("r_x_y_z"). - Edges have attribute 'capacity' (bytes/s) and 'latency' (per hop). 
- """ - X, Y, Z = map(int, dims) - G = nx.Graph() - - # Routers - def rname(x, y, z): - return f"r_{x}_{y}_{z}" - - for x in range(X): - for y in range(Y): - for z in range(Z): - G.add_node(rname(x, y, z), kind="router", coord=(x, y, z)) - - # Toroidal links between routers (±x, ±y, ±z) - def wrapi(i, n): - return (i + n) % n if wrap else (None if i < 0 or i >= n else i) - - for x in range(X): - for y in range(Y): - for z in range(Z): - u = rname(x, y, z) - # x+ - nxp = wrapi(x + 1, X) - v = rname(nxp, y, z) if nxp is not None else None - if v and not G.has_edge(u, v): - G.add_edge(u, v, capacity=link_bw) - # y+ - nyp = wrapi(y + 1, Y) - v = rname(x, nyp, z) if nyp is not None else None - if v and not G.has_edge(u, v): - G.add_edge(u, v, capacity=link_bw) - # z+ - nzp = wrapi(z + 1, Z) - v = rname(x, y, nzp) if nzp is not None else None - if v and not G.has_edge(u, v): - G.add_edge(u, v, capacity=link_bw) - - # Attach hosts to routers - host_to_router = {} - router_to_hosts = {} - - def hname(x, y, z, i): - return f"h_{x}_{y}_{z}_{i}" - - # If a nid→(x,y,z) CSV is supplied, place accordingly; else dense round-robin - # CSV format: nid,x,y,z[,i] - nid_placement = {} - if coords_csv: - p = Path(coords_csv) - with p.open("rt") as fh: - rd = csv.reader(fh) - for row in rd: - if not row: - continue - nid = int(row[0]) - x, y, z = map(int, row[1:4]) - i = int(row[4]) if len(row) > 4 else 0 - nid_placement[nid] = (x, y, z, i) - - # Build hosts - for x in range(X): - for y in range(Y): - for z in range(Z): - r = rname(x, y, z) - router_to_hosts[r] = [] - for i in range(hosts_per_router): - h = hname(x, y, z, i) - G.add_node(h, kind="host", coord=(x, y, z), local_index=i) - G.add_edge(h, r, capacity=link_bw) # host↔router edge; you can cap with NETWORK_MAX_BW instead - host_to_router[h] = r - router_to_hosts[r].append(h) - - meta = { - "dims": (X, Y, Z), - "wrap": wrap, - "routing": routing, - "host_to_router": host_to_router, - "router_to_hosts": router_to_hosts, - } - return G, meta - - -def _axis_steps(a, b, n, wrap=True): - """Return minimal step sequence along one axis from a to b with wrap-around.""" - if a == b: - return [] - fwd = (b - a) % n - back = (a - b) % n - if not wrap: - step = 1 if b > a else -1 - return [step] * abs(b - a) - if fwd <= back: - return [1] * fwd - else: - return [-1] * back - - -def torus_route_xyz(src_r, dst_r, dims, wrap=True): - """Router-level path (list of router names) using XYZ dimension-order routing.""" - X, Y, Z = dims - - def parse(r): - _, x, y, z = r.split("_") - return int(x), int(y), int(z) - - x1, y1, z1 = parse(src_r) - x2, y2, z2 = parse(dst_r) - - path = [src_r] - x, y, z = x1, y1, z1 - for step in _axis_steps(x, x2, X, wrap): - x = (x + step) % X - path.append(f"r_{x}_{y}_{z}") - for step in _axis_steps(y, y2, Y, wrap): - y = (y + step) % Y - path.append(f"r_{x}_{y}_{z}") - for step in _axis_steps(z, z2, Z, wrap): - z = (z + step) % Z - path.append(f"r_{x}_{y}_{z}") - return path - - -def torus_host_path(G, meta, h_src, h_dst): - r_src = meta["host_to_router"][h_src] - r_dst = meta["host_to_router"][h_dst] - routers = torus_route_xyz(r_src, r_dst, meta["dims"], meta["wrap"]) - # host->src_router + (router path) + dst_router->host - path = [h_src, r_src] + routers[1:] + [h_dst] - return path - - -def link_loads_for_job_torus(G, meta, host_list, traffic_bytes): - # all-to-all between hosts in host_list, route via torus_host_path, add traffic_bytes per pair - loads = {} - n = len(host_list) - for i in range(n): - for j in range(i + 1, n): - p = 
torus_host_path(G, meta, host_list[i], host_list[j]) - for u, v in zip(p, p[1:]): - e = tuple(sorted((u, v))) - loads[e] = loads.get(e, 0) + traffic_bytes - return loads diff --git a/raps/network/__init__.py b/raps/network/__init__.py new file mode 100644 index 0000000..eb49ee6 --- /dev/null +++ b/raps/network/__init__.py @@ -0,0 +1,126 @@ +from .base import ( + all_to_all_paths, + apply_job_slowdown, + compute_system_network_stats, + link_loads_for_job, + network_congestion, + network_slowdown, + network_utilization, + worst_link_util, +) + +from .fat_tree import build_fattree, node_id_to_host_name +from .torus3d import build_torus3d, link_loads_for_job_torus +from .dragonfly import build_dragonfly, dragonfly_node_id_to_host_name +from raps.utils import get_current_utilization + +__all__ = [ + "NetworkModel", + "apply_job_slowdown", + "compute_system_network_stats", + "network_congestion", + "network_utilization", + "network_slowdown", + "all_to_all_paths", + "link_loads_for_job", + "worst_link_util", + "build_fattree", + "build_torus3d", + "build_dragonfly", + "dragonfly_node_id_to_host_name", +] + + +class NetworkModel: + def __init__(self, *, available_nodes, config, **kwargs): + self.config = config + self.topology = config.get("TOPOLOGY") + self.max_link_bw = config.get("NETWORK_MAX_BW", 1e9) # default safeguard + self.real_to_fat_idx = kwargs.get("real_to_fat_idx", {}) + + if self.topology == "fat-tree": + self.fattree_k = config.get("FATTREE_K") + self.net_graph = build_fattree(self.fattree_k) + + elif self.topology == "torus3d": + dims = ( + int(config["TORUS_X"]), + int(config["TORUS_Y"]), + int(config["TORUS_Z"]) + ) + wrap = bool(config.get("TORUS_WRAP", True)) + hosts_per_router = int(config.get("HOSTS_PER_ROUTER", config.get("hosts_per_router", 1))) + + # Build the graph and metadata + self.net_graph, self.meta = build_torus3d(dims, wrap, hosts_per_router=hosts_per_router) + + # Deterministic numeric → host mapping + X, Y, Z = self.meta["dims"] + self.id_to_host = {} + nid = 0 + for x in range(X): + for y in range(Y): + for z in range(Z): + for i in range(hosts_per_router): + h = f"h_{x}_{y}_{z}_{i}" + self.id_to_host[nid] = h + nid += 1 + + elif self.topology == "dragonfly": + self.net_graph = build_dragonfly( + int(config["DRAGONFLY_D"]), + int(config["DRAGONFLY_A"]), + int(config.get("DRAGONFLY_P", 1)) + ) + + elif self.topology == "capacity": + # Capacity-only model: no explicit graph + self.net_graph = None + + else: + raise ValueError(f"Unsupported topology: {self.topology}") + + def simulate_network_utilization(self, *, job, debug=False): + net_util = net_cong = net_tx = net_rx = 0 + max_throughput = self.max_link_bw * job.trace_quanta + + if job.nodes_required <= 1: + # Single node job, skip network impact + return net_util, net_cong, net_tx, net_rx, max_throughput + + net_tx = get_current_utilization(job.ntx_trace, job) + net_rx = get_current_utilization(job.nrx_trace, job) + net_util = network_utilization(net_tx, net_rx, max_throughput) + + if self.topology == "fat-tree": + host_list = [node_id_to_host_name(n, self.fattree_k) for n in job.scheduled_nodes] + loads = link_loads_for_job(self.net_graph, host_list, net_tx) + net_cong = worst_link_util(loads, max_throughput) + if debug: + print(" fat-tree hosts:", host_list) + + elif self.topology == "dragonfly": + D, A, P = self.config["DRAGONFLY_D"], self.config["DRAGONFLY_A"], self.config["DRAGONFLY_P"] + host_list = [ + dragonfly_node_id_to_host_name(self.real_to_fat_idx[real_n], D, A, P) + for real_n in 
job.scheduled_nodes + ] + if debug: + print(" dragonfly hosts:", host_list) + loads = link_loads_for_job(self.net_graph, host_list, net_tx) + net_cong = worst_link_util(loads, max_throughput) + + elif self.topology == "torus3d": + host_list = [self.id_to_host[n] for n in job.scheduled_nodes] + loads = link_loads_for_job_torus(self.net_graph, self.meta, host_list, net_tx) + net_cong = worst_link_util(loads, max_throughput) + if debug: + print(" torus3d hosts:", host_list) + + elif self.topology == "capacity": + net_cong = network_congestion(net_tx, net_rx, max_throughput) + + else: + raise ValueError(f"Unsupported topology: {self.topology}") + + return net_util, net_cong, net_tx, net_rx, max_throughput diff --git a/raps/network/base.py b/raps/network/base.py new file mode 100644 index 0000000..f14c523 --- /dev/null +++ b/raps/network/base.py @@ -0,0 +1,136 @@ +import networkx as nx + + +def debug_print_trace(job, label: str = ""): + """Print either the length (if iterable) or the value of job.gpu_trace.""" + if hasattr(job.gpu_trace, "__len__"): + print(f"length of {len(job.gpu_trace)} {label}") + else: + print(f"gpu_trace value {job.gpu_trace} {label}") + + +def apply_job_slowdown(*, job, max_throughput, net_util, net_cong, net_tx, net_rx, debug: bool = False): + # Get the maximum allowed bandwidth from the configuration. + if net_cong > 1: + if debug: + print(f"congested net_cong: {net_cong}, max_throughput: {max_throughput}") + debug_print_trace(job, "before dilation") + + throughput = net_tx + net_rx + slowdown_factor = network_slowdown(throughput, max_throughput) + + if debug: + print("***", hasattr(job, "dilated"), throughput, max_throughput, slowdown_factor) + + # Only apply slowdown once per job to avoid compounding the effect. + if not job.dilated: + if debug: + print(f"Applying slowdown factor {slowdown_factor:.2f} to job {job.id} due to network congestion") + job.apply_dilation(slowdown_factor) + job.dilated = True + if debug: + debug_print_trace(job, "after dilation") + else: + slowdown_factor = 1 + job.slowdown_factor = slowdown_factor + + return slowdown_factor + + +def compute_system_network_stats(net_utils, net_tx_list, net_rx_list, slowdown_factors): + + # Compute network averages + n = len(net_utils) or 1 + avg_tx = sum(net_tx_list) / n + avg_rx = sum(net_rx_list) / n + avg_net = sum(net_utils) / n + # avg_slowdown_per_job = sum(slowdown_factors) / n + # self.avg_slowdown_history.append(avg_slowdown_per_job) + # max_slowdown_per_job = max(slowdown_factors) + # self.max_slowdown_history.append(max_slowdown_per_job) + + return avg_tx, avg_rx, avg_net + + +def network_congestion(tx, rx, max_throughput): + """ + Overload factor ≥0: average of send/recv NOT clamped. + >1.0 means you’re pushing above capacity. + """ + tx_util = float(tx) / max_throughput + rx_util = float(rx) / max_throughput + return (tx_util + rx_util) / 2.0 + + +def network_utilization(tx, rx, max_throughput): + """ + True utilization in [0,1]: average of send/recv clamped to 100%. + """ + tx_u = min(float(tx) / max_throughput, 1.0) + rx_u = min(float(rx) / max_throughput, 1.0) + return (tx_u + rx_u) / 2.0 + + +def network_slowdown(current_throughput, max_throughput): + """ + Calculate a slowdown factor based on current network bandwidth usage. + + If current_bw is within limits, the factor is 1.0 (no slowdown). + If current_bw exceeds max_bw, the factor is current_bw/max_bw. 
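+
+    Worked example (illustrative numbers only, not from a real trace): with
+    current_throughput = 1.5e9 and max_throughput = 1.0e9 the factor is
+    1.5e9 / 1.0e9 = 1.5, i.e. the job's traces are dilated to run 1.5x longer.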
+ """ + if current_throughput <= max_throughput: + return 1.0 + else: + return current_throughput / max_throughput + + +def all_to_all_paths(G, hosts): + """ + Given a list of host names, return shortest‐paths for every unordered pair. + """ + paths = [] + for i in range(len(hosts)): + for j in range(i + 1, len(hosts)): + src, dst = hosts[i], hosts[j] + p = nx.shortest_path(G, src, dst) + paths.append((src, dst, p)) + return paths + + +def link_loads_for_job(G, job_hosts, tx_volume_bytes): + """ + Distribute tx_volume_bytes from each host equally to all its peers; + accumulate per-link loads and return a dict {(u,v):bytes, …}. + """ + paths = all_to_all_paths(G, job_hosts) + loads = {edge: 0.0 for edge in G.edges()} + # each host sends tx_volume_bytes to each of the (N-1) peers + for src in job_hosts: + if len(job_hosts) >= 2: + per_peer = tx_volume_bytes / (len(job_hosts) - 1) + else: + per_peer = 0 + # find paths where src is the sender + for s, d, p in paths: + if s != src: + continue + # add per_peer to every link on p + for u, v in zip(p, p[1:]): + # ensure ordering matches loads keys + edge = (u, v) if (u, v) in loads else (v, u) + loads[edge] += per_peer + return loads + + +def worst_link_util(loads, throughput): + """ + Given loads in **bytes** and capacity in **bits/sec**, convert: + util = (bytes * 8) / throughput + Return the maximum util over all links. + """ + max_util = 0.0 + for edge, byte_load in loads.items(): + util = (byte_load * 8) / throughput + if util > max_util: + max_util = util + return max_util diff --git a/raps/network/dragonfly.py b/raps/network/dragonfly.py new file mode 100644 index 0000000..a13d1dc --- /dev/null +++ b/raps/network/dragonfly.py @@ -0,0 +1,75 @@ +import networkx as nx +from itertools import combinations + + +def build_dragonfly(D: int, A: int, P: int) -> nx.Graph: + """ + Build a “simple” k-ary Dragonfly with: + D = # of groups + A = # of routers per group + P = # of hosts (endpoints) per router + + Naming convention: + - Router nodes: "r_{g}_{r}" with g ∈ [0..D−1], r ∈ [0..A−1] + - Host nodes: "h_{g}_{r}_{p}" with p ∈ [0..P−1] + + Topology: + 1. All routers within a group form a full clique. + 2. Each router r in group g has exactly one “global link” to router r in each other group. + 3. Each router r in group g attaches to P hosts ("h_{g}_{r}_{0..P−1}"). + """ + G = nx.Graph() + + # 1) Create all router nodes + for g in range(D): + for r in range(A): + router = f"r_{g}_{r}" + G.add_node(router, type="router", group=g, index=r) + + # 2) Intra‐group full mesh of routers + for g in range(D): + routers_in_group = [f"r_{g}_{r}" for r in range(A)] + for u, v in combinations(routers_in_group, 2): + G.add_edge(u, v) + + # 3) Inter‐group “one‐to‐one” global links + # (router index r in group g → router index r in group g2) + for g1 in range(D): + for g2 in range(g1 + 1, D): + for r in range(A): + u = f"r_{g1}_{r}" + v = f"r_{g2}_{r}" + G.add_edge(u, v) + + # 4) Attach hosts to each router + for g in range(D): + for r in range(A): + router = f"r_{g}_{r}" + for p in range(P): + host = f"h_{g}_{r}_{p}" + G.add_node(host, type="host", group=g, router=r, index=p) + G.add_edge(router, host) + + return G + + +def dragonfly_node_id_to_host_name(fat_idx: int, D: int, A: int, P: int) -> str: + """ + Given a contiguous fat‐index ∈ [0..(D*A*P − 1)], return "h_{g}_{r}_{p}". 
+ Hosts are laid out in order: + 0..(P−1) → group=0, router=0, p=0..P−1 + P..2P−1 → group=0, router=1, p=0..P−1 + … + (A*P)..(2A*P−1) → group=1, router=0, … + In general: + host_offset = fat_idx % P + router_offset = (fat_idx // P) % A + group = fat_idx // (A*P) + """ + total_hosts = D * A * P + assert 0 <= fat_idx < total_hosts, "fat_idx out of range" + + host_offset = fat_idx % P + router_group = (fat_idx // P) % A + pod = fat_idx // (A * P) + return f"h_{pod}_{router_group}_{host_offset}" diff --git a/raps/network/fat_tree.py b/raps/network/fat_tree.py new file mode 100644 index 0000000..2d27b39 --- /dev/null +++ b/raps/network/fat_tree.py @@ -0,0 +1,59 @@ +import networkx as nx + + +def node_id_to_host_name(node_id: int, k: int) -> str: + """ + Convert an integer node id to the host name string in the fat-tree. + Node IDs are assumed to be contiguous, mapping to h_{pod}_{edge}_{i}. + """ + # need to match the scheme from build_fattree + pod = node_id // (k * k // 4) + edge = (node_id % (k * k // 4)) // (k // 2) + host = node_id % (k // 2) + return f"h_{pod}_{edge}_{host}" + + +def build_fattree(k): + """ + Build a k-ary fat-tree: + - k pods + - each pod has k/2 edge switches, k/2 agg switches + - core layer has (k/2)^2 core switches + - each edge switch connects to k/2 hosts + Returns a NetworkX Graph where: + - hosts are named "h_{pod}_{edge}_{i}" + - edge switches "e_{pod}_{edge}" + - agg switches "a_{pod}_{agg}" + - core switches "c_{i}_{j}" + """ + G = nx.Graph() + # core + # num_core = (k//2)**2 # Unused! + for i in range(k // 2): + for j in range(k // 2): + core = f"c_{i}_{j}" + G.add_node(core, type="core") + # pods + for pod in range(k): + # agg switches + for agg in range(k // 2): + a = f"a_{pod}_{agg}" + G.add_node(a, type="agg") + # connect to all core switches in column agg + for i in range(k // 2): + core = f"c_{agg}_{i}" + G.add_edge(a, core) + # edge switches + hosts + for edge in range(k // 2): + e = f"e_{pod}_{edge}" + G.add_node(e, type="edge") + # connect edge→each agg in this pod + for agg in range(k // 2): + a = f"a_{pod}_{agg}" + G.add_edge(e, a) + # connect hosts + for h in range(k // 2): + host = f"h_{pod}_{edge}_{h}" + G.add_node(host, type="host") + G.add_edge(e, host) + return G diff --git a/raps/network/torus3d.py b/raps/network/torus3d.py new file mode 100644 index 0000000..50c988f --- /dev/null +++ b/raps/network/torus3d.py @@ -0,0 +1,152 @@ +import csv +import networkx as nx +from pathlib import Path + + +def build_torus3d(dims, wrap=True, link_bw=1e9, hosts_per_router=1, routing="DOR_XYZ", coords_csv=None): + """ + Build a 3D torus at router granularity, then attach host nodes to routers. + Node ids in the returned graph are host names ("h_x_y_z_i") and router names ("r_x_y_z"). + Edges have attribute 'capacity' (bytes/s) and 'latency' (per hop). 
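+
+    Illustrative example (assumed values, not a shipped config): dims=(2, 2, 2)
+    with hosts_per_router=1 builds 8 routers ("r_0_0_0" ... "r_1_1_1") and one
+    host per router ("h_0_0_0_0" ... "h_1_1_1_0"); with wrap=True each axis
+    closes into a ring.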
+ """ + X, Y, Z = map(int, dims) + G = nx.Graph() + + # Routers + def rname(x, y, z): + return f"r_{x}_{y}_{z}" + + for x in range(X): + for y in range(Y): + for z in range(Z): + G.add_node(rname(x, y, z), kind="router", coord=(x, y, z)) + + # Toroidal links between routers (±x, ±y, ±z) + def wrapi(i, n): + return (i + n) % n if wrap else (None if i < 0 or i >= n else i) + + for x in range(X): + for y in range(Y): + for z in range(Z): + u = rname(x, y, z) + # x+ + nxp = wrapi(x + 1, X) + v = rname(nxp, y, z) if nxp is not None else None + if v and not G.has_edge(u, v): + G.add_edge(u, v, capacity=link_bw) + # y+ + nyp = wrapi(y + 1, Y) + v = rname(x, nyp, z) if nyp is not None else None + if v and not G.has_edge(u, v): + G.add_edge(u, v, capacity=link_bw) + # z+ + nzp = wrapi(z + 1, Z) + v = rname(x, y, nzp) if nzp is not None else None + if v and not G.has_edge(u, v): + G.add_edge(u, v, capacity=link_bw) + + # Attach hosts to routers + host_to_router = {} + router_to_hosts = {} + + def hname(x, y, z, i): + return f"h_{x}_{y}_{z}_{i}" + + # If a nid→(x,y,z) CSV is supplied, place accordingly; else dense round-robin + # CSV format: nid,x,y,z[,i] + nid_placement = {} + if coords_csv: + p = Path(coords_csv) + with p.open("rt") as fh: + rd = csv.reader(fh) + for row in rd: + if not row: + continue + nid = int(row[0]) + x, y, z = map(int, row[1:4]) + i = int(row[4]) if len(row) > 4 else 0 + nid_placement[nid] = (x, y, z, i) + + # Build hosts + for x in range(X): + for y in range(Y): + for z in range(Z): + r = rname(x, y, z) + router_to_hosts[r] = [] + for i in range(hosts_per_router): + h = hname(x, y, z, i) + G.add_node(h, kind="host", coord=(x, y, z), local_index=i) + G.add_edge(h, r, capacity=link_bw) # host↔router edge; you can cap with NETWORK_MAX_BW instead + host_to_router[h] = r + router_to_hosts[r].append(h) + + meta = { + "dims": (X, Y, Z), + "wrap": wrap, + "routing": routing, + "host_to_router": host_to_router, + "router_to_hosts": router_to_hosts, + } + return G, meta + + +def _axis_steps(a, b, n, wrap=True): + """Return minimal step sequence along one axis from a to b with wrap-around.""" + if a == b: + return [] + fwd = (b - a) % n + back = (a - b) % n + if not wrap: + step = 1 if b > a else -1 + return [step] * abs(b - a) + if fwd <= back: + return [1] * fwd + else: + return [-1] * back + + +def torus_route_xyz(src_r, dst_r, dims, wrap=True): + """Router-level path (list of router names) using XYZ dimension-order routing.""" + X, Y, Z = dims + + def parse(r): + _, x, y, z = r.split("_") + return int(x), int(y), int(z) + + x1, y1, z1 = parse(src_r) + x2, y2, z2 = parse(dst_r) + + path = [src_r] + x, y, z = x1, y1, z1 + for step in _axis_steps(x, x2, X, wrap): + x = (x + step) % X + path.append(f"r_{x}_{y}_{z}") + for step in _axis_steps(y, y2, Y, wrap): + y = (y + step) % Y + path.append(f"r_{x}_{y}_{z}") + for step in _axis_steps(z, z2, Z, wrap): + z = (z + step) % Z + path.append(f"r_{x}_{y}_{z}") + return path + + +def torus_host_path(G, meta, h_src, h_dst): + r_src = meta["host_to_router"][h_src] + r_dst = meta["host_to_router"][h_dst] + routers = torus_route_xyz(r_src, r_dst, meta["dims"], meta["wrap"]) + # host->src_router + (router path) + dst_router->host + path = [h_src, r_src] + routers[1:] + [h_dst] + return path + + +def link_loads_for_job_torus(G, meta, host_list, traffic_bytes): + # all-to-all between hosts in host_list, route via torus_host_path, add traffic_bytes per pair + loads = {} + n = len(host_list) + for i in range(n): + for j in range(i + 1, n): + p = 
torus_host_path(G, meta, host_list[i], host_list[j])
+            for u, v in zip(p, p[1:]):
+                e = tuple(sorted((u, v)))
+                loads[e] = loads.get(e, 0) + traffic_bytes
+    return loads
diff --git a/raps/sim_config.py b/raps/sim_config.py
index 26a328a..f003a3e 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -202,6 +202,9 @@ class SimConfig(BaseModel):
     maxqueue: int = 50
     """ Specify the max queue length for continuous job generation """
 
+    filter: str | None = None
+    """ Job filter expression, e.g. "traffic > 1e8" """
+
     @model_validator(mode="before")
     def _validate_before(cls, data):
         # This is called with the raw input, before Pydantic parses it, so data is just a dict and
diff --git a/raps/system_config.py b/raps/system_config.py
index 642bb98..726c086 100644
--- a/raps/system_config.py
+++ b/raps/system_config.py
@@ -1,6 +1,6 @@
-import functools
 import glob
 import fnmatch
+import functools
 from typing import Any, Literal
 from pathlib import Path
 from functools import cached_property
@@ -141,7 +141,7 @@ class SystemCoolingConfig(BaseModel):
 
 
 class SystemNetworkConfig(BaseModel):
-    topology: Literal["fat-tree", "dragonfly", "torus3d"]
+    topology: Literal["capacity", "fat-tree", "dragonfly", "torus3d"]
     network_max_bw: float
     latency: float | None = None
 
-- 
GitLab


From f4212243c39c6bb75ae32d249c090a30f75e4cfa Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Thu, 4 Sep 2025 16:58:12 -0400
Subject: [PATCH 284/388] Fix workload no time

---
 raps/workload.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/raps/workload.py b/raps/workload.py
index 338976d..9e26c97 100644
--- a/raps/workload.py
+++ b/raps/workload.py
@@ -67,9 +67,10 @@ class Workload:
         # This function calls the job generation function as specified by the workload keyword.
         # The respective function of this class is called.
jobs = getattr(self, self.args.workload)(args=self.args) + timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) return WorkloadData( jobs=jobs, - telemetry_start=0, telemetry_end=self.args.time, + telemetry_start=0, telemetry_end=timestep_end, start_date=self.args.start, ) -- GitLab From d3dce958624c32c8aba811acd7911d92bc377e5f Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 4 Sep 2025 17:16:20 -0400 Subject: [PATCH 285/388] Fix workload --- raps/workload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/workload.py b/raps/workload.py index 9e26c97..6fb3c3b 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -981,7 +981,7 @@ def run_workload(sim_config: SimConfig): jobs = td.load_from_files(sim_config.replay).jobs else: workload = Workload(args, config) - jobs = getattr(workload, sim_config.workload)(args=sim_config.get_legacy_args) + jobs = getattr(workload, sim_config.workload)(args=sim_config.get_legacy_args()) plot_job_hist(jobs, config=config, dist_split=sim_config.multimodal, -- GitLab From 0469271890ec4642e9f35eb6c9c3b4fa1affaf45 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Fri, 5 Sep 2025 09:15:47 -0400 Subject: [PATCH 286/388] Fix bug in update_jobs Scale can be 0 --- raps/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index 1b5c256..340b9ae 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -222,7 +222,7 @@ class Telemetry: def update_jobs(self, jobs: list[Job]): """ Updates jobs with new scale or random start times """ - if self.kwargs.get("scale") is not None: + if self.kwargs.get("scale"): for job in jobs: job.nodes_required = random.randint(1, self.kwargs['scale']) job.scheduled_nodes = None # Setting to None triggers scheduler to assign nodes -- GitLab From 34b3e6f75c0d8c764a43ddf24f0ae03f9bf46a6f Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 4 Sep 2025 22:11:01 -0400 Subject: [PATCH 287/388] Fix issues with mit_supercloud dataloader - add exp files for: telemetry and synthetic Fix some issues with experiments/mit-replay-24hrs.yaml --- experiments/mit-replay-24hrs.yaml | 6 +++ experiments/mit-synthetic.yaml | 3 ++ experiments/mit.yaml | 6 --- raps/dataloaders/mit_supercloud/loader.py | 58 ++++++++++++++++++----- 4 files changed, 54 insertions(+), 19 deletions(-) create mode 100644 experiments/mit-replay-24hrs.yaml create mode 100644 experiments/mit-synthetic.yaml delete mode 100644 experiments/mit.yaml diff --git a/experiments/mit-replay-24hrs.yaml b/experiments/mit-replay-24hrs.yaml new file mode 100644 index 0000000..1357886 --- /dev/null +++ b/experiments/mit-replay-24hrs.yaml @@ -0,0 +1,6 @@ +# python main.py run-multi-part experiments/mit-replay-24hrs.yaml +partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] +replay: + - /opt/data/mit_supercloud/202201 +start: 2021-05-21T00:00 +end: 2021-05-22T00:00 diff --git a/experiments/mit-synthetic.yaml b/experiments/mit-synthetic.yaml new file mode 100644 index 0000000..5f68cd1 --- /dev/null +++ b/experiments/mit-synthetic.yaml @@ -0,0 +1,3 @@ +# python main.py run-multi-part experiments/mit-synthetic.yaml +partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] +workload: multitenant diff --git a/experiments/mit.yaml b/experiments/mit.yaml deleted file mode 100644 index 77815d4..0000000 --- a/experiments/mit.yaml +++ /dev/null @@ -1,6 +0,0 @@ -system: mit_supercloud -partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] -replay: - - /opt/data/mit_supercloud 
-start: 2021-05-21T13:00 -end: 2021-05-21T14:00 diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index e3103ba..d0e32f4 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -116,9 +116,9 @@ import re from tqdm import tqdm from typing import Dict, Union, Optional from collections import Counter -from datetime import datetime, timezone + from raps.job import job_dict, Job -from raps.utils import summarize_ranges, WorkloadData +from raps.utils import summarize_ranges, next_arrival from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END @@ -209,6 +209,8 @@ def load_data(local_dataset_path, **kwargs): jobs_list, sim_start_time, sim_end_time """ debug = kwargs.get("debug") + config = kwargs.get("config") + arrival = kwargs.get("arrival") NL_PATH = os.path.dirname(__file__) skip_counts = Counter() @@ -300,6 +302,17 @@ def load_data(local_dataset_path, **kwargs): cpu_only = (part == "part-cpu") mixed = (part == "part-gpu") + # handle single-partition configs (e.g., mit_supercloud.yaml) + if not cpu_only and not mixed: + gpus_per_node = config.get("GPUS_PER_NODE") + + if gpus_per_node == 0: + cpu_only = True + part = "part-cpu" + else: + mixed = True + part = "part-gpu" + # create nodelist mapping if cpu_only: with open(os.path.join(NL_PATH, "cpu_nodes.txt")) as f: @@ -516,7 +529,6 @@ def load_data(local_dataset_path, **kwargs): jobs_list = [] # Get CPUS_PER_NODE and GPUS_PER_NODE from config - config = kwargs.get('config', {}) cpus_per_node = config.get('CPUS_PER_NODE') cores_per_cpu = config.get('CORES_PER_CPU') # gpus_per_node = config.get('GPUS_PER_NODE') # Unused @@ -573,7 +585,21 @@ def load_data(local_dataset_path, **kwargs): cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node # Is this per CPU? 
cpu_tr = [min(x/cores_per_cpu/cpus_per_node, cpu_peak) for x in cpu_tr] - submit_time = rec.get("time_submit", t0) - start_ts + if arrival == "poisson": + job_arrival_time = config.get("JOB_ARRIVAL_TIME") + submit_time = next_arrival(1 / job_arrival_time) + start_time = submit_time + end_time = None + scheduled_nodes = None + telemetry_start = 0 + telemetry_end = 86640 + else: # replay + start_time = t0 - start_ts + end_time = t1 - start_ts + submit_time = rec.get("time_submit") - start_ts + scheduled_nodes = rec.get("scheduled_nodes") + telemetry_start = int(sl.time_start.min()) + telemetry_end = int(sl.time_end.max()) current_job_dict = job_dict( nodes_required=nr, @@ -587,12 +613,12 @@ def load_data(local_dataset_path, **kwargs): nrx_trace=[], end_state=rec.get("state_end", "unknown"), id=jid, - scheduled_nodes=rec.get("scheduled_nodes"), + scheduled_nodes=scheduled_nodes, priority=rec.get("priority", 0), submit_time=submit_time, - time_limit=rec.get("time_limit", 0), - start_time=t0 - start_ts, - end_time=t1 - start_ts, + time_limit=rec.get("timelimit") * 60, + start_time=start_time, + end_time=end_time, expected_run_time=max(0, t1-t0), trace_time=len(cpu_tr)*quanta, trace_start_time=0, @@ -602,12 +628,18 @@ def load_data(local_dataset_path, **kwargs): job = Job(current_job_dict) jobs_list.append(job) + # Calculate min_overall_utime and max_overall_utime + # min_overall_utime = int(sl.time_submit.min()) + # max_overall_utime = int(sl.time_submit.max()) + + # args_namespace = SimpleNamespace( + # fastforward=min_overall_utime, + # system='mit_supercloud', + # time=max_overall_utime + # ) + print("\nSkipped jobs summary:") for reason, count in skip_counts.items(): print(f"- {reason}: {count}") - return WorkloadData( - jobs=jobs_list, - telemetry_start=0, telemetry_end=int(end_ts - start_ts), - start_date=datetime.fromtimestamp(start_ts, timezone.utc), - ) + return jobs_list, telemetry_start, telemetry_end # min_overall_utime, max_overall_utime, args_namespace -- GitLab From 81ab189f5216d41023e9153f76a092f310ff178b Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Fri, 5 Sep 2025 08:21:56 -0400 Subject: [PATCH 288/388] Fix conflicts --- raps/dataloaders/mit_supercloud/loader.py | 28 +++++++++-------------- raps/multi_part_engine.py | 5 +++- raps/run_sim.py | 6 ----- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index d0e32f4..2c8dbc1 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -116,9 +116,10 @@ import re from tqdm import tqdm from typing import Dict, Union, Optional from collections import Counter +from datetime import datetime, timezone from raps.job import job_dict, Job -from raps.utils import summarize_ranges, next_arrival +from raps.utils import summarize_ranges, next_arrival, WorkloadData from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END @@ -585,21 +586,10 @@ def load_data(local_dataset_path, **kwargs): cpu_peak = cpu_cores_req / cores_per_cpu / cpus_per_node # Is this per CPU? 
cpu_tr = [min(x/cores_per_cpu/cpus_per_node, cpu_peak) for x in cpu_tr] - if arrival == "poisson": - job_arrival_time = config.get("JOB_ARRIVAL_TIME") - submit_time = next_arrival(1 / job_arrival_time) - start_time = submit_time - end_time = None - scheduled_nodes = None - telemetry_start = 0 - telemetry_end = 86640 - else: # replay - start_time = t0 - start_ts - end_time = t1 - start_ts - submit_time = rec.get("time_submit") - start_ts - scheduled_nodes = rec.get("scheduled_nodes") - telemetry_start = int(sl.time_start.min()) - telemetry_end = int(sl.time_end.max()) + start_time = t0 - start_ts + end_time = t1 - start_ts + submit_time = rec.get("time_submit") - start_ts + scheduled_nodes = rec.get("scheduled_nodes") current_job_dict = job_dict( nodes_required=nr, @@ -642,4 +632,8 @@ def load_data(local_dataset_path, **kwargs): for reason, count in skip_counts.items(): print(f"- {reason}: {count}") - return jobs_list, telemetry_start, telemetry_end # min_overall_utime, max_overall_utime, args_namespace + return WorkloadData( + jobs=jobs_list, + telemetry_start=0, telemetry_end=int(end_ts - start_ts), + start_date=datetime.fromtimestamp(start_ts, timezone.utc), + ) diff --git a/raps/multi_part_engine.py b/raps/multi_part_engine.py index f211b85..944ced9 100644 --- a/raps/multi_part_engine.py +++ b/raps/multi_part_engine.py @@ -21,7 +21,7 @@ class MultiPartEngine: workloads_by_partition: dict[str, WorkloadData] = {} engines: dict[str, Engine] = {} - timestep_start, timestep_end, time_delta = 0, 0, 0 + time_delta = 0 for partition in sim_config.system_configs: name = partition.system_name engine, workload_data, time_delta = Engine.from_sim_config( @@ -31,6 +31,9 @@ class MultiPartEngine: job.partition = name workloads_by_partition[name] = workload_data engines[name] = engine + timestep_start = min(w.telemetry_start for w in workloads_by_partition.values()) + timestep_end = min(w.telemetry_end for w in workloads_by_partition.values()) + total_initial_jobs = sum(len(j.jobs) for j in workloads_by_partition.values()) for engine in engines.values(): engine.total_initial_jobs = total_initial_jobs diff --git a/raps/run_sim.py b/raps/run_sim.py index ceb80a9..402acea 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -245,12 +245,6 @@ def run_multi_part_sim(sim_config: SimConfig): multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ MultiPartEngine.from_sim_config(sim_config) - # TODO: The mit_supercloud dataloader seems to be outputting the wrong timesteps? mit_supercloud - # is the only multi-partition system with replay, so just manually overriding the timesteps here - # to fix it for now. The original multi-part-sim.py always started from timestep 0 as well. - timestep_end = timestep_end - timestep_start - timestep_start = 0 - if sim_config.output: for part, engine in multi_engine.engines.items(): engine.telemetry.save_snapshot( -- GitLab From 334fb61c65ab19ab4f341d53bbcb850cf029f16b Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 5 Sep 2025 10:52:47 -0400 Subject: [PATCH 289/388] Rename run-multi-part subcommand to run-parts. Throw warning when running with single partition. 
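
Example of the new warning (hypothetical session; the message text matches the
check added to run_parts_sim below, though the warning's file:line prefix will
vary by install):

    $ raps run-parts -x setonix/part-cpu
    UserWarning: run_parts_sim is usually for multiple partitions. Did you mean to run with one?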
---
 README.md       | 31 ++++++++++++++-----------------
 main.py         |  4 ++--
 raps/run_sim.py | 18 +++++++++++++-----
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index eeb5a5d..e49e08b 100644
--- a/README.md
+++ b/README.md
@@ -62,21 +62,20 @@ For MIT Supercloud
     python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:00 --end 2021-05-21T14:00
 
     # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files
-    raps run-multi-part -x 'mit_supercloud/*' -f $DPATH --system mit_supercloud \
-        --start 2021-05-21T13:00 --end 2021-05-21T14:00
+    raps run-parts -x mit_supercloud -f $DPATH --system mit_supercloud --start 2021-05-21T13:00 --end 2021-05-21T14:00
 
     # Note: if no start, end dates provided will default to run 24 hours between
     # 2021-05-21T00:00 to 2021-05-22T00:00 set by defaults in raps/dataloaders/mit_supercloud/utils.py
 
     # Re-run simulation using npz files (much faster load)
-    raps run-multi-part -x mit_supercloud/* -f part-*.npz --system mit_supercloud
+    raps run-parts -x mit_supercloud -f part-*.npz --system mit_supercloud
 
     # Synthetic tests for verification studies:
-    raps run-multi-part -x 'mit_supercloud/*' -w multitenant
+    raps run-parts -x mit_supercloud -w multitenant
 
 For Lumi
 
-    # Synthetic test for lumi multi-part-sim:
-    raps run-multi-part -x lumi/*
+    # Synthetic test for Lumi:
+    raps run-parts -x lumi
 
 ## Perform Network Simulation
@@ -93,7 +92,6 @@ given instead of the parquet files for more quickly running subsequent simulatio
 
     raps run -f jobs_2024-02-20_12-20-39.npz
 
-
 ## Cooling models
 
 We provide several cooling models in the repo https://code.ornl.gov/exadigit/POWER9CSM
@@ -111,23 +109,21 @@ use `--cooling` or `-c` argument. e.g.,
 
 ## Support for multiple system partitions
 
-Multi-partition systems are supported by running the `multi-part-sim.py` script, where a list of configurations can be specified using the `-x` flag as follows:
+Multi-partition systems are supported by running the `raps run-parts` command, where a list of partitions can be specified using the `-x` flag as follows:
 
-    raps run-multi-part -x setonix/part-cpu setonix/part-gpu
+    raps run-parts -x setonix/part-cpu setonix/part-gpu
 
 or simply:
 
-    raps run-multi-part -x setonix/*    # bash
-
-    raps run-multi-part -x 'setonix/*'  # zsh
+    raps run-parts -x setonix
 
 This will simulate synthetic workloads on two partitions as defined in `config/setonix-cpu` and `config/setonix-gpu`. To replay telemetry workloads from another system, e.g., Marconi100's PM100 dataset, first create a .npz snapshot of the telemetry data, e.g.,
 
-    raps run-multi-part --system marconi100 -f /path/to/marconi100/job_table.parquet
+    raps run-parts --system marconi100 -f /path/to/marconi100/job_table.parquet
 
-This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this file to pm100.npz for clarity. Note: can control-C when the simulation starts. Now, this pm100.npz file can be used with `multi-part-sim.py` as follows:
+This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename this file to pm100.npz for clarity. Note: can control-C when the simulation starts. Now, this pm100.npz file can be used as follows:
 
-    raps run-multi-part -x setonix/* -f pm100.npz --arrival poisson --scale 192
+    raps run-parts -x setonix -f pm100.npz --arrival poisson --scale 192
 
 ## Modifications to telemetry replay
 
 There are three ways to modify replaying of telemetry data:
 1. `--arrival`. Changing the arrival time distribution - replay cases will default to `--arrival prescribed`, where the jobs will be submitted exactly as they were submitted on the physical machine. This can be changed to `--arrival poisson` to change when the jobs arrive, which is especially useful in cases where there may be gaps in time, e.g., when the system goes down for several days, or the system is underutilized.
 python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --arrival poisson
-2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler.
-python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h
+2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler, e.g.:
+
+    python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h
 3. `--scale`. Changing the scale of each job in the telemetry data. The `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition), and randomly select the number of nodes for each job from one to max nodes. This flag is useful when replaying telemetry from a larger system onto a smaller system.
diff --git a/main.py b/main.py
index 1e52395..7c38960 100644
--- a/main.py
+++ b/main.py
@@ -3,7 +3,7 @@ ExaDigiT Resource Allocator & Power Simulator (RAPS)
 """
 import argparse
 from raps.helpers import check_python_version
-from raps.run_sim import run_sim_add_parser, run_multi_part_sim_add_parser, show_add_parser
+from raps.run_sim import run_sim_add_parser, run_parts_sim_add_parser, show_add_parser
 from raps.workload import run_workload_add_parser
 from raps.telemetry import run_telemetry_add_parser
 
@@ -20,7 +20,7 @@ def main(cli_args: list[str] | None = None):
     subparsers = parser.add_subparsers(required=True)
 
     run_sim_add_parser(subparsers)
-    run_multi_part_sim_add_parser(subparsers)
+    run_parts_sim_add_parser(subparsers)
     show_add_parser(subparsers)
     run_workload_add_parser(subparsers)
     run_telemetry_add_parser(subparsers)
diff --git a/raps/run_sim.py b/raps/run_sim.py
index 402acea..5afd6f1 100644
--- a/raps/run_sim.py
+++ b/raps/run_sim.py
@@ -7,6 +7,7 @@ import json
 import pandas as pd
 import sys
 import yaml
+import warnings
 from pathlib import Path
 from raps.ui import LayoutManager
 from raps.plotting import Plotter
@@ -73,7 +74,7 @@ def run_sim(sim_config: SimConfig):
     if sim_config.verbose or sim_config.debug:
         print(f"SimConfig: {sim_config.model_dump_json(indent=4)}")
     if len(sim_config.system_configs) > 1:
-        print("Use run-multi-part to run multi-partition simulations")
+        print("Use run-parts to run multi-partition simulations")
         sys.exit(1)
 
     engine, workload_data, time_delta = Engine.from_sim_config(sim_config)
@@ -221,8 +222,8 @@ def run_sim(sim_config: SimConfig):
         print("Output directory is: ", out)  # If output is enabled, the user wants this information as last output
 
 
-def run_multi_part_sim_add_parser(subparsers: SubParsers):
-    parser = 
subparsers.add_parser("run-multi-part", description=""" +def run_parts_sim_add_parser(subparsers: SubParsers): + parser = subparsers.add_parser("run-parts", description=""" Simulates multi-partition (heterogeneous) systems. Supports replaying telemetry or generating synthetic workloads across CPU-only, GPU, and mixed partitions. Initializes per-partition power, FLOPS, and scheduling models, then advances simulations in lockstep. @@ -237,11 +238,18 @@ def run_multi_part_sim_add_parser(subparsers: SubParsers): "cli_shortcuts": shortcuts, }) parser.set_defaults( - impl=lambda args: run_multi_part_sim(model_validate(args, read_yaml(args.config_file))) + impl=lambda args: run_parts_sim(model_validate(args, read_yaml(args.config_file))) ) -def run_multi_part_sim(sim_config: SimConfig): +def run_parts_sim(sim_config: SimConfig): + + if len(sim_config.system_configs) == 1: + warnings.warn( + "run_parts_sim is usually for multiple partitions. Did you mean to run with one?", + UserWarning + ) + multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ MultiPartEngine.from_sim_config(sim_config) -- GitLab From c8d4d84f0510443efe3bf5585e2f30c88875a0a4 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 5 Sep 2025 11:02:32 -0400 Subject: [PATCH 290/388] Update tests to use `run-parts` instead of `run-multi-part` --- tests/smoke.py | 2 +- tests/systems/test_multi_part_sim_basic_run.py | 2 +- tests/systems/test_multi_part_sim_network_run.py | 2 +- tests/systems/test_multi_part_sim_withdata_run.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/smoke.py b/tests/smoke.py index 7548de3..946f6db 100644 --- a/tests/smoke.py +++ b/tests/smoke.py @@ -54,7 +54,7 @@ def synthetic_workload_tests(): def hetero_tests(): """Run heterogeneous workload tests.""" print("Starting heterogeneous workload tests...") - run_command(f"python main.py run-multi-part -x setonix/part-cpu setonix/part-gpu -t {DEFAULT_TIME}") + run_command(f"python main.py run-parts -x setonix/part-cpu setonix/part-gpu -t {DEFAULT_TIME}") def main(): diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py index 3ea2a9c..9351fd6 100644 --- a/tests/systems/test_multi_part_sim_basic_run.py +++ b/tests/systems/test_multi_part_sim_basic_run.py @@ -18,7 +18,7 @@ def test_multi_part_sim_basic_run(system, system_config): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", "run-multi-part", + "python", "main.py", "run-parts", "--time", "1h", "-x", f"{system}/*", ], capture_output=True, text=True, stdin=subprocess.DEVNULL) diff --git a/tests/systems/test_multi_part_sim_network_run.py b/tests/systems/test_multi_part_sim_network_run.py index aa90cca..c556014 100644 --- a/tests/systems/test_multi_part_sim_network_run.py +++ b/tests/systems/test_multi_part_sim_network_run.py @@ -19,7 +19,7 @@ def test_multi_part_sim_network_run(system, system_config, sim_output): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", "run-multi-part", + "python", "main.py", "run-parts", "--time", "1h", "-x", f"{system}/*", "--net", diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index 9694969..f38cf8e 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -20,7 +20,7 @@ def test_multi_part_sim_withdata_run(system, system_config, system_files): os.chdir(PROJECT_ROOT) result = subprocess.run([ - "python", "main.py", 
"run-multi-part", + "python", "main.py", "run-parts", "--time", "1h", "-x", f"{system}/*", "-f", ','.join(system_files), -- GitLab From 85438eb0767134d0b1a61e0f51f74ab90f1310f7 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 5 Sep 2025 11:08:10 -0400 Subject: [PATCH 291/388] Update README.md for how to run tests for multi-partition systems --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index e49e08b..bccaec1 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,12 @@ RAPS_DATA_DIR=/opt/data pytest -n auto -x By default, tests are parallelized with `pytest-xdist` (`-n auto`) to speed up execution. The `-x` flag stops execution after the first failure. Add `-v` to run in verbose mode. +### Run tests on multi-partition systems + +```bash +pytest -v -k "multi_part_sim" +``` + ### Run only network-related tests ```bash -- GitLab From 54e5390a6af45afe37f2b527f87b41b744f7e008 Mon Sep 17 00:00:00 2001 From: "Maiterth, Matthias" Date: Fri, 5 Sep 2025 17:14:05 +0000 Subject: [PATCH 292/388] Removed unused schedulers and fix scheduling poclicies of schedulers. --- raps/sim_config.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/raps/sim_config.py b/raps/sim_config.py index a3091bf..5cdd09c 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -8,6 +8,7 @@ from raps.utils import ( ) from raps.system_config import SystemConfig, get_partition_configs from pydantic import BaseModel, model_validator +import importlib Distribution = Literal['uniform', 'weibull', 'normal'] @@ -158,12 +159,16 @@ class SimConfig(BaseModel): # Synthetic workloads scheduler: Literal[ - "default", "scheduleflow", "fastsim", "anl", "flux", "experimental", "multitenant", + "default", + "experimental", + "fastsim", + "multitenant", + "scheduleflow", ] = "default" """ Scheduler name """ - policy: PolicyType | None = None + policy: str | None = None """ Schedule policy """ - backfill: BackfillType | None = None + backfill: str | None = None """ Backfill policy """ # Arrival @@ -258,6 +263,34 @@ class SimConfig(BaseModel): if self.live and not self.replay and self.time is None: raise ValueError("--time must be set, specifing how long we want to predict") + if self.policy or self.backfill: + try: + module = importlib.import_module(f"raps.schedulers.{self.scheduler}") + except ImportError as e: + raise ValueError(f"Scheduler '{self.scheduler}' could not be imported") from e + + if self.policy: + extended_policytypes = getattr(module, "ExtendedPolicyType", None) + + valid_policies = set(m.value for m in PolicyType) + if extended_policytypes is not None: + valid_policies |= {m.value for m in extended_policytypes} + + if self.policy not in valid_policies: + raise ValueError(f"policy {self.policy} not implemented by {self.scheduler}. " + f"Valid selections: {sorted(valid_policies)}") + + if self.backfill: + extended_backfilltypes = getattr(module, "ExtendedBackfillType", None) + + valid_backfilltypes = set(m.value for m in BackfillType) + if extended_backfilltypes is not None: + valid_backfilltypes |= {m.value for m in extended_backfilltypes} + + if self.backfill not in valid_backfilltypes: + raise ValueError(f"policy {self.backfill} not implemented by {self.scheduler}. 
" + f"Valid selections: {sorted(valid_backfilltypes)}") + return self @property -- GitLab From c80d121d67e3ccf8ebd3c2a4301a06146d50f683 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 9 Sep 2025 21:13:08 -0400 Subject: [PATCH 293/388] Quite a few changes to get the final RL results for the IEEE HPEC paper --- config/mit_supercloud.yaml | 52 +--- config/mit_supercloud/part-cpu.yaml | 2 +- experiments/mit.yaml | 4 +- experiments/mitrl.yaml | 4 +- raps/dataloaders/frontier.py | 32 ++- raps/dataloaders/mit_supercloud/loader.py | 2 +- raps/engine.py | 4 +- raps/envs/raps_env.py | 122 +++++++-- raps/resmgr/default.py | 3 +- raps/workload.py | 290 +++++++++++----------- train_rl.py | 4 + 11 files changed, 276 insertions(+), 243 deletions(-) mode change 100644 => 120000 config/mit_supercloud.yaml diff --git a/config/mit_supercloud.yaml b/config/mit_supercloud.yaml deleted file mode 100644 index b780b10..0000000 --- a/config/mit_supercloud.yaml +++ /dev/null @@ -1,51 +0,0 @@ -system: - num_cdus: 12 - racks_per_cdu: 1 - nodes_per_rack: 40 - chassis_per_rack: 8 - nodes_per_blade: 1 - switches_per_chassis: 4 - nics_per_node: 4 - rectifiers_per_chassis: 4 - nodes_per_rectifier: 4 - missing_racks: [] - down_nodes: [] - cpus_per_node: 2 - cores_per_cpu: 24 - gpus_per_node: 0 - cpu_peak_flops: 2995200000000.0 - gpu_peak_flops: 0 - cpu_fp_ratio: 0.667 - gpu_fp_ratio: 0.667 -power: - power_gpu_idle: 88 - power_gpu_max: 560 - power_cpu_idle: 1 - power_cpu_max: 6 - power_mem: 74.26 - power_nvme: 30 - power_nic: 20 - power_cdu: 8473.47 - power_switch: 250 - power_update_freq: 15 - rectifier_peak_threshold: 13670 - sivoc_loss_constant: 13 - sivoc_efficiency: 0.98 - rectifier_loss_constant: 17 - rectifier_efficiency: 0.96 - power_cost: 0.094 -scheduler: - multitenant: true - job_arrival_time: 1 - mtbf: 11 - trace_quanta: 10 - min_wall_time: 3600 - max_wall_time: 43200 - ui_update_freq: 900 - max_nodes_per_job: 3000 - job_end_probs: - COMPLETED: 0.63 - FAILED: 0.13 - CANCELLED: 0.12 - TIMEOUT: 0.11 - NODE_FAIL: 0.01 diff --git a/config/mit_supercloud.yaml b/config/mit_supercloud.yaml new file mode 120000 index 0000000..2167597 --- /dev/null +++ b/config/mit_supercloud.yaml @@ -0,0 +1 @@ +mit_supercloud/part-gpu.yaml \ No newline at end of file diff --git a/config/mit_supercloud/part-cpu.yaml b/config/mit_supercloud/part-cpu.yaml index 111882d..b780b10 100644 --- a/config/mit_supercloud/part-cpu.yaml +++ b/config/mit_supercloud/part-cpu.yaml @@ -36,7 +36,7 @@ power: power_cost: 0.094 scheduler: multitenant: true - job_arrival_time: 900 + job_arrival_time: 1 mtbf: 11 trace_quanta: 10 min_wall_time: 3600 diff --git a/experiments/mit.yaml b/experiments/mit.yaml index 83892f9..3dcd692 100644 --- a/experiments/mit.yaml +++ b/experiments/mit.yaml @@ -1,5 +1,5 @@ partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] replay: - /opt/data/mit_supercloud -start: 2021-05-21T13:00 -end: 2021-05-21T14:00 +start: 2021-05-21T00:00 +end: 2021-05-22T00:00 diff --git a/experiments/mitrl.yaml b/experiments/mitrl.yaml index c0adbfe..3bbd988 100644 --- a/experiments/mitrl.yaml +++ b/experiments/mitrl.yaml @@ -1,7 +1,7 @@ system: "mit_supercloud" replay: - /opt/data/mit_supercloud -start: 2021-05-21T21:00 -end: 2021-05-21T22:00 +start: 2021-05-21T00:00 +end: 2021-05-22T00:00 episode_length: 500 arrival: poisson diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 8491617..e749fa8 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -118,7 +118,7 @@ def 
load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar - end_time # Maybe Null - expected_run_time (end_time - start_time) # Maybe Null - current_run_time (How long did the job run already, when loading) # Maybe zero - - trace_time (lenght of each trace in seconds) # Maybe Null + - trace_time (length of each trace in seconds) # Maybe Null - trace_start_time (time offset in seconds after which the trace starts) # Maybe Null - trace_end_time (time offset in seconds after which the trace ends) # Maybe Null - trace_quanta (job's associated trace quanta, to correctly replay with different trace quanta) # Maybe Null @@ -269,8 +269,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar if arrival == 'poisson': # Modify the arrival times of the jobs according to Poisson distribution scheduled_nodes = None submit_time = next_arrival_byconfkwargs(config, kwargs) - start_time = None # ? - end_time = None # ? + end_time = submit_time + end_time - start_time + start_time = submit_time priority = aging_boost(nodes_required) else: # Prescribed replay @@ -281,24 +281,20 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar indices = xname_to_index(xname, config) scheduled_nodes.append(indices) + if end_time < telemetry_start: + print("Job ends before first recorded telemetry entry:", job_id, "start:", + start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.") + continue # skip + + if start_time > telemetry_end: + print("Job starts after last recorded telemetry entry:", job_id, "start:", + start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.") + continue # skip + # Throw out jobs that are not valid! if gpu_trace.size == 0: print("ignoring job b/c zero trace:", jidx, submit_time, start_time, nodes_required) - continue # SKIP! - if end_time < telemetry_start: - # raise ValueError("Job ends before frist recorded telemetry entry:", - # job_id, "start:", start_time,"end:",end_time, - # " Telemetry: ", len(gpu_trace), "entries.") - print("Job ends before frist recorded telemetry entry:", job_id, "start:", - start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.") - continue # SKIP! - if start_time > telemetry_end: - # raise ValueError("Job starts after last recorded telemetry entry:", - # job_id, "start:", start_time,"end:",end_time, - # " Telemetry: ", len(gpu_trace), "entries.") - print("Job starts after last recorded telemetry entry:", job_id, "start:", - start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.") - continue # SKIP! 
+ continue # skip if gpu_trace.size > 0 and (jid == job_id or jid == '*'): # and time_submit >= 0: job_info = job_dict( diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index ab08f69..d0e32f4 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -616,7 +616,7 @@ def load_data(local_dataset_path, **kwargs): scheduled_nodes=scheduled_nodes, priority=rec.get("priority", 0), submit_time=submit_time, - time_limit=rec.get("timelimit", 0), + time_limit=rec.get("timelimit") * 60, start_time=start_time, end_time=end_time, expected_run_time=max(0, t1-t0), diff --git a/raps/engine.py b/raps/engine.py index b4aa713..89fc400 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -402,7 +402,7 @@ class Engine: job.running_time = self.current_timestep - job.start_time if job.current_state != JobState.RUNNING: - raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.currentstate}") + raise ValueError(f"Job is in running list, but state is not RUNNING: job.state == {job.current_state}") else: # if job.state == JobState.RUNNING: # Error checks if job.running_time > job.time_limit: @@ -609,7 +609,7 @@ class Engine: # listener_thread = threading.Thread(target=keyboard_listener, args=(sim_state,), daemon=True) # listener_thread.start() - while self.current_timestep < self.timestep_end: # Runs every seconds! + while self.current_timestep < self.timestep_end: # Runs every second if sim_state.is_paused(): time.sleep(0.1) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index c5cd2f2..bf3a161 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -1,6 +1,7 @@ +import copy import gym -import numpy as np from gym import spaces +import numpy as np from raps.engine import Engine from raps.power import PowerManager, compute_node_power @@ -30,7 +31,7 @@ def print_stats(stats, step=0): "average power": "engine/Average Power", "system power efficiency": "engine/System Power Efficiency", "total energy consumed": "engine/Total Energy Consumed", - "carbon emissions": "engine/Carbon Emissions", + "carbon emissions": "engine/Carbon Footprint", "jobs completed": "jobs/Jobs Completed", "throughput": "jobs/Throughput", "jobs still running": "jobs/Jobs Still Running", @@ -73,6 +74,7 @@ class RAPSEnv(gym.Env): # --- Build initial jobs & time bounds --- self.jobs, self.timestep_start, self.timestep_end = self._build_jobs() + self.original_jobs = self.jobs # keep pristine version self.engine = Engine( power_manager=self.power_manager, @@ -167,11 +169,60 @@ class RAPSEnv(gym.Env): else: raise ValueError("RAPSEnv requires either --workload or --replay to build jobs.") +# def reset(self, seed=None, options=None): +# super().reset(seed=seed) +# +# self.jobs = copy.deepcopy(self.original_jobs) # working copy +# +# # Reset engine +# self.engine.current_timestep = 0 +# #self.engine.reset() # or clear state manually +# power_manager = PowerManager(compute_node_power, **self.config) +# flops_manager = FLOPSManager(**self.args_dict) +# telemetry = Telemetry(**self.args_dict) +# jobs, timestep_start, timestep_end = self._build_jobs() +# +# self.engine = Engine( +# power_manager=power_manager, +# flops_manager=flops_manager, +# jobs=jobs, +# **self.args_dict +# ) +# +# self.engine.timestep_start = timestep_start +# self.engine.timestep_end = timestep_end +# #self.engine.current_timestep = timestep_start +# +# # Restart generator +# self.generator = self.layout_manager.run_stepwise( +# self.jobs, 
+# timestep_start=self.timestep_start, +# timestep_end=self.timestep_end, +# time_delta=self.args_dict.get("time_delta"), +# ) +# +# return self._get_state(), {} + def reset(self, **kwargs): - self.engine.jobs = self.jobs - self.engine.timestep_start = self.timestep_start - self.engine.timestep_end = self.timestep_end - self.engine.current_timestep = self.timestep_start + completed = [j.id for j in self.jobs if j.current_state.name == "COMPLETED"] + print(f"[RESET] Jobs already completed before deepcopy: {len(completed)}") + + super().reset(seed=42) + # self.engine.jobs = self.jobs + self.jobs = copy.deepcopy(self.original_jobs) # working copy + + # self.engine.timestep_start = self.timestep_start + # self.engine.timestep_end = self.timestep_end + # self.engine.reset(self.jobs, self.timestep_start, self.timestep_end) + + # self.engine.current_timestep = self.timestep_start + + # self.engine.jobs = self.jobs # repoint engine to fresh jobs + # self.engine.completed_jobs = [] + # self.engine.queue.clear() + # self.engine.running.clear() + # self.engine.power_manager.history.clear() + # self.engine.jobs_completed = 0 self.generator = self.layout_manager.run_stepwise( self.jobs, @@ -184,26 +235,51 @@ class RAPSEnv(gym.Env): def _compute_reward(self, tick_data): """ - Reward function: minimize carbon footprint per job completed. - Encourages the agent to complete jobs while keeping emissions low. + Reward function for RL scheduling on Frontier-like systems. + Balances throughput and carbon footprint, using incremental values. """ - reward = 0.0 - # Jobs completed this tick - jobs_completed = len(getattr(tick_data, "completed", [])) + # How many jobs completed *this tick* + jobs_done = len(getattr(tick_data, "completed", [])) - # Carbon emitted so far (metric tons CO2) - carbon_so_far = getattr(self.engine, "carbon emissions", 0.0) + # Incremental carbon emitted this tick + carbon_step = getattr(self.engine, "carbon emissions", 0.0) - if jobs_completed > 0: - # Reward is higher when more jobs finish with less carbon - reward = jobs_completed / (carbon_so_far + 1e-6) - else: - # Small penalty if no jobs finished (encourages progress) - reward = -0.01 + # Tradeoff weights (tunable hyperparameters) + alpha = 10.0 # reward for finishing a job + beta = 0.1 # penalty per metric ton CO2 + + # Reward = (jobs * alpha) - (carbon * beta) + reward = (alpha * jobs_done) - (beta * carbon_step) + + # Small penalty if idle and no jobs complete + if jobs_done == 0 and carbon_step == 0: + reward -= 0.01 return reward +# def _compute_reward(self, tick_data): +# """ +# Reward function: minimize carbon footprint per job completed. +# Encourages the agent to complete jobs while keeping emissions low. 
+# """ +# reward = 0.0 +# +# # Jobs completed this tick +# jobs_completed = len(getattr(tick_data, "completed", [])) +# +# # Carbon emitted so far (metric tons CO2) +# carbon_so_far = getattr(self.engine, "carbon emissions", 0.0) +# +# if jobs_completed > 0: +# # Reward is higher when more jobs finish with less carbon +# reward = jobs_completed / (carbon_so_far + 1e-6) +# else: +# # Small penalty if no jobs finished (encourages progress) +# reward = -0.01 +# +# return reward + def _compute_reward2(self, tick_data, alpha=10.0, beta=1.0, gamma=2.0): completed = getattr(tick_data, "completed", None) jobs_completed = len(completed) if completed else 0 @@ -252,12 +328,18 @@ class RAPSEnv(gym.Env): done = self.engine.current_timestep >= self.engine.timestep_end info = {} + print(f"t={self.engine.current_timestep}, " + f"queue={len(self.engine.queue)}, " + f"running={len(self.engine.running)}, " + f"completed={self.engine.jobs_completed}", + f"action={action}") + return obs, reward, done, info def _get_state(self): """Construct simple state representation from engine's job queue.""" # Example: take waiting jobs (haven’t started yet) - job_queue = [j for j in self.engine.jobs if getattr(j, "start_time", None) is None] + job_queue = [j for j in self.jobs if getattr(j, "start_time", None) is None] max_jobs, job_features = self.observation_space.shape state = np.zeros((max_jobs, job_features), dtype=np.float32) diff --git a/raps/resmgr/default.py b/raps/resmgr/default.py index 1429a5f..339f0ed 100644 --- a/raps/resmgr/default.py +++ b/raps/resmgr/default.py @@ -66,7 +66,8 @@ class ExclusiveNodeResourceManager: if n not in self.available_nodes: self.available_nodes.append(n) else: - raise KeyError(f"node was free but already in available nodes: {n.id}") + # Already free — log instead of raising + print(f"[WARN] Tried to free node {n}, but it was already available") self.available_nodes = sorted(self.available_nodes) def update_system_utilization(self, current_time, running_jobs): diff --git a/raps/workload.py b/raps/workload.py index 151e2c3..f02649c 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -642,6 +642,151 @@ class Workload: return jobs + def multitenant(self, **kwargs): + """ + Generate deterministic jobs to validate multitenant scheduling & power. + + Parameters + ---------- + mode : str + One of: + - 'ONE_JOB_PER_NODE_ALL_CORES' + - 'TWO_JOBS_PER_NODE_SPLIT' + - 'STAGGERED_JOBS_PER_NODE' + wall_time : int + Duration (seconds) of each job (default: 3600) + trace_quanta : int + Sampling interval for traces; defaults to config['TRACE_QUANTA'] + + Returns + ------- + list[dict] + List of job_dict entries. 
+ """ + mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') + wall_time = kwargs.get('wall_time', 3600) + + jobs = [] + + for partition in self.partitions: + cfg = self.config_map[partition] + trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) + + cores_per_cpu = cfg.get('CORES_PER_CPU', 1) + cpus_per_node = cfg.get('CPUS_PER_NODE', 1) + cores_per_node = cores_per_cpu * cpus_per_node + gpus_per_node = cfg.get('GPUS_PER_NODE', 0) + + n_nodes = cfg['AVAILABLE_NODES'] + + def make_trace(cpu_util, gpu_util): + return self.compute_traces(cpu_util, gpu_util, wall_time, trace_quanta) + + job_id_ctr = 0 + + if mode == 'ONE_JOB_PER_NODE_ALL_CORES': + # Each node runs one job that consumes all cores/GPUs + for nid in range(n_nodes): + cpu_trace, gpu_trace = make_trace(cores_per_node, gpus_per_node) + jobs.append(Job(job_dict( + nodes_required=1, + cpu_cores_required=cores_per_node, + gpu_units_required=gpus_per_node, + name=f"MT_full_node_{partition}_{nid}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + + elif mode == 'TWO_JOBS_PER_NODE_SPLIT': + # Two jobs per node: split CPU/GPU roughly in half + for nid in range(n_nodes): + cpu_a = cores_per_node // 2 + cpu_b = cores_per_node - cpu_a + gpu_a = gpus_per_node // 2 + gpu_b = gpus_per_node - gpu_a + + for idx, (c_req, g_req, tag) in enumerate([(cpu_a, gpu_a, 'A'), + (cpu_b, gpu_b, 'B')]): + cpu_trace, gpu_trace = make_trace(c_req, g_req) + jobs.append(Job(job_dict( + nodes_required=1, # still one node; multitenant RM packs cores + cpu_cores_required=c_req, + gpu_units_required=g_req, + name=f"MT_split_node_{partition}_{nid}_{tag}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + + elif mode == 'STAGGERED_JOBS_PER_NODE': + # Three jobs per node, staggered starts: 0, wall_time/3, 2*wall_time/3 + offsets = [0, wall_time // 3, 2 * wall_time // 3] + cpu_each = cores_per_node // 3 or 1 + gpu_each = max(1, gpus_per_node // 3) if gpus_per_node else 0 + + for nid in range(n_nodes): + for k, offset in enumerate(offsets): + cpu_trace, gpu_trace = make_trace(cpu_each, gpu_each) + jobs.append(Job(job_dict( + nodes_required=1, + cpu_cores_required=cpu_each, + gpu_units_required=gpu_each, + name=f"MT_stagger_node_{partition}_{nid}_{k}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=offset, + time_limit=wall_time, + start_time=offset, + end_time=offset + wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + else: + raise 
ValueError(f"Unknown multitenant mode: {mode}") + + return jobs + def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): # put args.multimodal in dist_split! @@ -818,151 +963,6 @@ def run_workload(): np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) print(filename + ".npz") # To std-out to show which npz was created. - def multitenant(self, **kwargs): - """ - Generate deterministic jobs to validate multitenant scheduling & power. - - Parameters - ---------- - mode : str - One of: - - 'ONE_JOB_PER_NODE_ALL_CORES' - - 'TWO_JOBS_PER_NODE_SPLIT' - - 'STAGGERED_JOBS_PER_NODE' - wall_time : int - Duration (seconds) of each job (default: 3600) - trace_quanta : int - Sampling interval for traces; defaults to config['TRACE_QUANTA'] - - Returns - ------- - list[dict] - List of job_dict entries. - """ - mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') - wall_time = kwargs.get('wall_time', 3600) - - jobs = [] - - for partition in self.partitions: - cfg = self.config_map[partition] - trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) - - cores_per_cpu = cfg.get('CORES_PER_CPU', 1) - cpus_per_node = cfg.get('CPUS_PER_NODE', 1) - cores_per_node = cores_per_cpu * cpus_per_node - gpus_per_node = cfg.get('GPUS_PER_NODE', 0) - - n_nodes = cfg['AVAILABLE_NODES'] - - def make_trace(cpu_util, gpu_util): - return self.compute_traces(cpu_util, gpu_util, wall_time, trace_quanta) - - job_id_ctr = 0 - - if mode == 'ONE_JOB_PER_NODE_ALL_CORES': - # Each node runs one job that consumes all cores/GPUs - for nid in range(n_nodes): - cpu_trace, gpu_trace = make_trace(cores_per_node, gpus_per_node) - jobs.append(job_dict( - nodes_required=1, - cpu_cores_required=cores_per_node, - gpu_units_required=gpus_per_node, - name=f"MT_full_node_{partition}_{nid}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=0, - time_limit=wall_time, - start_time=0, - end_time=wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] - )) - job_id_ctr += 1 - - elif mode == 'TWO_JOBS_PER_NODE_SPLIT': - # Two jobs per node: split CPU/GPU roughly in half - for nid in range(n_nodes): - cpu_a = cores_per_node // 2 - cpu_b = cores_per_node - cpu_a - gpu_a = gpus_per_node // 2 - gpu_b = gpus_per_node - gpu_a - - for idx, (c_req, g_req, tag) in enumerate([(cpu_a, gpu_a, 'A'), - (cpu_b, gpu_b, 'B')]): - cpu_trace, gpu_trace = make_trace(c_req, g_req) - jobs.append(job_dict( - nodes_required=1, # still one node; multitenant RM packs cores - cpu_cores_required=c_req, - gpu_units_required=g_req, - name=f"MT_split_node_{partition}_{nid}_{tag}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=0, - time_limit=wall_time, - start_time=0, - end_time=wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] - )) - job_id_ctr += 1 - - elif mode == 'STAGGERED_JOBS_PER_NODE': - # Three jobs per node, staggered starts: 0, wall_time/3, 2*wall_time/3 - offsets = [0, wall_time // 3, 2 * wall_time // 3] - cpu_each = 
cores_per_node // 3 or 1 - gpu_each = max(1, gpus_per_node // 3) if gpus_per_node else 0 - - for nid in range(n_nodes): - for k, offset in enumerate(offsets): - cpu_trace, gpu_trace = make_trace(cpu_each, gpu_each) - jobs.append(job_dict( - nodes_required=1, - cpu_cores_required=cpu_each, - gpu_units_required=gpu_each, - name=f"MT_stagger_node_{partition}_{nid}_{k}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=offset, - time_limit=wall_time, - start_time=offset, - end_time=offset + wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] - )) - job_id_ctr += 1 - else: - raise ValueError(f"Unknown multitenant mode: {mode}") - - return jobs - def continuous_job_generation(*, engine, timestep, jobs): # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") diff --git a/train_rl.py b/train_rl.py index e98f7a3..73d3ed7 100644 --- a/train_rl.py +++ b/train_rl.py @@ -1,3 +1,7 @@ +""" +Example usage: + python train_rl.py --system mit_supercloud -f /opt/data/mit_supercloud/202201 +""" from stable_baselines3 import PPO from raps.envs.raps_env import RAPSEnv from raps.system_config import get_system_config -- GitLab From 45c37286fdfe76e2311245d9aef2e0013f61f388 Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Wed, 10 Sep 2025 13:09:28 +0000 Subject: [PATCH 294/388] Output and other changes --- raps/dataloaders/lassen.py | 3 +- raps/dataloaders/mit_supercloud/loader.py | 3 +- raps/engine.py | 8 +- raps/multi_part_engine.py | 4 +- raps/run_sim.py | 33 ++++--- raps/sim_config.py | 99 ++++++++++++++----- raps/system_config.py | 76 ++++++++++---- raps/telemetry.py | 6 +- raps/utils.py | 57 +++++++++-- raps/workload.py | 11 ++- tests/smoke.py | 2 +- tests/systems/test_engine.py | 4 +- tests/systems/test_main_basic_run.py | 2 +- .../systems/test_main_network_withdata_run.py | 2 +- .../test_main_time_delta_sub_second_run.py | 8 +- tests/systems/test_main_withdata_run.py | 2 +- .../systems/test_multi_part_sim_basic_run.py | 3 +- .../test_multi_part_sim_withdata_run.py | 6 +- tests/systems/test_telemetry_withdata_run.py | 2 +- tests/test_main.py | 25 ++--- 20 files changed, 237 insertions(+), 119 deletions(-) diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index fd0e364..bc57a7c 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -245,7 +245,8 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs): return WorkloadData( jobs=job_list, telemetry_start=telemetry_start_time, telemetry_end=telemetry_end_time, - start_date=telemetry_start_timestamp, + # TODO: Confirm whether lassen timestamps are UTC or PDT + start_date=telemetry_start_timestamp.tz_localize("UTC"), ) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 2c8dbc1..ab68eb7 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -119,7 +119,7 @@ from collections import Counter from datetime import datetime, timezone from raps.job import job_dict, Job -from raps.utils import summarize_ranges, next_arrival, WorkloadData +from raps.utils import summarize_ranges, WorkloadData from .utils import proc_cpu_series, proc_gpu_series, to_epoch from .utils import DEFAULT_START, DEFAULT_END @@ -211,7 
+211,6 @@ def load_data(local_dataset_path, **kwargs): """ debug = kwargs.get("debug") config = kwargs.get("config") - arrival = kwargs.get("arrival") NL_PATH = os.path.dirname(__file__) skip_counts = Counter() diff --git a/raps/engine.py b/raps/engine.py index 23a2605..c569bd0 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -161,7 +161,6 @@ class Engine: self.flops_manager = flops_manager self.debug = sim_config.debug self.continuous_workload = continuous_workload - self.output = sim_config.output self.replay = sim_config.replay self.downscale = sim_config.downscale # Factor to downscale the 1s timesteps (power of 10) self.simulate_network = sim_config.simulate_network @@ -215,10 +214,7 @@ class Engine: @staticmethod def from_sim_config(sim_config: SimConfig, partition: str | None = None): if partition: - system_config_by_name = {s.system_name: s for s in sim_config.system_configs} - system_config = system_config_by_name.get(partition) - if not system_config: - raise ValueError(f"Partition {partition} isn't in SimConfig") + system_config = sim_config.get_system_config_by_name(partition) elif len(sim_config.system_configs) > 1: raise ValueError( "Engine can only run single-partition simulations. Use MultiPartEngine for " + @@ -232,8 +228,6 @@ class Engine: sim_config_args = sim_config.get_legacy_args() sim_config_dict = sim_config.get_legacy_args_dict() sim_config_dict['config'] = system_config_dict - if partition: - sim_config_dict["system"] = sim_config.system_name if sim_config.seed: random.seed(sim_config.seed) diff --git a/raps/multi_part_engine.py b/raps/multi_part_engine.py index 944ced9..57e3e27 100644 --- a/raps/multi_part_engine.py +++ b/raps/multi_part_engine.py @@ -1,6 +1,6 @@ from collections.abc import Iterable from raps.engine import Engine, TickData -from raps.sim_config import SimConfig +from raps.sim_config import MultiPartSimConfig from raps.utils import WorkloadData @@ -11,7 +11,7 @@ class MultiPartEngine: self.jobs = jobs @staticmethod - def from_sim_config(sim_config: SimConfig): + def from_sim_config(sim_config: MultiPartSimConfig): if sim_config.replay: root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs) # TODO should consider how to pass separate replay values for separate systems diff --git a/raps/run_sim.py b/raps/run_sim.py index 5afd6f1..ce89529 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -22,7 +22,7 @@ from raps.stats import ( print_formatted_report ) -from raps.sim_config import SimConfig +from raps.sim_config import SingleSimConfig, MultiPartSimConfig def read_yaml(config_file: str): @@ -62,7 +62,7 @@ def run_sim_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. 
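    Example: `python main.py run experiment.yaml`, or `cat experiment.yaml | python main.py run -`
    (the experiment.yaml filename is illustrative).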
""") - model_validate = pydantic_add_args(parser, SimConfig, model_config={ + model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": shortcuts, }) parser.set_defaults( @@ -70,23 +70,26 @@ def run_sim_add_parser(subparsers: SubParsers): ) -def run_sim(sim_config: SimConfig): +def run_sim(sim_config: SingleSimConfig): if sim_config.verbose or sim_config.debug: - print(f"SimConfig: {sim_config.model_dump_json(indent=4)}") + print(f"SingleSimConfig: {sim_config.model_dump_json(indent=4)}") if len(sim_config.system_configs) > 1: print("Use run-parts to run multi-partition simulations") sys.exit(1) engine, workload_data, time_delta = Engine.from_sim_config(sim_config) - out = sim_config.output + out = sim_config.get_output() if out: out.mkdir(parents=True) engine.telemetry.save_snapshot( - dest=str(out), + dest=str(out / 'snapshot.npz'), result=workload_data, args=sim_config, ) + config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) + (out / 'sim_config.yaml').write_text(config_yaml) + jobs = workload_data.jobs timestep_start, timestep_end = workload_data.telemetry_start, workload_data.telemetry_end total_timesteps = timestep_end - timestep_start @@ -234,7 +237,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. """) - model_validate = pydantic_add_args(parser, SimConfig, model_config={ + model_validate = pydantic_add_args(parser, MultiPartSimConfig, model_config={ "cli_shortcuts": shortcuts, }) parser.set_defaults( @@ -242,8 +245,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): ) -def run_parts_sim(sim_config: SimConfig): - +def run_parts_sim(sim_config: MultiPartSimConfig): if len(sim_config.system_configs) == 1: warnings.warn( "run_parts_sim is usually for multiple partitions. 
Did you mean to run with one?", @@ -253,13 +255,18 @@ def run_parts_sim(sim_config: SimConfig): multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ MultiPartEngine.from_sim_config(sim_config) - if sim_config.output: + out = sim_config.get_output() + if out: + out.mkdir(parents=True) for part, engine in multi_engine.engines.items(): engine.telemetry.save_snapshot( - dest=str(sim_config.output / part.split('/')[-1]), + dest=str(out / part.split('/')[-1]), result=workload_results[part], args=sim_config, ) + config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) + (out / 'sim_config.yaml').write_text(config_yaml) + jobs = {p: w.jobs for p, w in workload_results.items()} ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq @@ -317,7 +324,7 @@ def show_add_parser(subparsers: SubParsers): parser.add_argument("--show-defaults", default=False, help=""" If true, include defaults in the output YAML """) - model_validate = pydantic_add_args(parser, SimConfig, model_config={ + model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": shortcuts, }) @@ -328,6 +335,6 @@ def show_add_parser(subparsers: SubParsers): parser.set_defaults(impl=impl) -def show(sim_config: SimConfig, show_defaults=False): +def show(sim_config: SingleSimConfig, show_defaults=False): data = sim_config.model_dump(mode="json", exclude_defaults=not show_defaults) print(yaml_dump(data), end="") diff --git a/raps/sim_config.py b/raps/sim_config.py index 5cdd09c..a73cd3e 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -1,24 +1,22 @@ import argparse +import abc +from pathlib import Path from functools import cached_property from datetime import timedelta from typing import Literal +import importlib from raps.schedulers.default import PolicyType, BackfillType from raps.utils import ( - parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, parse_td, + parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, parse_td, create_casename, + RAPSBaseModel, ) -from raps.system_config import SystemConfig, get_partition_configs -from pydantic import BaseModel, model_validator -import importlib +from raps.system_config import SystemConfig, get_partition_configs, get_system_config +from pydantic import model_validator Distribution = Literal['uniform', 'weibull', 'normal'] -class SimConfig(BaseModel): - system: str | None = None - """ System config to use """ - partitions: list[str] = [] - """ List of multiple system configurations for a multi-partition run. Can contain wildcards """ - +class SimConfig(RAPSBaseModel, abc.ABC): cooling: bool = False """ Include the FMU cooling model """ simulate_network: bool = False @@ -62,8 +60,25 @@ class SimConfig(BaseModel): seed: int | None = None """ Set RNG seed for deterministic simulation """ - output: ExpandedPath | None = None - """ Output power, cooling, and loss models for later analysis. Argument specifies name. """ + + output: ExpandedPath | Literal['none'] | None = None + """ + Where to output power, cooling, and loss models for later analysis. + If omitted it will output to raps-output- by default. + Set to "none" to disable file output entirely. 
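+    For example, `-o results/run1` would write snapshot.npz and sim_config.yaml under
+    results/run1, as in run_sim above; the path results/run1 is illustrative.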
+ """ + + _random_output: Path | None = None + + def get_output(self) -> Path | None: + if self.output is None: # by default, output to a random directory + if not self._random_output: + self._random_output = Path(create_casename("raps-output-")).resolve() + return self._random_output + elif self.output == "none": # allow explicitly disabling output with "none" + return None + else: + return self.output # return user defined output path debug: bool = False """ Enable debug mode and disable rich layout """ @@ -242,12 +257,6 @@ class SimConfig(BaseModel): @model_validator(mode="after") def _validate_after(self): - # This is called after Pydantic has parsed everything into the model - if self.system and self.partitions: - raise ValueError("system and partitions are mutually exclusive") - elif not self.system and not self.partitions: - self.system = "frontier" - if not self.replay and not self.workload: self.workload = "random" @@ -294,24 +303,28 @@ class SimConfig(BaseModel): return self @property + @abc.abstractmethod def system_name(self) -> str: """ Name of the system. - Note, this is different than system, as system can be a file or None if partition is set. + Note, this is different than system, as system can be a file, or there can be multiple systems """ - return self._multi_partition_system_config.system_name + pass @property + @abc.abstractmethod def system_configs(self) -> list[SystemConfig]: """ Return the SystemConfigs for the selected systems. Will be a single element array unless multiple `partitions` are selected. """ - return self._multi_partition_system_config.partitions + pass - @cached_property - def _multi_partition_system_config(self): - return get_partition_configs(self.partitions if self.partitions else [self.system]) + def get_system_config_by_name(self, name: str) -> SystemConfig: + for s in self.system_configs: + if s.system_name == name: + return s + raise ValueError(f"Partition {name} isn't in SimConfig") def get_legacy_args(self): """ @@ -326,6 +339,7 @@ class SimConfig(BaseModel): contains the SimConfig object itself. """ args_dict = self.model_dump(mode="json") + args_dict['system'] = self.system_name # validate has been renamed to power_scope args_dict['validate'] = args_dict["power_scope"] == "node" args_dict['downscale'] = self.downscale @@ -340,3 +354,40 @@ class SimConfig(BaseModel): args_dict['sim_config'] = self return args_dict + + +class SingleSimConfig(SimConfig, abc.ABC): + system: SystemConfig | str = "frontier" + """ + Name of the system to simulate, e.g "frontier". Can also be a path to a yaml file containing + the SystemConfig. You can also make modificiations to the SystemConfig on the CLI using + `--system.base`, e.g. `--system.base frontier --system.cooling.fmu-path path/to/my.fmu` + """ + + @property + def system_name(self) -> str: + return self.system_configs[0].system_name + + @cached_property + def system_configs(self) -> list[SystemConfig]: + return [get_system_config(self.system)] + + +class MultiPartSimConfig(SimConfig): + partitions: list[SystemConfig | str] + """ + List of multiple systems/partitions to run. Can be names of preconfigured systems, or paths + to custom SystemConfig yaml files. 
+ """ + + @property + def system_name(self) -> str: + return self._multi_partition_system_config.system_name + + @property + def system_configs(self) -> list[SystemConfig]: + return self._multi_partition_system_config.partitions + + @cached_property + def _multi_partition_system_config(self): + return get_partition_configs(self.partitions) diff --git a/raps/system_config.py b/raps/system_config.py index 726c086..bd405be 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -5,13 +5,17 @@ from typing import Any, Literal from pathlib import Path from functools import cached_property import yaml -from pydantic import BaseModel, computed_field, model_validator, field_validator +from pydantic import ( + model_validator, field_validator, model_serializer, SerializationInfo, + SerializerFunctionWrapHandler, +) +from raps.utils import RAPSBaseModel, deep_merge, deep_subtract_dicts from raps.raps_config import raps_config # Define Pydantic models for the config to handle parsing and validation -class SystemSystemConfig(BaseModel): +class SystemSystemConfig(RAPSBaseModel): num_cdus: int racks_per_cdu: int nodes_per_rack: int @@ -41,27 +45,22 @@ class SystemSystemConfig(BaseModel): self.down_nodes = sorted(set(self.down_nodes)) return self - @computed_field @cached_property def num_racks(self) -> int: return self.num_cdus * self.racks_per_cdu - len(self.missing_racks) - @computed_field @cached_property def sc_shape(self) -> list[int]: return [self.num_cdus, self.racks_per_cdu, self.nodes_per_rack] - @computed_field @cached_property def total_nodes(self) -> int: return self.num_cdus * self.racks_per_cdu * self.nodes_per_rack - @computed_field @cached_property def blades_per_chassis(self) -> int: return int(self.nodes_per_rack / self.chassis_per_rack / self.nodes_per_blade) - @computed_field @cached_property def power_df_header(self) -> list[str]: power_df_header = ["CDU"] @@ -73,13 +72,12 @@ class SystemSystemConfig(BaseModel): power_df_header.append("Loss") return power_df_header - @computed_field @cached_property def available_nodes(self) -> int: return self.total_nodes - len(self.down_nodes) -class SystemPowerConfig(BaseModel): +class SystemPowerConfig(RAPSBaseModel): power_gpu_idle: float power_gpu_max: float power_cpu_idle: float @@ -100,7 +98,7 @@ class SystemPowerConfig(BaseModel): power_cost: float -class SystemUqConfig(BaseModel): +class SystemUqConfig(RAPSBaseModel): power_gpu_uncertainty: float power_cpu_uncertainty: float power_mem_uncertainty: float @@ -115,7 +113,7 @@ class SystemUqConfig(BaseModel): JobEndStates = Literal["COMPLETED", "FAILED", "CANCELLED", "TIMEOUT", "NODE_FAIL"] -class SystemSchedulerConfig(BaseModel): +class SystemSchedulerConfig(RAPSBaseModel): job_arrival_time: int mtbf: int trace_quanta: int @@ -127,7 +125,7 @@ class SystemSchedulerConfig(BaseModel): multitenant: bool = False -class SystemCoolingConfig(BaseModel): +class SystemCoolingConfig(RAPSBaseModel): cooling_efficiency: float wet_bulb_temp: float zip_code: str | None = None @@ -140,7 +138,7 @@ class SystemCoolingConfig(BaseModel): temperature_keys: list[str] -class SystemNetworkConfig(BaseModel): +class SystemNetworkConfig(RAPSBaseModel): topology: Literal["capacity", "fat-tree", "dragonfly", "torus3d"] network_max_bw: float latency: float | None = None @@ -163,10 +161,16 @@ class SystemNetworkConfig(BaseModel): node_coords_csv: str | None = None -class SystemConfig(BaseModel): +class SystemConfig(RAPSBaseModel): system_name: str """ Name of the system, defaults to the yaml file name """ + base: 
str | None = None + """ + Optional, name or path to another SystemConfig to "inherit" from. Lets you make small modifications + to an existing system without having to copy the whole config. + """ + system: SystemSystemConfig power: SystemPowerConfig scheduler: SystemSchedulerConfig @@ -174,6 +178,22 @@ class SystemConfig(BaseModel): cooling: SystemCoolingConfig | None = None network: SystemNetworkConfig | None = None + @model_validator(mode="before") + def _load_base(cls, data): + if isinstance(data, dict) and data.get("base"): + base = get_system_config(data['base']) + data = deep_merge(base.model_dump(mode='json'), data) + return data + + @model_serializer(mode='wrap') + def model_serializer(self, handler: SerializerFunctionWrapHandler, info: SerializationInfo): + # don't include the base system data in the output + if self.base and (info.exclude_defaults or info.exclude_unset): + base = get_system_config(self.base) + return deep_subtract_dicts(handler(self), handler(base)) + else: + return handler(self) + def get_legacy(self) -> dict[str, Any]: """ Return the system config as a flattened, uppercased dict. This is for backwards @@ -181,6 +201,8 @@ class SystemConfig(BaseModel): gradually. The dict also as a "system_config" key that contains the SystemConfig object itself. """ + dump = self.model_dump(mode="json", exclude_none=True) + renames = { # fields that need to be renamed to something other than just .upper() "system_name": "system_name", "w_htwps_key": "W_HTWPs_KEY", @@ -188,7 +210,6 @@ class SystemConfig(BaseModel): "w_cts_key": "W_CTs_KEY", "multitenant": "multitenant", } - dump = self.model_dump(mode="json", exclude_none=True) config_dict: dict[str, Any] = {} for k, v in dump.items(): # flatten @@ -196,13 +217,20 @@ class SystemConfig(BaseModel): config_dict.update(v) else: config_dict[k] = v + config_dict["num_racks"] = self.system.num_racks + config_dict["sc_shape"] = self.system.sc_shape + config_dict["total_nodes"] = self.system.total_nodes + config_dict["blades_per_chassis"] = self.system.blades_per_chassis + config_dict["power_df_header"] = self.system.power_df_header + config_dict["available_nodes"] = self.system.available_nodes + # rename keys config_dict = {renames.get(k, k.upper()): v for k, v in config_dict.items()} config_dict['system_config'] = self return config_dict -class MultiPartitionSystemConfig(BaseModel): +class MultiPartitionSystemConfig(RAPSBaseModel): system_name: str partitions: list[SystemConfig] @@ -227,13 +255,15 @@ def list_systems() -> list[str]: ]) -@functools.cache -def get_system_config(system: str) -> SystemConfig: +def get_system_config(system: str | SystemConfig) -> SystemConfig: """ Returns the system config as a Pydantic object. system can either be a path to a custom .yaml file, or the name of one of the pre-configured systems defined in RAPS_SYSTEM_CONFIG_DIR. 
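    e.g. get_system_config("frontier"), or get_system_config("path/to/my_system.yaml") for a
    custom file (the yaml path is illustrative).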
""" + if isinstance(system, SystemConfig): # Just pass system through if its already parsed + return system + if system in list_systems(): config_path = raps_config.system_config_dir / f"{system}.yaml" system_name = system @@ -247,10 +277,13 @@ def get_system_config(system: str) -> SystemConfig: "system_name": system_name, # You can override system_name in the yaml as well **yaml.safe_load(config_path.read_text()), } + base = str(config.get('base', '')) + if base.endswith(".yaml"): + config['base'] = str(config_path.parent / base) # path relative to yaml return SystemConfig.model_validate(config) -def get_partition_configs(partitions: list[str]) -> MultiPartitionSystemConfig: +def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitionSystemConfig: """ Resolves multiple partition config files. Can pass globs, or directories to include all yaml files under the directory. @@ -261,7 +294,10 @@ def get_partition_configs(partitions: list[str]) -> MultiPartitionSystemConfig: parsed_configs: list[SystemConfig] = [] for pat in partitions: - if pat in multi_partition_systems: + if isinstance(pat, SystemConfig): + parsed_configs.append(pat) + combined_system_name.append(pat.system_name) + elif pat in multi_partition_systems: matched_systems = fnmatch.filter(systems, f"{pat}/*") combined_system_name.append(pat) elif fnmatch.filter(systems, pat): diff --git a/raps/telemetry.py b/raps/telemetry.py index 340b9ae..63ee158 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -15,7 +15,7 @@ from types import ModuleType import importlib import numpy as np import pandas as pd -from pydantic import BaseModel, model_validator +from pydantic import model_validator # from rich.progress import track from raps.sim_config import SimConfig @@ -28,12 +28,12 @@ from raps.plotting import ( plot_network_histogram ) from raps.utils import ( - next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadData, + next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadData, RAPSBaseModel, ) # TODO: should reuse this model in SimConfig -class TelemetryArgs(BaseModel): +class TelemetryArgs(RAPSBaseModel): jid: str = '*' """ Replay job id """ replay: list[ExpandedPath] | None = None diff --git a/raps/utils.py b/raps/utils.py index 323ac8a..ab02a2a 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -21,12 +21,39 @@ import json import argparse from pathlib import Path from typing import Annotated as A, TypeVar, Callable, TypeAlias -from pydantic import BaseModel, TypeAdapter, AfterValidator, ConfigDict, AwareDatetime +from pydantic import BaseModel, TypeAdapter, AfterValidator, ConfigDict, AwareDatetime, ValidationError from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource import yaml from raps.job import Job +def deep_merge(a: dict, b: dict): + a = {**a} + for key in b.keys(): + if key in a and isinstance(a[key], dict) and isinstance(b[key], dict): + a[key] = deep_merge(a[key], b[key]) + else: + a[key] = b[key] + return a + + +def deep_subtract_dicts(a: dict, b: dict): + """ + Remove all fields from a that are already in b, such that + deep_merge(deep_subtract_dicts(a, b), b) == a + a should contain a superset of b's keys. 
+ """ + a = {**a} + for key in b.keys(): + if key in a: + if a[key] == b[key]: + a.pop(key) + elif isinstance(a[key], dict) and isinstance(b[key], dict): + a[key] = deep_subtract_dicts(a[key], b[key]) + # otherwise keep key in a as is + return a + + def sum_values(values): return sum(x[1] for x in values) if values else 0 @@ -639,6 +666,13 @@ SmartTimedelta = A[timedelta, AfterValidator(parse_td)] T = TypeVar("T", bound=BaseModel) +class RAPSBaseModel(BaseModel): + """ Base Pydantic model with shared config """ + model_config = ConfigDict( + use_attribute_docstrings=True, + ) + + def pydantic_add_args( parser: argparse.ArgumentParser, model_cls: type[T], model_config: SettingsConfigDict | None = None, @@ -655,6 +689,7 @@ def pydantic_add_args( model_config_dict = SettingsConfigDict({ "cli_implicit_flags": True, "cli_kebab_case": True, + "title": model_cls.__name__, **(model_config or {}), "cli_parse_args": False, # Don't automatically parse args }) @@ -671,13 +706,17 @@ def pydantic_add_args( cli_settings_source = CliSettingsSource(SettingsModel, root_parser=parser) def model_validate_args(args: argparse.Namespace, data: dict | None = None): - model = CliApp.run(SettingsModel, - cli_args=args, - cli_settings_source=cli_settings_source, - **(data or {}), - ) - # Recreate model so we don't return the SettingsModel subclass - return model_cls.model_validate(model.model_dump()) + try: + model = CliApp.run(SettingsModel, + cli_args=args, + cli_settings_source=cli_settings_source, + **(data or {}), + ) + # Recreate model so we don't return the SettingsModel subclass + return model_cls.model_validate(model.model_dump()) + except ValidationError as err: + print(err) + sys.exit(1) return model_validate_args @@ -711,7 +750,7 @@ def yaml_dump(data): ) -class WorkloadData(BaseModel): +class WorkloadData(RAPSBaseModel): """ Represents a workload, a list of jobs with some metadata. Returned by dataloaders load_data() function, and by Workload.generate_jobs(). diff --git a/raps/workload.py b/raps/workload.py index 6fb3c3b..2a630b2 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -40,7 +40,7 @@ import matplotlib.pyplot as plt from raps.telemetry import Telemetry from raps.job import job_dict, Job from raps.utils import create_file_indexed, SubParsers, pydantic_add_args -from raps.sim_config import SimConfig +from raps.sim_config import SingleSimConfig JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD", @@ -965,13 +965,13 @@ def run_workload_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. 
""") - model_validate = pydantic_add_args(parser, SimConfig, model_config={ + model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": shortcuts, }) parser.set_defaults(impl=lambda args: run_workload(model_validate(args, {}))) -def run_workload(sim_config: SimConfig): +def run_workload(sim_config: SingleSimConfig): args = sim_config.get_legacy_args() args_dict = sim_config.get_legacy_args() config = sim_config.system_configs[0].get_legacy() @@ -987,10 +987,11 @@ def run_workload(sim_config: SimConfig): dist_split=sim_config.multimodal, gantt_nodes=sim_config.gantt_nodes) - if sim_config.output: + out = sim_config.get_output() + if out: timestep_start = min([x.submit_time for x in jobs]) timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs])) - filename = create_file_indexed('wl', create=False, ending="npz").split(".npz")[0] + filename = create_file_indexed('wl', path=str(out), create=False, ending="npz").split(".npz")[0] # savez_compressed add npz itself, but create_file_indexed needs to check for .npz to find existing files np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) print(filename + ".npz") # To std-out to show which npz was created. diff --git a/tests/smoke.py b/tests/smoke.py index 946f6db..a2ea598 100644 --- a/tests/smoke.py +++ b/tests/smoke.py @@ -32,7 +32,7 @@ def run_command(command): def build_command(system, file_paths, additional_args=""): """Build the command string for the given system and file paths.""" full_paths = " ".join([os.path.join(DATAPATH, path) for path in file_paths.split()]) - return f"python main.py run --system {system} -f {full_paths} -t {DEFAULT_TIME} {additional_args}".strip() + return f"python main.py run --system {system} -f {full_paths} -t {DEFAULT_TIME} -o none {additional_args}".strip() def execute_system_tests(systems): diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py index e483b18..0404e89 100644 --- a/tests/systems/test_engine.py +++ b/tests/systems/test_engine.py @@ -1,6 +1,6 @@ import pytest from raps.engine import Engine -from raps.sim_config import SimConfig +from raps.sim_config import SingleSimConfig from raps.stats import ( get_engine_stats, # get_job_stats, @@ -18,7 +18,7 @@ def test_engine(system, system_config, sim_output): if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") - sim_config = SimConfig.model_validate({ + sim_config = SingleSimConfig.model_validate({ "system": system, "time": "2m", }) diff --git a/tests/systems/test_main_basic_run.py b/tests/systems/test_main_basic_run.py index 0cc9b69..37661f3 100644 --- a/tests/systems/test_main_basic_run.py +++ b/tests/systems/test_main_basic_run.py @@ -19,6 +19,6 @@ def test_main_basic_run(system, system_config, sim_output): "python", "main.py", "run", "--time", "1m", "--system", system, - "-o", sim_output + "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_main_network_withdata_run.py b/tests/systems/test_main_network_withdata_run.py index 58d14f9..1cbeae8 100644 --- a/tests/systems/test_main_network_withdata_run.py +++ b/tests/systems/test_main_network_withdata_run.py @@ -1,7 +1,7 @@ import os import subprocess import pytest -from tests.util import PROJECT_ROOT, DATA_PATH +from tests.util import PROJECT_ROOT pytestmark = [ diff --git 
a/tests/systems/test_main_time_delta_sub_second_run.py b/tests/systems/test_main_time_delta_sub_second_run.py index 55c0e3c..db80105 100644 --- a/tests/systems/test_main_time_delta_sub_second_run.py +++ b/tests/systems/test_main_time_delta_sub_second_run.py @@ -34,17 +34,11 @@ def test_main_time_delta_sub_second_run(system, system_config, time_arg, tdelta_ "--time-delta", tdelta_arg, "--system", system, "--noui", - "-o", sim_output + "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" time = parse_td(time_arg).seconds assert f"Time Simulated: {convert_seconds_to_hhmmss(time)}" in result.stdout - subprocess.run( - f"rm {sim_output}.npz && rm -fr simulation_results/{sim_output}", - shell=True, - check=True - ) - del result gc.collect() diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index eb996a3..3539db9 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -1,7 +1,7 @@ import os import subprocess import pytest -from tests.util import PROJECT_ROOT, DATA_PATH +from tests.util import PROJECT_ROOT pytestmark = [ diff --git a/tests/systems/test_multi_part_sim_basic_run.py b/tests/systems/test_multi_part_sim_basic_run.py index 9351fd6..0edcc90 100644 --- a/tests/systems/test_multi_part_sim_basic_run.py +++ b/tests/systems/test_multi_part_sim_basic_run.py @@ -11,7 +11,7 @@ pytestmark = [ ] -def test_multi_part_sim_basic_run(system, system_config): +def test_multi_part_sim_basic_run(system, system_config, sim_output): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run.") @@ -21,6 +21,7 @@ def test_multi_part_sim_basic_run(system, system_config): "python", "main.py", "run-parts", "--time", "1h", "-x", f"{system}/*", + "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" del result diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index f38cf8e..538726c 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -1,8 +1,7 @@ import os import subprocess -import gc import pytest -from tests.util import PROJECT_ROOT, DATA_PATH +from tests.util import PROJECT_ROOT pytestmark = [ @@ -12,7 +11,7 @@ pytestmark = [ ] -def test_multi_part_sim_withdata_run(system, system_config, system_files): +def test_multi_part_sim_withdata_run(system, system_config, system_files, sim_output): if not system_config.get("multi-part-sim", False): pytest.skip(f"{system} does not support basic multi-part-sim run even without data.") if not system_config.get("withdata", False): @@ -24,5 +23,6 @@ def test_multi_part_sim_withdata_run(system, system_config, system_files): "--time", "1h", "-x", f"{system}/*", "-f", ','.join(system_files), + "-o", sim_output, ], capture_output=True, text=True, stdin=subprocess.DEVNULL) assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_telemetry_withdata_run.py b/tests/systems/test_telemetry_withdata_run.py index 2729c7c..43a218b 100644 --- a/tests/systems/test_telemetry_withdata_run.py +++ b/tests/systems/test_telemetry_withdata_run.py @@ -1,7 +1,7 @@ import os import subprocess import pytest -from tests.util import PROJECT_ROOT, DATA_PATH +from tests.util import PROJECT_ROOT 
pytestmark = [

diff --git a/tests/test_main.py b/tests/test_main.py
index 5c08182..4b09fa0 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -13,11 +13,10 @@ PROJECT_ROOT = Path(__file__).resolve().parent.parent  # adjust if needed
 def test_main_withui():
     os.chdir(PROJECT_ROOT)
     result = subprocess.run([
-        "python", "main.py", "run",
-        "--time", "1h",
-    ], capture_output=True,
-        text=True
-    )
+        "python", "main.py", "run",
+        "--time", "1h",
+        "-o", 'none',
+    ], capture_output=True, text=True)

     assert result.returncode == 0

@@ -25,12 +24,10 @@ def test_main_withui():
 def test_main_noui():
     os.chdir(PROJECT_ROOT)
     result = subprocess.run([
-        "python", "main.py", "run",
-        "--time", "1h",
-        "--noui"
-    ], capture_output=True,
-        text=True
-    )
+        "python", "main.py", "run",
+        "--time", "1h",
+        "--noui", "-o", 'none',
+    ], capture_output=True, text=True)

     assert result.returncode == 0

@@ -39,8 +36,6 @@ def test_main_noui():
 def test_main_long():
     os.chdir(PROJECT_ROOT)
     result = subprocess.run([
-        "python", "main.py", "run",
-    ], capture_output=True,
-        text=True
-    )
+        "python", "main.py", "run", "-o", 'none',
+    ], capture_output=True, text=True)
     assert result.returncode == 0
-- 
GitLab


From 2d112decfe0490cd3b8e501fa037a5b463b5c4c8 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 10 Sep 2025 11:07:22 -0400
Subject: [PATCH 295/388] Remove symlink config

This will cause issues with the multi-partition lookup
---
 config/mit_supercloud.yaml | 1 -
 1 file changed, 1 deletion(-)
 delete mode 120000 config/mit_supercloud.yaml

diff --git a/config/mit_supercloud.yaml b/config/mit_supercloud.yaml
deleted file mode 120000
index 2167597..0000000
--- a/config/mit_supercloud.yaml
+++ /dev/null
@@ -1 +0,0 @@
-mit_supercloud/part-gpu.yaml
\ No newline at end of file
-- 
GitLab


From 57653313bb6c5432b2f3c6dc6afd2814b5a59387 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 10 Sep 2025 11:12:32 -0400
Subject: [PATCH 296/388] Update pyproject.toml

---
 pyproject.toml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index f396280..c009d2a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

 [project]
 name = "raps"
-version = "0.0.1"
+version = "2.0.0.dev0"
 requires-python = ">=3.12"
 description = "RAPS"
 readme = "README.md"
@@ -30,6 +30,8 @@ dependencies = [
     "pyyaml>=6.0.2",
     "pydantic>=2.11.7",
     "pydantic-settings>=2.10.1",
+    "stable-baselines3==2.7.0",
+    "gym==0.26.2",
     "pre-commit"
 ]
-- 
GitLab


From 6699940d748855f1c508fdb3849352d3737dd0e9 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 10 Sep 2025 12:04:55 -0400
Subject: [PATCH 297/388] Move sim shortcuts

---
 raps/run_sim.py    | 25 ++++---------------------
 raps/sim_config.py | 17 +++++++++++++++++
 raps/workload.py   |  4 ++--
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/raps/run_sim.py b/raps/run_sim.py
index ce89529..5525d7f 100644
--- a/raps/run_sim.py
+++ b/raps/run_sim.py
@@ -22,7 +22,7 @@ from raps.stats import (
     print_formatted_report
 )

-from raps.sim_config import SingleSimConfig, MultiPartSimConfig
+from raps.sim_config import SingleSimConfig, MultiPartSimConfig, SIM_SHORTCUTS


 def read_yaml(config_file: str):
@@ -34,23 +34,6 @@ def read_yaml(config_file: str):
         return {}


-shortcuts = {
-    "partitions": "x",
-    "cooling": "c",
-    "simulate-network": "net",
-    "fastforward": "ff",
-    "time": "t",
-    "debug": "d",
-    "numjobs": "n",
-    "verbose": "v",
-    "output": "o",
-    "uncertainties": "u",
-    "plot": "p",
-    "replay": "f",
-    "workload": "w",
-}
-
-
 def
run_sim_add_parser(subparsers: SubParsers): parser = subparsers.add_parser("run", description=""" Run single-partition (homogeneous) systems. Supports synthetic workload generation or @@ -63,7 +46,7 @@ def run_sim_add_parser(subparsers: SubParsers): flags. Pass "-" to read from stdin. """) model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ - "cli_shortcuts": shortcuts, + "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( impl=lambda args: run_sim(model_validate(args, read_yaml(args.config_file))) @@ -238,7 +221,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): flags. Pass "-" to read from stdin. """) model_validate = pydantic_add_args(parser, MultiPartSimConfig, model_config={ - "cli_shortcuts": shortcuts, + "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( impl=lambda args: run_parts_sim(model_validate(args, read_yaml(args.config_file))) @@ -325,7 +308,7 @@ def show_add_parser(subparsers: SubParsers): If true, include defaults in the output YAML """) model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ - "cli_shortcuts": shortcuts, + "cli_shortcuts": SIM_SHORTCUTS, }) def impl(args): diff --git a/raps/sim_config.py b/raps/sim_config.py index a73cd3e..05c078c 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -391,3 +391,20 @@ class MultiPartSimConfig(SimConfig): @cached_property def _multi_partition_system_config(self): return get_partition_configs(self.partitions) + + +SIM_SHORTCUTS = { + "partitions": "x", + "cooling": "c", + "simulate-network": "net", + "fastforward": "ff", + "time": "t", + "debug": "d", + "numjobs": "n", + "verbose": "v", + "output": "o", + "uncertainties": "u", + "plot": "p", + "replay": "f", + "workload": "w", +} diff --git a/raps/workload.py b/raps/workload.py index ddd4377..2d57227 100644 --- a/raps/workload.py +++ b/raps/workload.py @@ -952,7 +952,7 @@ def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): def run_workload_add_parser(subparsers: SubParsers): - from raps.run_sim import shortcuts + from raps.sim_config import SIM_SHORTCUTS # TODO: Separate the arguments for this command parser = subparsers.add_parser("workload", description=""" Saves workload as a snapshot. @@ -962,7 +962,7 @@ def run_workload_add_parser(subparsers: SubParsers): flags. Pass "-" to read from stdin. 
""") model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ - "cli_shortcuts": shortcuts, + "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults(impl=lambda args: run_workload(model_validate(args, {}))) -- GitLab From 68cfb20ed9081fccbb89cb1da7cfb7891178400e Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 10 Sep 2025 12:07:00 -0400 Subject: [PATCH 298/388] Move read_yaml to utils so we can reuse it --- raps/run_sim.py | 11 +---------- raps/utils.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/raps/run_sim.py b/raps/run_sim.py index 5525d7f..4a3f9b3 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -13,7 +13,7 @@ from raps.ui import LayoutManager from raps.plotting import Plotter from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine -from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, yaml_dump +from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, yaml_dump, read_yaml from raps.stats import ( get_engine_stats, get_job_stats, @@ -25,15 +25,6 @@ from raps.stats import ( from raps.sim_config import SingleSimConfig, MultiPartSimConfig, SIM_SHORTCUTS -def read_yaml(config_file: str): - if config_file == "-": - return yaml.safe_load(sys.stdin.read()) - elif config_file: - return yaml.safe_load(Path(config_file).read_text()) - else: - return {} - - def run_sim_add_parser(subparsers: SubParsers): parser = subparsers.add_parser("run", description=""" Run single-partition (homogeneous) systems. Supports synthetic workload generation or diff --git a/raps/utils.py b/raps/utils.py index ab02a2a..aeb04c5 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -750,6 +750,16 @@ def yaml_dump(data): ) +def read_yaml(config_file: str): + """ Parses yaml file. Pass "-" to read from stdin """ + if config_file == "-": + return yaml.safe_load(sys.stdin.read()) + elif config_file: + return yaml.safe_load(Path(config_file).read_text()) + else: + return {} + + class WorkloadData(RAPSBaseModel): """ Represents a workload, a list of jobs with some metadata. 
Returned by dataloaders load_data() -- GitLab From 05527c712695fc321fdf45cc1b7d45797110dec5 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 10 Sep 2025 12:16:29 -0400 Subject: [PATCH 299/388] Add train-rl subcommand --- main.py | 2 ++ raps/train_rl.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ train_rl.py | 35 ----------------------------- 3 files changed, 60 insertions(+), 35 deletions(-) create mode 100644 raps/train_rl.py delete mode 100644 train_rl.py diff --git a/main.py b/main.py index 7c38960..9b36a76 100644 --- a/main.py +++ b/main.py @@ -6,6 +6,7 @@ from raps.helpers import check_python_version from raps.run_sim import run_sim_add_parser, run_parts_sim_add_parser, show_add_parser from raps.workload import run_workload_add_parser from raps.telemetry import run_telemetry_add_parser +from raps.train_rl import train_rl_add_parser check_python_version() @@ -24,6 +25,7 @@ def main(cli_args: list[str] | None = None): show_add_parser(subparsers) run_workload_add_parser(subparsers) run_telemetry_add_parser(subparsers) + train_rl_add_parser(subparsers) # TODO: move other misc scripts into here diff --git a/raps/train_rl.py b/raps/train_rl.py new file mode 100644 index 0000000..f854e18 --- /dev/null +++ b/raps/train_rl.py @@ -0,0 +1,58 @@ +from raps.sim_config import SingleSimConfig, SIM_SHORTCUTS +from raps.utils import SubParsers, pydantic_add_args, read_yaml + + +class RLConfig(SingleSimConfig): + # Reinforcement Learning + episode_length: int = 1000 + """ Number of timesteps per RL episode (default 1000) """ + + +def train_rl_add_parser(subparsers: SubParsers): + parser = subparsers.add_parser("train-rl", description=""" + Example usage: + raps train-rl --system mit_supercloud/part-gpu -f /opt/data/mit_supercloud/202201 + """) + parser.add_argument("config_file", nargs="?", default=None, help=""" + YAML sim config file, can be used to configure an experiment instead of using CLI + flags. Pass "-" to read from stdin. 
+ """) + model_validate = pydantic_add_args(parser, RLConfig, model_config={ + "cli_shortcuts": SIM_SHORTCUTS, + }) + parser.set_defaults( + impl=lambda args: train_rl(model_validate(args, read_yaml(args.config_file))) + ) + + +def train_rl(rl_config: RLConfig): + from stable_baselines3 import PPO + from raps.envs.raps_env import RAPSEnv + + args_dict = rl_config.get_legacy_args_dict() + config = rl_config.system_configs[0].get_legacy() + args_dict['config'] = config + args_dict['args'] = rl_config.get_legacy_args() + + env = RAPSEnv(**args_dict) + + model = PPO( + "MlpPolicy", + env, + n_steps=512, # shorter rollouts (quicker feedback loop) + batch_size=128, # must divide n_steps evenly + n_epochs=10, # # of minibatch passes per update + gamma=0.99, # discount (keeps long-term credit) + learning_rate=3e-4, # default Adam lr, can try 1e-4 if unstable + ent_coef=0.01, # encourage exploration + verbose=1, + tensorboard_log="./ppo_raps_logs/" + ) + + model.learn(total_timesteps=10000, tb_log_name="ppo_raps") + + # Output stats + stats = env.get_stats() + + # Save trained model + model.save("ppo_raps") diff --git a/train_rl.py b/train_rl.py deleted file mode 100644 index 73d3ed7..0000000 --- a/train_rl.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Example usage: - python train_rl.py --system mit_supercloud -f /opt/data/mit_supercloud/202201 -""" -from stable_baselines3 import PPO -from raps.envs.raps_env import RAPSEnv -from raps.system_config import get_system_config -from raps.sim_config import args, args_dict - -config = get_system_config(args.system).get_legacy() -args_dict['config'] = config -args_dict['args'] = args - -env = RAPSEnv(**args_dict) - -model = PPO( - "MlpPolicy", - env, - n_steps=512, # shorter rollouts (quicker feedback loop) - batch_size=128, # must divide n_steps evenly - n_epochs=10, # # of minibatch passes per update - gamma=0.99, # discount (keeps long-term credit) - learning_rate=3e-4, # default Adam lr, can try 1e-4 if unstable - ent_coef=0.01, # encourage exploration - verbose=1, - tensorboard_log="./ppo_raps_logs/" -) - -model.learn(total_timesteps=10000, tb_log_name="ppo_raps") - -# Output stats -stats = env.get_stats() - -# Save trained model -model.save("ppo_raps") -- GitLab From a546f9938154a6d5fcbb8ae287dd0ccbdee1daf7 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 10 Sep 2025 18:10:33 -0400 Subject: [PATCH 300/388] A number of simplifications to raps_env.py --- raps/envs/raps_env.py | 82 ++++++++----------------------------------- raps/train_rl.py | 24 ++++++------- 2 files changed, 26 insertions(+), 80 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index bf3a161..e786013 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -4,23 +4,14 @@ from gym import spaces import numpy as np from raps.engine import Engine -from raps.power import PowerManager, compute_node_power -from raps.flops import FLOPSManager -from raps.telemetry import Telemetry from raps.workload import Workload -from raps.ui import LayoutManager -from raps.schedulers.rl import Scheduler # from raps.resmgr.default import MultiTenantResourceManager as ResourceManager -from raps.resmgr.default import ExclusiveNodeResourceManager as ResourceManager from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats from stable_baselines3.common.logger import Logger, HumanOutputFormat import sys -logger = Logger( - folder=None, # no log file, just stdout - output_formats=[HumanOutputFormat(sys.stdout)] -) +logger = Logger(folder=None, 
output_formats=[HumanOutputFormat(sys.stdout)]) def print_stats(stats, step=0): @@ -56,65 +47,11 @@ class RAPSEnv(gym.Env): metadata = {"render.modes": ["human"]} - def __init__(self, **kwargs): + def __init__(self, sim_config): super().__init__() # Store everything in self.args - self.args_dict = kwargs # dict - self.cli_args = kwargs.get("args") # Namespace - self.config = kwargs.get("config") - if self.cli_args is None: - raise ValueError("RAPSEnv requires 'args' (argparse.Namespace) in kwargs") - if self.config is None: - raise ValueError("RAPSEnv requires 'config' in kwargs") - - # --- managers (minimal versions) --- - self.power_manager = PowerManager(compute_node_power, **self.config) - self.flops_manager = FLOPSManager(**self.args_dict) - self.telemetry = Telemetry(**self.args_dict) - - # --- Build initial jobs & time bounds --- - self.jobs, self.timestep_start, self.timestep_end = self._build_jobs() - self.original_jobs = self.jobs # keep pristine version - - self.engine = Engine( - power_manager=self.power_manager, - flops_manager=self.flops_manager, - jobs=self.jobs, - **self.args_dict - ) - - resmgr = ResourceManager( - total_nodes=self.config["TOTAL_NODES"], - down_nodes=self.config.get("DOWN_NODES", []), - config=self.config - ) - - # Plug in RL scheduler - self.scheduler = Scheduler( - config=self.config, - policy="fcfs", # or None if you want no heuristic fallback - resource_manager=resmgr, - env=self - ) - self.engine.scheduler = self.scheduler - - self.layout_manager = LayoutManager( - self.args_dict.get("layout"), engine=self.engine, - debug=self.args_dict.get("debug", False), - total_timesteps=self.args_dict.get("time", 1000), - args_dict=self.args_dict, - **self.config - ) - - self.timestep_start = 0 - self.timestep_end = getattr(self.cli_args, "episode_length") - - self.generator = self.layout_manager.run_stepwise( - self.jobs, - timestep_start=self.timestep_start, - timestep_end=self.timestep_end, - time_delta=self.args_dict.get("time_delta"), - ) + self.sim_config = sim_config + self.engine = self._create_engine() # --- RL spaces --- max_jobs = 100 @@ -124,6 +61,14 @@ class RAPSEnv(gym.Env): ) self.action_space = spaces.Discrete(max_jobs) + def _create_engine(self): + self.engine, workload_data, time_delta = Engine.from_sim_config(self.sim_config) + self.engine.scheduler.env = self + jobs = workload_data.jobs + timestep_start = workload_data.telemetry_start + timestep_end = workload_data.telemetry_end + self.generator = self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) + def _build_jobs(self): """ Build a job list either from synthetic workload (--workload) @@ -204,6 +149,9 @@ class RAPSEnv(gym.Env): # return self._get_state(), {} def reset(self, **kwargs): + self.engine = self._create_engine() + + def reset2(self, **kwargs): completed = [j.id for j in self.jobs if j.current_state.name == "COMPLETED"] print(f"[RESET] Jobs already completed before deepcopy: {len(completed)}") diff --git a/raps/train_rl.py b/raps/train_rl.py index f854e18..eac4172 100644 --- a/raps/train_rl.py +++ b/raps/train_rl.py @@ -2,12 +2,6 @@ from raps.sim_config import SingleSimConfig, SIM_SHORTCUTS from raps.utils import SubParsers, pydantic_add_args, read_yaml -class RLConfig(SingleSimConfig): - # Reinforcement Learning - episode_length: int = 1000 - """ Number of timesteps per RL episode (default 1000) """ - - def train_rl_add_parser(subparsers: SubParsers): parser = subparsers.add_parser("train-rl", description=""" Example usage: @@ -17,15 +11,18 @@ def 
train_rl_add_parser(subparsers: SubParsers): YAML sim config file, can be used to configure an experiment instead of using CLI flags. Pass "-" to read from stdin. """) - model_validate = pydantic_add_args(parser, RLConfig, model_config={ + model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ "cli_shortcuts": SIM_SHORTCUTS, }) - parser.set_defaults( - impl=lambda args: train_rl(model_validate(args, read_yaml(args.config_file))) - ) + + def impl(args): + model = model_validate(args, read_yaml(args.config_file)) + model.scheduler = "rl" + train_rl(model) + parser.set_defaults(impl=impl) -def train_rl(rl_config: RLConfig): +def train_rl(rl_config: SingleSimConfig): from stable_baselines3 import PPO from raps.envs.raps_env import RAPSEnv @@ -34,14 +31,14 @@ def train_rl(rl_config: RLConfig): args_dict['config'] = config args_dict['args'] = rl_config.get_legacy_args() - env = RAPSEnv(**args_dict) + env = RAPSEnv(rl_config) model = PPO( "MlpPolicy", env, n_steps=512, # shorter rollouts (quicker feedback loop) batch_size=128, # must divide n_steps evenly - n_epochs=10, # # of minibatch passes per update + n_epochs=10, # of minibatch passes per update gamma=0.99, # discount (keeps long-term credit) learning_rate=3e-4, # default Adam lr, can try 1e-4 if unstable ent_coef=0.01, # encourage exploration @@ -53,6 +50,7 @@ def train_rl(rl_config: RLConfig): # Output stats stats = env.get_stats() + print(stats) # Save trained model model.save("ppo_raps") -- GitLab From cfac2e289baad4c20ae018799ea36c76a9d697ea Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 11 Sep 2025 11:25:43 -0400 Subject: [PATCH 301/388] Fix running single partition --- raps/dataloaders/mit_supercloud/loader.py | 2 +- raps/telemetry.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py index 97deb4e..a622965 100644 --- a/raps/dataloaders/mit_supercloud/loader.py +++ b/raps/dataloaders/mit_supercloud/loader.py @@ -298,7 +298,7 @@ def load_data(local_dataset_path, **kwargs): ]) # partition mode - part = kwargs.get("partition", "").split("/")[-1].lower() + part = (kwargs.get("partition") or "").split("/")[-1].lower() cpu_only = (part == "part-cpu") mixed = (part == "part-gpu") diff --git a/raps/telemetry.py b/raps/telemetry.py index 63ee158..7ab911a 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -77,11 +77,12 @@ class Telemetry: def __init__(self, **kwargs): self.kwargs = kwargs - self.system = kwargs.get('system') + self.system = kwargs['system'] self.config = kwargs.get('config') try: - self.dataloader = importlib.import_module(f"raps.dataloaders.{self.system}", package=__package__) + module = self.system.split("/")[0] + self.dataloader = importlib.import_module(f"raps.dataloaders.{module}", package=__package__) except ImportError as e: print(f"WARNING: Failed to load dataloader: {e}") self.dataloader = None -- GitLab From af3a91710885d2917c65b5052c7bc414d11a004d Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 11 Sep 2025 12:29:38 -0400 Subject: [PATCH 302/388] Clean up raps_env.py. Add in check_env. Add sample command to README.md. Not working yet. 
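
check_env validates the Gym API contract (observation/action spaces and the
reset/step signatures) before any training happens, so wiring mistakes fail
fast instead of mid-rollout. A minimal sketch of the intended call, assuming
RAPSEnv is constructed from the sim config as in train_rl():

    from stable_baselines3.common.env_checker import check_env
    from raps.envs.raps_env import RAPSEnv

    env = RAPSEnv(rl_config)   # rl_config: SingleSimConfig
    check_env(env)             # raises if spaces or reset/step are malformed
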
--- README.md | 5 +- raps/envs/raps_env.py | 110 +++--------------------------------------- raps/train_rl.py | 2 + 3 files changed, 14 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index a3ccb54..eb39cc7 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,9 @@ For MIT Supercloud # Synthetic tests for verification studies: raps run-parts -x mit_supercloud -w multitenant + # Reinforcement learning test case + python main.py train-rl --system mit_supercloud/part-cpu -f /opt/data/mit_supercloud/202201 + For Lumi # Synthetic test for Lumi: @@ -170,7 +173,7 @@ See instructions in [dashboard/README.md](https://code.ornl.gov/exadigit/simulat ## Running Tests -RAPS uses [pytest](https://docs.pytest.org/) for its test suite. +RAPS uses [pytest](https://docs.pytest.org/) for its test suite. Before running tests, ensure that you have a valid data directory available (e.g., `/opt/data`) and set the environment variable `RAPS_DATA_DIR` to point to it. ### Run all tests diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index e786013..2dbb032 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -1,4 +1,3 @@ -import copy import gym from gym import spaces import numpy as np @@ -64,10 +63,10 @@ class RAPSEnv(gym.Env): def _create_engine(self): self.engine, workload_data, time_delta = Engine.from_sim_config(self.sim_config) self.engine.scheduler.env = self - jobs = workload_data.jobs + self.jobs = workload_data.jobs timestep_start = workload_data.telemetry_start timestep_end = workload_data.telemetry_end - self.generator = self.engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) + self.generator = self.engine.run_simulation(self.jobs, timestep_start, timestep_end, time_delta) def _build_jobs(self): """ @@ -114,72 +113,11 @@ class RAPSEnv(gym.Env): else: raise ValueError("RAPSEnv requires either --workload or --replay to build jobs.") -# def reset(self, seed=None, options=None): -# super().reset(seed=seed) -# -# self.jobs = copy.deepcopy(self.original_jobs) # working copy -# -# # Reset engine -# self.engine.current_timestep = 0 -# #self.engine.reset() # or clear state manually -# power_manager = PowerManager(compute_node_power, **self.config) -# flops_manager = FLOPSManager(**self.args_dict) -# telemetry = Telemetry(**self.args_dict) -# jobs, timestep_start, timestep_end = self._build_jobs() -# -# self.engine = Engine( -# power_manager=power_manager, -# flops_manager=flops_manager, -# jobs=jobs, -# **self.args_dict -# ) -# -# self.engine.timestep_start = timestep_start -# self.engine.timestep_end = timestep_end -# #self.engine.current_timestep = timestep_start -# -# # Restart generator -# self.generator = self.layout_manager.run_stepwise( -# self.jobs, -# timestep_start=self.timestep_start, -# timestep_end=self.timestep_end, -# time_delta=self.args_dict.get("time_delta"), -# ) -# -# return self._get_state(), {} - def reset(self, **kwargs): self.engine = self._create_engine() - - def reset2(self, **kwargs): - completed = [j.id for j in self.jobs if j.current_state.name == "COMPLETED"] - print(f"[RESET] Jobs already completed before deepcopy: {len(completed)}") - - super().reset(seed=42) - # self.engine.jobs = self.jobs - self.jobs = copy.deepcopy(self.original_jobs) # working copy - - # self.engine.timestep_start = self.timestep_start - # self.engine.timestep_end = self.timestep_end - # self.engine.reset(self.jobs, self.timestep_start, self.timestep_end) - - # self.engine.current_timestep = self.timestep_start - - # self.engine.jobs = self.jobs # 
repoint engine to fresh jobs - # self.engine.completed_jobs = [] - # self.engine.queue.clear() - # self.engine.running.clear() - # self.engine.power_manager.history.clear() - # self.engine.jobs_completed = 0 - - self.generator = self.layout_manager.run_stepwise( - self.jobs, - timestep_start=self.timestep_start, - timestep_end=self.timestep_end, - time_delta=self.args_dict.get("time_delta", 1), - ) - - return self._get_state() + obs = self._get_state() + info = {} + return obs, info def _compute_reward(self, tick_data): """ @@ -206,41 +144,6 @@ class RAPSEnv(gym.Env): return reward -# def _compute_reward(self, tick_data): -# """ -# Reward function: minimize carbon footprint per job completed. -# Encourages the agent to complete jobs while keeping emissions low. -# """ -# reward = 0.0 -# -# # Jobs completed this tick -# jobs_completed = len(getattr(tick_data, "completed", [])) -# -# # Carbon emitted so far (metric tons CO2) -# carbon_so_far = getattr(self.engine, "carbon emissions", 0.0) -# -# if jobs_completed > 0: -# # Reward is higher when more jobs finish with less carbon -# reward = jobs_completed / (carbon_so_far + 1e-6) -# else: -# # Small penalty if no jobs finished (encourages progress) -# reward = -0.01 -# -# return reward - - def _compute_reward2(self, tick_data, alpha=10.0, beta=1.0, gamma=2.0): - completed = getattr(tick_data, "completed", None) - jobs_completed = len(completed) if completed else 0 - power = self.power_manager.history[-1][1] - queue_len = len(self.engine.queue) - - reward = alpha * jobs_completed - beta * power - gamma * queue_len - - print(f"[t={self.engine.current_timestep}] jobs_completed={jobs_completed}, " - f"power={power}, queue_len={queue_len}, reward={reward}") - - return reward - def step(self, action): queue = self.engine.queue invalid_action = False @@ -268,6 +171,9 @@ class RAPSEnv(gym.Env): else: reward = self._compute_reward(tick_data) + # clip reward + reward = np.clip(reward, -10.0, 10.0) + # Print stats stats = self.get_stats() print_stats(stats) diff --git a/raps/train_rl.py b/raps/train_rl.py index eac4172..35edce1 100644 --- a/raps/train_rl.py +++ b/raps/train_rl.py @@ -24,6 +24,7 @@ def train_rl_add_parser(subparsers: SubParsers): def train_rl(rl_config: SingleSimConfig): from stable_baselines3 import PPO + from stable_baselines3.common.env_checker import check_env from raps.envs.raps_env import RAPSEnv args_dict = rl_config.get_legacy_args_dict() @@ -32,6 +33,7 @@ def train_rl(rl_config: SingleSimConfig): args_dict['args'] = rl_config.get_legacy_args() env = RAPSEnv(rl_config) + check_env(RAPSEnv(env)) model = PPO( "MlpPolicy", -- GitLab From 82f8925738c7aedbed9c5a3db6d28df77cf9cc69 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 11 Sep 2025 12:47:56 -0400 Subject: [PATCH 303/388] Get RL working again... 
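
reset() now returns just the observation, and the placement step picks the
first available node rather than leaving assignment implicit. A short sketch
of the interaction loop this revision assumes (classic gym 4-tuple from
step(); random action sampling stands in for PPO's learned policy):

    obs = env.reset()                        # observation only, no info dict
    done = False
    while not done:
        action = env.action_space.sample()   # placeholder for the policy
        obs, reward, done, info = env.step(action)
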
--- raps/envs/raps_env.py | 27 ++++++++++++++++++--------- raps/train_rl.py | 2 -- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index 2dbb032..d395066 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -61,12 +61,13 @@ class RAPSEnv(gym.Env): self.action_space = spaces.Discrete(max_jobs) def _create_engine(self): - self.engine, workload_data, time_delta = Engine.from_sim_config(self.sim_config) - self.engine.scheduler.env = self + engine, workload_data, time_delta = Engine.from_sim_config(self.sim_config) + engine.scheduler.env = self self.jobs = workload_data.jobs timestep_start = workload_data.telemetry_start timestep_end = workload_data.telemetry_end - self.generator = self.engine.run_simulation(self.jobs, timestep_start, timestep_end, time_delta) + self.generator = engine.run_simulation(self.jobs, timestep_start, timestep_end, time_delta) + return engine def _build_jobs(self): """ @@ -116,8 +117,7 @@ class RAPSEnv(gym.Env): def reset(self, **kwargs): self.engine = self._create_engine() obs = self._get_state() - info = {} - return obs, info + return obs def _compute_reward(self, tick_data): """ @@ -145,6 +145,9 @@ class RAPSEnv(gym.Env): return reward def step(self, action): + if self.engine is None: + raise RuntimeError("Engine not initialized. Did you forget to call reset()?") + queue = self.engine.queue invalid_action = False @@ -153,11 +156,17 @@ class RAPSEnv(gym.Env): invalid_action = True else: job = queue[int(action)] - available = len(self.engine.scheduler.resource_manager.available_nodes) - if job.nodes_required <= available: - # Valid scheduling + available_nodes = self.engine.scheduler.resource_manager.available_nodes + + if job.nodes_required <= len(available_nodes): + # Just pick the first available node (simplest placement policy) + node_id = available_nodes[0] self.engine.scheduler.place_job_and_manage_queues( - job, queue, self.engine.running, self.engine.current_timestep + job, + queue, + self.engine.running, + self.engine.current_timestep, + node_id, ) else: invalid_action = True diff --git a/raps/train_rl.py b/raps/train_rl.py index 35edce1..eac4172 100644 --- a/raps/train_rl.py +++ b/raps/train_rl.py @@ -24,7 +24,6 @@ def train_rl_add_parser(subparsers: SubParsers): def train_rl(rl_config: SingleSimConfig): from stable_baselines3 import PPO - from stable_baselines3.common.env_checker import check_env from raps.envs.raps_env import RAPSEnv args_dict = rl_config.get_legacy_args_dict() @@ -33,7 +32,6 @@ def train_rl(rl_config: SingleSimConfig): args_dict['args'] = rl_config.get_legacy_args() env = RAPSEnv(rl_config) - check_env(RAPSEnv(env)) model = PPO( "MlpPolicy", -- GitLab From edc8b43e00b995203592cb60189b5e8f169a9685 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 11 Sep 2025 13:02:04 -0400 Subject: [PATCH 304/388] Remove unused _build_jobs method --- raps/envs/raps_env.py | 47 ------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index d395066..e27a6d2 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -3,8 +3,6 @@ from gym import spaces import numpy as np from raps.engine import Engine -from raps.workload import Workload -# from raps.resmgr.default import MultiTenantResourceManager as ResourceManager from raps.stats import get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats from stable_baselines3.common.logger import Logger, HumanOutputFormat @@ -69,51 +67,6 @@ 
class RAPSEnv(gym.Env): self.generator = engine.run_simulation(self.jobs, timestep_start, timestep_end, time_delta) return engine - def _build_jobs(self): - """ - Build a job list either from synthetic workload (--workload) - or from telemetry replay (--replay). - Returns: jobs, timestep_start, timestep_end - """ - # --- Case 1: Telemetry replay --- - if self.cli_args and getattr(self.cli_args, "replay"): - result = self.telemetry.load_jobs_times_args_from_files( - files=self.cli_args.replay, - args=self.cli_args, - config=self.config, - ) - - # Handle 3-tuple vs 4-tuple return - if len(result) == 3: - jobs, start_time, end_time = result - elif len(result) == 4: - jobs, start_time, end_time, _ = result - else: - raise ValueError(f"Unexpected telemetry return format: {len(result)} values") - - # Flatten partitioned jobs if necessary - if jobs and isinstance(jobs[0], list): - jobs = [job for sublist in jobs for job in sublist] - - return jobs, start_time, end_time - - # --- Case 2: Synthetic workload generation --- - elif self.cli_args and getattr(self.cli_args, "workload"): - wl = Workload(self.cli_args, self.config) - jobs = wl.generate_jobs() - - # For synthetic jobs, compute timestep_end from submit + run_time - timestep_start = 0 - timestep_end = max( - (getattr(job, "end_time", None) or getattr(job, "expected_run_time", 0) + job.submit_time) - for job in jobs - ) - return jobs, timestep_start, timestep_end - - # --- Error: neither replay nor workload specified --- - else: - raise ValueError("RAPSEnv requires either --workload or --replay to build jobs.") - def reset(self, **kwargs): self.engine = self._create_engine() obs = self._get_state() -- GitLab From 219d243c09a0fd1d6b7d6829df174c6c95e91b3b Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Mon, 15 Sep 2025 11:02:30 -0400 Subject: [PATCH 305/388] Fix next_arrival_byconfargs error --- raps/utils.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index aeb04c5..c3c541f 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -54,6 +54,19 @@ def deep_subtract_dicts(a: dict, b: dict): return a +def to_dict(arg): + """ + Normalizes arg to a dictionary if necessary. Used to convert between legacy argparse.Namespace + objects and dictionaries. 
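+
+    An illustrative round-trip (hypothetical values):
+        to_dict(argparse.Namespace(downscale=1)) -> {"downscale": 1}
+        to_dict({"downscale": 1})                -> {"downscale": 1}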
+ """ + if isinstance(arg, dict): + return arg + elif isinstance(arg, argparse.Namespace): + return vars(arg) + else: + raise ValueError(f"Cannot convert {arg} to dict") + + def sum_values(values): return sum(x[1] for x in values) if values else 0 @@ -456,14 +469,15 @@ def create_dir_indexed(dir: str, path: str = None) -> str: def next_arrival_byconfargs(config, args, reset=False): + args = to_dict(args) arrival_rate = 1 arrival_time = config['JOB_ARRIVAL_TIME'] - downscale = args.downscale + downscale = args['downscale'] - if args.job_arrival_rate: - arrival_rate = args.job_arrival_rate - if args.job_arrival_time: - arrival_time = args.job_arrival_time + if args['job_arrival_rate']: + arrival_rate = args['job_arrival_rate'] + if args['job_arrival_time']: + arrival_time = args['job_arrival_time'] return next_arrival(arrival_rate / (arrival_time * downscale), reset) -- GitLab From c41b265e162f97f2f1fc94e7b9fec44f2dde5544 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Tue, 16 Sep 2025 15:59:15 -0400 Subject: [PATCH 306/388] Add TODO comment --- raps/telemetry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/raps/telemetry.py b/raps/telemetry.py index 7ab911a..6d5aa19 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -229,6 +229,8 @@ class Telemetry: job.scheduled_nodes = None # Setting to None triggers scheduler to assign nodes if self.kwargs['arrival'] == "poisson": + # TODO: --arrival poisson distribution throws errors about start_time in some scenarios + # e.g. `python main.py run-parts experiments/mit-replay-24hrs.yaml --arrival poisson` for job in jobs: job.scheduled_nodes = None job.submit_time = next_arrival_byconfargs(self.config, self.kwargs) -- GitLab From 421b1c7aa30c0c32046ca9f0a318039f7e0952bb Mon Sep 17 00:00:00 2001 From: "Brewer, Wes" Date: Tue, 16 Sep 2025 21:49:41 +0000 Subject: [PATCH 307/388] Breakup workload.py into workloads/*.py --- main.py | 2 +- raps/engine.py | 2 +- raps/workload.py | 1001 -------------------------------- raps/workloads/__init__.py | 75 +++ raps/workloads/basic.py | 419 +++++++++++++ raps/workloads/constants.py | 13 + raps/workloads/distribution.py | 188 ++++++ raps/workloads/live.py | 39 ++ raps/workloads/multitenant.py | 154 +++++ raps/workloads/utils.py | 159 +++++ 10 files changed, 1049 insertions(+), 1003 deletions(-) delete mode 100644 raps/workload.py create mode 100644 raps/workloads/__init__.py create mode 100644 raps/workloads/basic.py create mode 100644 raps/workloads/constants.py create mode 100644 raps/workloads/distribution.py create mode 100644 raps/workloads/live.py create mode 100644 raps/workloads/multitenant.py create mode 100644 raps/workloads/utils.py diff --git a/main.py b/main.py index 9b36a76..b3c03f6 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ ExaDigiT Resource Allocator & Power Simulator (RAPS) import argparse from raps.helpers import check_python_version from raps.run_sim import run_sim_add_parser, run_parts_sim_add_parser, show_add_parser -from raps.workload import run_workload_add_parser +from raps.workloads import run_workload_add_parser from raps.telemetry import run_telemetry_add_parser from raps.train_rl import train_rl_add_parser diff --git a/raps/engine.py b/raps/engine.py index 419f1eb..4fdadb2 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -34,7 +34,7 @@ from raps.network import ( from raps.telemetry import Telemetry from raps.cooling import ThermoFluidsModel from raps.flops import FLOPSManager -from raps.workload import Workload, continuous_job_generation +from raps.workloads import 
Workload, continuous_job_generation from raps.account import Accounts from raps.downtime import Downtime from raps.weather import Weather diff --git a/raps/workload.py b/raps/workload.py deleted file mode 100644 index 2d57227..0000000 --- a/raps/workload.py +++ /dev/null @@ -1,1001 +0,0 @@ -""" -Module for generating workload traces and jobs. - -This module provides functionality for generating random workload traces and -jobs for simulation and testing purposes. - -Attributes ----------- -TRACE_QUANTA : int - The time interval in seconds for tracing workload utilization. -MAX_NODES_PER_JOB : int - The maximum number of nodes required for a job. -JOB_NAMES : list - List of possible job names for random job generation. -CPUS_PER_NODE : int - Number of CPUs per node. -GPUS_PER_NODE : int - Number of GPUs per node. -MAX_WALL_TIME : int - Maximum wall time for a job in seconds. -MIN_WALL_TIME : int - Minimum wall time for a job in seconds. -JOB_END_PROBS : list - List of probabilities for different job end states. - -""" -from raps.utils import ( - truncated_normalvariate_int, - truncated_normalvariate_float, - determine_state, next_arrival, - next_arrival_byconfargs, - truncated_weibull, - truncated_weibull_float, - WorkloadData, -) -import math -import random -import numpy as np -import matplotlib.pyplot as plt -from raps.telemetry import Telemetry -from raps.job import job_dict, Job -from raps.utils import create_file_indexed, SubParsers, pydantic_add_args -from raps.sim_config import SingleSimConfig - - -JOB_NAMES = ["LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD", - "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM", - "ABINIT", "Cactus", "Charm++", "NWChem", "STAR-CCM+", - "Gaussian", "ANSYS", "COMSOL", "PLUMED", "nekrs", - "TensorFlow", "PyTorch", "BLAST", "Spark", "GAMESS", - "ORCA", "Simulink", "MOOSE", "ELK"] - -ACCT_NAMES = ["ACT01", "ACT02", "ACT03", "ACT04", "ACT05", "ACT06", "ACT07", - "ACT08", "ACT09", "ACT10", "ACT11", "ACT12", "ACT13", "ACT14"] - -MAX_PRIORITY = 500000 - - -class Workload: - def __init__(self, args, *configs): - """ Initialize Workload with multiple configurations. """ - self.partitions = [config['system_name'] for config in configs] - self.config_map = {config['system_name']: config for config in configs} - self.args = args - - def generate_jobs(self): - # This function calls the job generation function as specified by the workload keyword. - # The respective funciton of this class is called. - jobs = getattr(self, self.args.workload)(args=self.args) - timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) - return WorkloadData( - jobs=jobs, - telemetry_start=0, telemetry_end=timestep_end, - start_date=self.args.start, - ) - - def compute_traces(self, - cpu_util: float, - gpu_util: float, - expected_run_time: int, - trace_quanta: int - ) -> tuple[np.ndarray, np.ndarray]: - """ Compute CPU and GPU traces based on mean CPU & GPU utilizations and wall time. 
""" - cpu_trace = cpu_util * np.ones(int(expected_run_time) // trace_quanta) - gpu_trace = gpu_util * np.ones(int(expected_run_time) // trace_quanta) - return (cpu_trace, gpu_trace) - - def job_arrival_distribution_draw_poisson(self, args, config): - return next_arrival_byconfargs(config, args) - - def job_size_distribution_draw_uniform(self, args, config): - min_v = 1 - max_v = config['MAX_NODES_PER_JOB'] - if (args.jobsize_is_power_of is not None): - base = args.jobsize_is_power_of - possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] - selection = random.randint(0, len(possible_jobsizes) - 1) - number = possible_jobsizes[selection] - elif (args.jobsize_is_of_degree is not None): - exp = args.jobsize_is_of_degree - possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] - selection = random.randint(0, len(possible_jobsizes) - 1) - number = possible_jobsizes[selection] - else: - number = random.randint(1, config['MAX_NODES_PER_JOB']) - return number - - def job_size_distribution_draw_weibull(self, args, config): - min_v = 1 - max_v = config['MAX_NODES_PER_JOB'] - if (args.jobsize_is_power_of is not None): - base = args.jobsize_is_power_of - possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] - scale = math.log(args.jobsize_weibull_scale, base) - shape = math.log(args.jobsize_weibull_shape, base) - selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) - number = possible_jobsizes[selection] - elif (args.jobsize_is_of_degree is not None): - exp = args.jobsize_is_of_degree - possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] - scale = math.pow(args.jobsize_weibull_scale, 1 / exp) - shape = math.pow(args.jobsize_weibull_shape, 1 / exp) - selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) - number = possible_jobsizes[selection] - else: - number = truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, - 1, config['MAX_NODES_PER_JOB']) - return number - - def job_size_distribution_draw_normal(self, args, config): - min_v = 1 - max_v = config['MAX_NODES_PER_JOB'] - if (args.jobsize_is_power_of is not None): - base = args.jobsize_is_power_of - possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] - mean = math.log(args.jobsize_normal_mean, base) - stddev = math.log(args.jobsize_normal_stddev, base) # (len(possible_jobsizes) / (max_v - min_v)) - selection = truncated_normalvariate_int(mean, stddev, 0, len(possible_jobsizes) - 1) - number = possible_jobsizes[selection - 1] - elif (args.jobsize_is_of_degree is not None): - exp = args.jobsize_is_of_degree - possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] - mean = math.pow(args.jobsize_normal_mean, 1 / exp) - stddev = math.pow(args.jobsize_normal_stddev, 1 / exp) - selection = truncated_weibull(mean, stddev, 0, len(possible_jobsizes) - 1) - number = possible_jobsizes[selection] - else: - number = truncated_normalvariate_int( - args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) - return number - - def cpu_utilization_distribution_draw_uniform(self, args, config): - return random.uniform(0.0, config['CPUS_PER_NODE']) - - def cpu_utilization_distribution_draw_normal(self, args, config): - return truncated_normalvariate_float(args.cpuutil_normal_mean, - args.cpuutil_normal_stddev, - 0.0, 
config['CPUS_PER_NODE']) - - def cpu_utilization_distribution_draw_weibull(self, args, config): - return truncated_weibull_float(args.cpuutil_weibull_scale, - args.cpuutil_weibull_shape, - 0.0, config['CPUS_PER_NODE']) - - def gpu_utilization_distribution_draw_uniform(self, args, config): - return random.uniform(0.0, config['GPUS_PER_NODE']) - - def gpu_utilization_distribution_draw_normal(self, args, config): - return truncated_normalvariate_float(args.gpuutil_normal_mean, - args.gpuutil_normal_stddev, - 0.0, config['GPUS_PER_NODE']) - - def gpu_utilization_distribution_draw_weibull(self, args, config): - return truncated_weibull_float(args.gpuutil_weibull_scale, - args.gpuutil_weibull_shape, - 0.0, config['GPUS_PER_NODE']) - - def wall_time_distribution_draw_uniform(self, args, config): - return random.uniform(config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) - - def wall_time_distribution_draw_normal(self, args, config): - return max(1, truncated_normalvariate_int(float(args.walltime_normal_mean), - float(args.walltime_normal_stddev), config['MIN_WALL_TIME'], - config['MAX_WALL_TIME']) / 3600 * 3600) - - def wall_time_distribution_draw_weibull(self, args, config): - return truncated_weibull(args.walltime_weibull_scale, - args.walltime_weibull_shape, - config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) - - def generate_jobs_from_distribution(self, *, - job_arrival_distribution_to_draw_from, - job_size_distribution_to_draw_from, - cpu_util_distribution_to_draw_from, - gpu_util_distribution_to_draw_from, - wall_time_distribution_to_draw_from, - args - ) -> list[list[any]]: - jobs = [] - partition = random.choice(self.partitions) - config = self.config_map[partition] - for job_index in range(args.numjobs): - submit_time = int(job_arrival_distribution_to_draw_from(args, config)) - start_time = submit_time - nodes_required = job_size_distribution_to_draw_from(args, config) - name = random.choice(JOB_NAMES) - account = random.choice(ACCT_NAMES) - cpu_util = cpu_util_distribution_to_draw_from(args, config) - if "CORES_PER_CPU" in config: - cpu_cores_required = random.randint(0, config["CORES_PER_CPU"]) - else: - cpu_cores_required = None - gpu_util = gpu_util_distribution_to_draw_from(args, config) - if "GPUS_PER_NODE" in config: - if isinstance(gpu_util, list): - gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"], math.ceil(max(gpu_util)))) - else: - gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"], math.ceil(gpu_util))) - wall_time = wall_time_distribution_to_draw_from(args, config) - end_time = start_time + wall_time - time_limit = max(wall_time, wall_time_distribution_to_draw_from(args, config)) - end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace = cpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - gpu_trace = gpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = None, None - job_info = job_dict(nodes_required=nodes_required, name=name, - account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, ntx_trace=net_tx, - nrx_trace=net_rx, end_state=end_state, - id=job_index, priority=priority, - partition=partition, - submit_time=submit_time, - time_limit=time_limit, - start_time=start_time, - end_time=end_time, - expected_run_time=wall_time, trace_time=wall_time, - trace_start_time=0, trace_end_time=wall_time, - cpu_cores_required=cpu_cores_required, - gpu_units_required=gpu_units_required, - 
trace_quanta=config['TRACE_QUANTA'] - ) - job = Job(job_info) - jobs.append(job) - return jobs - - # Test for random 'reasonable' AI jobs - def randomAI(self, **kwargs): - args = kwargs.get('args', None) - jobs = [] - for i in range(args.numjobs): - draw = random.randint(0, 10) - if draw == 0: - et = random.randint(7200, 28800) - nr = random.choice([128, 256, 512, 1024, 1280, 1792, 2048]) - new_job = Job(job_dict(nodes_required=nr, - name="LLM", - account="llmUser", - end_state="Success", - id=random.randint(1, 99999), - cpu_trace=0.1, - gpu_trace=(random.uniform(0.55, 0.8) * - self.config_map[self.args.system]['GPUS_PER_NODE']), - ntx_trace=None, - nrx_trace=None, - submit_time=0, - time_limit=random.randint(43200, 43200), - start_time=0, - end_time=et, - expected_run_time=et)) - else: - new_job = Job(job_dict(nodes_required=1, - name="LLM", - account="llmUser", - end_state="Success", - id=random.randint(1, 99999), - cpu_trace=1, - gpu_trace=(0.2 * self.config_map[self.args.system]['GPUS_PER_NODE']), - ntx_trace=None, - nrx_trace=None, - submit_time=0, - time_limit=43200, - start_time=0, - end_time=7200, - expected_run_time=random.randint(60, 7200))) - jobs.append(new_job) - return jobs - - def synthetic(self, **kwargs): - args = kwargs.get('args', None) - print(args) - total_jobs = args.numjobs - orig_job_size_distribution = args.jobsize_distribution - orig_wall_time_distribution = args.walltime_distribution - orig_cpuutil_distribution = args.cpuutil_distribution - orig_gpuutil_distribution = args.gpuutil_distribution - jobs = [] - if len(args.jobsize_distribution) != 1 and sum(args.multimodal) != 1.0: - raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}") - for i, (jsdist, wtdist, cudist, gudist, percentage) in enumerate(zip(args.jobsize_distribution, - args.walltime_distribution, - args.cpuutil_distribution, - args.gpuutil_distribution, - args.multimodal)): - - args.numjobs = math.floor(total_jobs * percentage) - args.jobsize_distribution = jsdist - args.walltime_distribution = wtdist - args.cpuutil_distribution = cudist - args.gpuutil_distribution = gudist - - job_arrival_distribution_to_draw_from = self.job_arrival_distribution_draw_poisson - match args.jobsize_distribution: - case "uniform": - job_size_distribution_to_draw_from = self.job_size_distribution_draw_uniform - case "normal": - job_size_distribution_to_draw_from = self.job_size_distribution_draw_normal - case "weibull": - job_size_distribution_to_draw_from = self.job_size_distribution_draw_weibull - case _: - raise NotImplementedError(args.jobsize_distribution) - - match args.walltime_distribution: - case "weibull": - wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_weibull - case "normal": - wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_normal - case "uniform": - wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_uniform - case _: - raise NotImplementedError(args.walltime_distribution) - - match args.cpuutil_distribution: - case "uniform": - cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_uniform - case "normal": - cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_normal - case "weibull": - cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_weibull - case _: - raise NotImplementedError(args.cpuutil_distribution) - - match args.gpuutil_distribution: - case "uniform": - gpu_util_distribution_to_draw_from = 
self.gpu_utilization_distribution_draw_uniform - case "normal": - gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_normal - case "weibull": - gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_weibull - case _: - raise NotImplementedError(args.gpuutil_distribution) - - new_jobs = self.generate_jobs_from_distribution( - job_arrival_distribution_to_draw_from=job_arrival_distribution_to_draw_from, - job_size_distribution_to_draw_from=job_size_distribution_to_draw_from, - cpu_util_distribution_to_draw_from=cpu_util_distribution_to_draw_from, - gpu_util_distribution_to_draw_from=gpu_util_distribution_to_draw_from, - wall_time_distribution_to_draw_from=wall_time_distribution_to_draw_from, - args=args) - next_arrival(0, reset=True) - jobs.extend(new_jobs) - args.numjobs = total_jobs - args.jobsize_distribution = orig_job_size_distribution - args.cpuutil_distribution = orig_cpuutil_distribution - args.gpuutil_distribution = orig_gpuutil_distribution - args.walltime_distribution = orig_wall_time_distribution - return jobs - - def generate_random_jobs(self, args) -> list[list[any]]: - """ Generate random jobs with specified number of jobs. """ - - partition = random.choice(self.partitions) - config = self.config_map[partition] - - # time_delta = args.time_delta # Unused - downscale = args.downscale - - config['MIN_WALL_TIME'] = config['MIN_WALL_TIME'] * downscale - config['MAX_WALL_TIME'] = config['MAX_WALL_TIME'] * downscale - jobs = [] - for job_index in range(args.numjobs): - # Randomly select a partition - # Get the corresponding config for the selected partition - nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) - name = random.choice(JOB_NAMES) - account = random.choice(ACCT_NAMES) - cpu_util = random.random() * config['CPUS_PER_NODE'] - gpu_util = random.random() * config['GPUS_PER_NODE'] - mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 - sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 - wall_time = (truncated_normalvariate_int( - mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) - time_limit = (truncated_normalvariate_int(mu, sigma, wall_time, - config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) - # print(f"wall_time: {wall_time//downscale}") - # print(f"time_limit: {time_limit//downscale}") - end_state = determine_state(config['JOB_END_PROBS']) - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) - priority = random.randint(0, MAX_PRIORITY) - net_tx, net_rx = None, None - - # Jobs arrive according to Poisson process - time_to_next_job = int(next_arrival_byconfargs(config, args)) - # wall_time = wall_time * downscale - # time_limit = time_limit * downscale - - job_info = job_dict(nodes_required=nodes_required, name=name, - account=account, cpu_trace=cpu_trace, - gpu_trace=gpu_trace, ntx_trace=net_tx, - nrx_trace=net_rx, end_state=end_state, - id=job_index, priority=priority, - partition=partition, - submit_time=time_to_next_job - 100, - time_limit=time_limit, - start_time=time_to_next_job, - end_time=time_to_next_job + wall_time, - expected_run_time=wall_time, trace_time=wall_time, - trace_start_time=0, trace_end_time=wall_time, - trace_quanta=config['TRACE_QUANTA'] * downscale, - downscale=downscale - ) - job = Job(job_info) - jobs.append(job) - return jobs - - def random(self, **kwargs): - """ Generate random workload """ - args = kwargs.get('args', None) - return 
self.generate_random_jobs(args=args) - - def peak(self, **kwargs): - """Peak power test for multiple partitions""" - jobs = [] - - # Iterate through each partition and get its configuration - for partition in self.partitions: - # Fetch the config for the current partition - config = self.config_map[partition] - - # Generate traces based on partition-specific configuration - cpu_util = config['CPUS_PER_NODE'] - gpu_util = config['GPUS_PER_NODE'] - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) - net_tx, net_rx = None, None - - job_time = len(gpu_trace) * config['TRACE_QUANTA'] - # Create job info for this partition - job_info = job_dict(nodes_required=config['AVAILABLE_NODES'], - # Down nodes, therefore doesnt work list(range(config['AVAILABLE_NODES'])), - scheduled_nodes=[], - name=f"Max Test {partition}", - account=ACCT_NAMES[0], - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=net_tx, - nrx_trace=net_rx, - end_state='COMPLETED', - id=None, - priority=100, - partition=partition, - time_limit=job_time + 1, - start_time=0, - end_time=job_time, - expected_run_time=job_time, - trace_time=job_time, - trace_start_time=0, - trace_end_time=job_time, - trace_quanta=config['TRACE_QUANTA'] - ) - job = Job(job_info) - jobs.append(job) # Add job to the list - - return jobs - - def idle(self, **kwargs): - jobs = [] - # Iterate through each partition and get its configuration - for partition in self.partitions: - # Fetch the config for the current partition - config = self.config_map[partition] - - # Generate traces based on partition-specific configuration - cpu_util, gpu_util = 0, 0 - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) - net_tx, net_rx = None, None - - job_time = len(gpu_trace) * config['TRACE_QUANTA'] - # Create job info for this partition - job_info = job_dict( - nodes_required=config['AVAILABLE_NODES'], - name=f"Idle Test {partition}", - account=ACCT_NAMES[0], - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=net_tx, - nrx_trace=net_rx, - end_state='COMPLETED', - scheduled_nodes=[], # list(range(config['AVAILABLE_NODES'])), - id=None, - priority=100, - partition=partition, - time_limit=job_time + 1, - submit_time=0, - start_time=0, - end_time=job_time, - expected_run_time=job_time, - trace_time=job_time, - trace_start_time=0, - trace_end_time=job_time, - trace_quanta=config['TRACE_QUANTA']) - job = Job(job_info) - jobs.append(job) # Add job to the list - - return jobs - - def benchmark(self, **kwargs): - """Benchmark tests for multiple partitions""" - - # List to hold jobs for all partitions - jobs = [] - account = ACCT_NAMES[0] - # Iterate through each partition and its config - for partition in self.partitions: - # Fetch partition-specific configuration - config = self.config_map[partition] - net_tx, net_rx = None, None - - # Max test - cpu_util, gpu_util = 1, 4 - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) - - job_time = len(gpu_trace) * config['TRACE_QUANTA'] - - job_info = job_dict( - nodes_required=config['AVAILABLE_NODES'], - scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes - name=f"Max Test {partition}", - account=account, - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=net_tx, - nrx_trace=net_rx, - end_state='COMPLETED', - id=None, - priority=100, - partition=partition, - submit_time=0, - time_limit=job_time + 1, - start_time=0, - end_time=job_time, - expected_run_time=job_time, - 
trace_time=job_time, - trace_start_time=0, - trace_end_time=job_time, - trace_missing_values=False, - trace_quanta=config['TRACE_QUANTA']) - job = Job(job_info) - jobs.append(job) - - # OpenMxP run - cpu_util, gpu_util = 0, 4 - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) - job_time = len(gpu_trace) * config['TRACE_QUANTA'] - - job_info = job_dict( - nodes_required=config['AVAILABLE_NODES'], - scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes - name=f"OpenMxP {partition}", - account=account, - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=net_tx, - nrx_trace=net_rx, - end_state='COMPLETED', - id=None, - priority=100, - partition=partition, - submit_time=0, - time_limit=job_time + 1, - start_time=10800, - end_time=14200, - expected_run_time=job_time, - trace_time=job_time, - trace_start_time=0, - trace_end_time=job_time, - trace_missing_values=False, - trace_quanta=config['TRACE_QUANTA']) - job = Job(job_info) - jobs.append(job) - - # HPL run - cpu_util, gpu_util = 0.33, 0.79 * 4 # based on 24-01-18 run - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) - job_time = len(gpu_trace) * config['TRACE_QUANTA'] - job_info = job_dict( - nodes_required=config['AVAILABLE_NODES'], - scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes - name=f"HPL {partition}", - account=account, - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=net_tx, - nrx_trace=net_rx, - end_state='COMPLETED', - id=None, - priority=100, - partition=partition, - submit_time=0, - time_limit=job_time + 1, - start_time=14200, - end_time=17800, - expected_run_time=job_time, - trace_time=job_time, - trace_start_time=0, - trace_end_time=job_time, - trace_missing_values=False, - trace_quanta=config['TRACE_QUANTA']) - job = Job(job_info) - jobs.append(job) - - # Idle test - cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) - job_time = len(gpu_trace) * config['TRACE_QUANTA'] - job_info = job_dict( - nodes_required=config['AVAILABLE_NODES'], - scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes - name=f"Idle Test {partition}", - account=account, - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=net_tx, - nrx_trace=net_rx, - end_state='COMPLETED', - id=None, - priority=100, - partition=partition, - submit_time=0, - time_limit=job_time + 1, - start_time=17800, - end_time=21400, - expected_run_time=job_time, - trace_time=job_time, - trace_start_time=0, - trace_end_time=job_time, - trace_missing_values=False, - trace_quanta=config['TRACE_QUANTA']) - job = Job(job_info) - jobs.append(job) - - return jobs - - def multitenant(self, **kwargs): - """ - Generate deterministic jobs to validate multitenant scheduling & power. - - Parameters - ---------- - mode : str - One of: - - 'ONE_JOB_PER_NODE_ALL_CORES' - - 'TWO_JOBS_PER_NODE_SPLIT' - - 'STAGGERED_JOBS_PER_NODE' - wall_time : int - Duration (seconds) of each job (default: 3600) - trace_quanta : int - Sampling interval for traces; defaults to config['TRACE_QUANTA'] - - Returns - ------- - list[dict] - List of job_dict entries. 
- """ - mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') - wall_time = kwargs.get('wall_time', 3600) - - jobs = [] - - for partition in self.partitions: - cfg = self.config_map[partition] - trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) - - cores_per_cpu = cfg.get('CORES_PER_CPU', 1) - cpus_per_node = cfg.get('CPUS_PER_NODE', 1) - cores_per_node = cores_per_cpu * cpus_per_node - gpus_per_node = cfg.get('GPUS_PER_NODE', 0) - - n_nodes = cfg['AVAILABLE_NODES'] - - def make_trace(cpu_util, gpu_util): - return self.compute_traces(cpu_util, gpu_util, wall_time, trace_quanta) - - job_id_ctr = 0 - - if mode == 'ONE_JOB_PER_NODE_ALL_CORES': - # Each node runs one job that consumes all cores/GPUs - for nid in range(n_nodes): - cpu_trace, gpu_trace = make_trace(cores_per_node, gpus_per_node) - jobs.append(Job(job_dict( - nodes_required=1, - cpu_cores_required=cores_per_node, - gpu_units_required=gpus_per_node, - name=f"MT_full_node_{partition}_{nid}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=0, - time_limit=wall_time, - start_time=0, - end_time=wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=cfg['TRACE_QUANTA'] - ))) - job_id_ctr += 1 - - elif mode == 'TWO_JOBS_PER_NODE_SPLIT': - # Two jobs per node: split CPU/GPU roughly in half - for nid in range(n_nodes): - cpu_a = cores_per_node // 2 - cpu_b = cores_per_node - cpu_a - gpu_a = gpus_per_node // 2 - gpu_b = gpus_per_node - gpu_a - - for idx, (c_req, g_req, tag) in enumerate([(cpu_a, gpu_a, 'A'), - (cpu_b, gpu_b, 'B')]): - cpu_trace, gpu_trace = make_trace(c_req, g_req) - jobs.append(Job(job_dict( - nodes_required=1, # still one node; multitenant RM packs cores - cpu_cores_required=c_req, - gpu_units_required=g_req, - name=f"MT_split_node_{partition}_{nid}_{tag}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=0, - time_limit=wall_time, - start_time=0, - end_time=wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=cfg['TRACE_QUANTA'] - ))) - job_id_ctr += 1 - - elif mode == 'STAGGERED_JOBS_PER_NODE': - # Three jobs per node, staggered starts: 0, wall_time/3, 2*wall_time/3 - offsets = [0, wall_time // 3, 2 * wall_time // 3] - cpu_each = cores_per_node // 3 or 1 - gpu_each = max(1, gpus_per_node // 3) if gpus_per_node else 0 - - for nid in range(n_nodes): - for k, offset in enumerate(offsets): - cpu_trace, gpu_trace = make_trace(cpu_each, gpu_each) - jobs.append(Job(job_dict( - nodes_required=1, - cpu_cores_required=cpu_each, - gpu_units_required=gpu_each, - name=f"MT_stagger_node_{partition}_{nid}_{k}", - account=random.choice(ACCT_NAMES), - cpu_trace=cpu_trace, - gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], - end_state='COMPLETED', - id=job_id_ctr, - priority=random.randint(0, MAX_PRIORITY), - partition=partition, - submit_time=offset, - time_limit=wall_time, - start_time=offset, - end_time=offset + wall_time, - expected_run_time=wall_time, - trace_time=wall_time, - trace_start_time=0, - trace_end_time=wall_time, - trace_quanta=cfg['TRACE_QUANTA'] - ))) - job_id_ctr += 1 - else: - raise 
ValueError(f"Unknown multitenant mode: {mode}") - - return jobs - - -def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): - # put args.multimodal in dist_split! - split = [1.0] - num_dist = 1 - if dist_split: - num_dist = len(dist_split) - split = dist_split - - y = [y.nodes_required for y in jobs] - x = [x.expected_run_time for x in jobs] - x2 = [x.time_limit for x in jobs] - fig_m = plt.figure() - gs = fig_m.add_gridspec(30, 1) - gs0 = gs[0:20].subgridspec(500, 500, hspace=0, wspace=0) - gs1 = gs[24:].subgridspec(1, 1) - - ax_top = fig_m.add_subplot(gs0[:]) - ax_top.axis('off') - ax_top.set_title('Job Distribution') - - ax_bot = fig_m.add_subplot(gs1[:]) - ax_bot.axis('off') - ax_bot.set_title('Submit Time + Wall Time') - - # ax0 = fig_m.add_subplot(gs[:2,:]) - # ax1 = fig_m.add_subplot(gs[2:,:]) - - # gss = gridspec.GridSpec(5, 5, figure=ax0) - # fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) - axs = [] - col = [] - col.append(fig_m.add_subplot(gs0[:100, :433])) - col.append(fig_m.add_subplot(gs0[:100, 433:])) - axs.append(col.copy()) - col = [] - col.append(fig_m.add_subplot(gs0[100:, :433])) - col.append(fig_m.add_subplot(gs0[100:, 433:])) - axs.append(col.copy()) - - ax_b = fig_m.add_subplot(gs1[:, :]) - - # Create scatter plot - for i in range(len(x)): - axs[1][0].plot([x[i], x2[i]], [y[i], y[i]], color='lightblue', zorder=1) - axs[1][0].scatter(x2, y, marker='.', c='lightblue', zorder=2) - axs[1][0].scatter(x, y, zorder=3) - - cpu_util = [x.cpu_trace for x in jobs] - if isinstance(cpu_util[0], np.ndarray): - cpu_util = np.concatenate(cpu_util).ravel() - elif isinstance(cpu_util[0], list): - cpu_util = [sum(part) / len(part) for part in cpu_util] - gpu_util = [x.gpu_trace for x in jobs] - if isinstance(gpu_util[0], np.ndarray): - gpu_util = np.concatenate(gpu_util).ravel() - elif isinstance(gpu_util[0], list): - gpu_util = [sum(part) / len(part) for part in gpu_util] - if not all([x == 0 for x in gpu_util]): - axs[0][1].scatter(cpu_util, gpu_util, zorder=2, marker='.', s=0.2) - axs[0][1].hist(gpu_util, bins=100, orientation='horizontal', zorder=1, density=True, color='tab:purple') - axs[0][1].axhline(np.mean(gpu_util), color='r', linewidth=1, zorder=3) - axs[0][1].set(ylim=[0, config['GPUS_PER_NODE']]) - axs[0][1].set_ylabel("gpu util") - axs[0][1].yaxis.set_label_coords(1.15, 0.5) - axs[0][1].yaxis.set_label_position("right") - axs[0][1].yaxis.tick_right() - else: - axs[0][1].set_yticks([]) - axs[0][1].hist(cpu_util, bins=100, orientation='vertical', zorder=1, density=True, color='tab:cyan') - axs[0][1].axvline(np.mean(cpu_util), color='r', linewidth=1, zorder=3) - axs[0][1].set(xlim=[0, config['CPUS_PER_NODE']]) - axs[0][1].set_xlabel("cpu util") - axs[0][1].xaxis.set_label_coords(0.5, 1.30) - axs[0][1].xaxis.set_label_position("top") - axs[0][1].xaxis.tick_top() - axs[0][0].hist(x2, bins=max(1, math.ceil(min(100, (max(x2) - min(x))))), orientation='vertical', color='lightblue') - axs[0][0].hist(x, bins=max(1, math.ceil(min(100, (max(x2) - min(x))))), orientation='vertical') - axs[1][0].sharex(axs[0][0]) - axs[1][1].hist(y, bins=max(1, min(100, (max(y) - min(y)))), orientation='horizontal') - axs[1][0].sharey(axs[1][1]) - - # Remove ticks - axs[0][0].set_xticks([]) - axs[1][1].set_yticks([]) - axs[0][1].spines['top'].set_color('white') - axs[0][1].spines['right'].set_color('white') - axs[1][0].set_ylabel("nodes [N]") - axs[1][0].set_xlabel("wall time [hh:mm]") - minx_s = 0 - maxx_s = math.ceil(max(x2)) - 
x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] - x_label_ticks = [n * 60 for n in x_label_mins[0::60]] - x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for - (x1, x2) in [(n // 60, n % 60) for - n in x_label_mins[0::60]]] - axs[1][0].set_xticks(x_label_ticks, x_label_str) - miny = min(y) - maxy = max(y) - interval = max(1, maxy // 10) - y_ticks = np.arange(0, maxy, interval) - y_ticks[0] = miny - axs[1][0].set_yticks(y_ticks) - - axs[0][0].tick_params(axis="x", labelbottom=False) - axs[1][1].tick_params(axis="y", labelleft=False) - - # Submit_time and Wall_time - duration = [x.expected_run_time for x in jobs] - nodes_required = [x.nodes_required for x in jobs] - submit_t = [x.submit_time for x in jobs] - - offset = 0 - split_index = 0 - split_offset = math.floor(len(x) * split[split_index]) - if gantt_nodes: - if split[0] == 0.0: - ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) - split_index += 1 - for i in range(len(x)): - # ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) - ax_b.barh(offset + nodes_required[i] / 2, duration[i], height=nodes_required[i], left=submit_t[i]) - offset += nodes_required[i] - if i != len(x) - 1 and i == split_offset - 1 and split_index < len(split): - ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) - split_index += 1 - split_offset += math.floor(len(x) * split[split_index]) - # ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) - if split[-1] == 0.0: - ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) - split_index += 1 - ax_b.set_ylabel("Jobs' acc. nodes") - else: - for i in range(len(x)): - ax_b.barh(i, duration[i], height=1.0, left=submit_t[i]) - for i in range(1, num_dist): - if num_dist == 1: - break - ax_b.axhline(y=(len(x) * split[split_index]) - 0.5, color='red', linestyle='--', lw=0.5) - split_index += 1 - ax_b.set_ylabel("Job ID") - # ax_b labels: - ax_b.set_xlabel("time [hh:mm]") - minx_s = 0 - maxx_s = math.ceil(max([x.expected_run_time for x in jobs]) + max([x.submit_time for x in jobs])) - x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] - x_label_ticks = [n * 60 for n in x_label_mins[0::60]] - x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for - (x1, x2) in [(n // 60, n % 60) for - n in x_label_mins[0::60]]] - - ax_b.set_xticks(x_label_ticks, x_label_str) - ax_b.yaxis.set_inverted(True) - - plt.show() - - -def run_workload_add_parser(subparsers: SubParsers): - from raps.sim_config import SIM_SHORTCUTS - # TODO: Separate the arguments for this command - parser = subparsers.add_parser("workload", description=""" - Saves workload as a snapshot. - """) - parser.add_argument("config_file", nargs="?", default=None, help=""" - YAML sim config file, can be used to configure an experiment instead of using CLI - flags. Pass "-" to read from stdin. 
- """) - model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ - "cli_shortcuts": SIM_SHORTCUTS, - }) - parser.set_defaults(impl=lambda args: run_workload(model_validate(args, {}))) - - -def run_workload(sim_config: SingleSimConfig): - args = sim_config.get_legacy_args() - args_dict = sim_config.get_legacy_args() - config = sim_config.system_configs[0].get_legacy() - - if sim_config.replay: - td = Telemetry(**args_dict) - jobs = td.load_from_files(sim_config.replay).jobs - else: - workload = Workload(args, config) - jobs = getattr(workload, sim_config.workload)(args=sim_config.get_legacy_args()) - plot_job_hist(jobs, - config=config, - dist_split=sim_config.multimodal, - gantt_nodes=sim_config.gantt_nodes) - - out = sim_config.get_output() - if out: - timestep_start = min([x.submit_time for x in jobs]) - timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs])) - filename = create_file_indexed('wl', path=str(out), create=False, ending="npz").split(".npz")[0] - # savez_compressed add npz itself, but create_file_indexed needs to check for .npz to find existing files - np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) - print(filename + ".npz") # To std-out to show which npz was created. - - -def continuous_job_generation(*, engine, timestep, jobs): - # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") - # print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") - if len(engine.queue) <= engine.continuous_workload.args.maxqueue: - new_jobs = engine.continuous_workload.generate_jobs().jobs - jobs.extend(new_jobs) diff --git a/raps/workloads/__init__.py b/raps/workloads/__init__.py new file mode 100644 index 0000000..080eafc --- /dev/null +++ b/raps/workloads/__init__.py @@ -0,0 +1,75 @@ +"""Workloads package init.""" + +import math +import numpy as np + +from raps.utils import WorkloadData, SubParsers +from raps.utils import pydantic_add_args +from raps.sim_config import SingleSimConfig + +from .basic import BasicWorkload +from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY +from .distribution import DistributionWorkload +from .live import continuous_job_generation, run_workload +from .multitenant import MultitenantWorkload +from .utils import plot_job_hist + + +class BaseWorkload: + """Base class with common workload logic.""" + + def __init__(self, args, *configs): + self.partitions = [c['system_name'] for c in configs] + self.config_map = {c['system_name']: c for c in configs} + self.args = args + + def generate_jobs(self): + jobs = getattr(self, self.args.workload)(args=self.args) + timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) + return WorkloadData( + jobs=jobs, + telemetry_start=0, + telemetry_end=timestep_end, + start_date=self.args.start, + ) + + def compute_traces(self, + cpu_util: float, + gpu_util: float, + expected_run_time: int, + trace_quanta: int + ) -> tuple[np.ndarray, np.ndarray]: + """ Compute CPU and GPU traces based on mean CPU & GPU utilizations and wall time. 
""" + cpu_trace = cpu_util * np.ones(int(expected_run_time) // trace_quanta) + gpu_trace = gpu_util * np.ones(int(expected_run_time) // trace_quanta) + return (cpu_trace, gpu_trace) + +class Workload( + BaseWorkload, + DistributionWorkload, + BasicWorkload, + MultitenantWorkload +): + """Final workload class with all workload types.""" + pass + +__all__ = [ + "Workload", + "JOB_NAMES", "ACCT_NAMES", "MAX_PRIORITY", +] + + +def run_workload_add_parser(subparsers: SubParsers): + from raps.sim_config import SIM_SHORTCUTS + # TODO: Separate the arguments for this command + parser = subparsers.add_parser("workload", description=""" + Saves workload as a snapshot. + """) + parser.add_argument("config_file", nargs="?", default=None, help=""" + YAML sim config file, can be used to configure an experiment instead of using CLI + flags. Pass "-" to read from stdin. + """) + model_validate = pydantic_add_args(parser, SingleSimConfig, model_config={ + "cli_shortcuts": SIM_SHORTCUTS, + }) + parser.set_defaults(impl=lambda args: run_workload(model_validate(args, {}))) diff --git a/raps/workloads/basic.py b/raps/workloads/basic.py new file mode 100644 index 0000000..208af54 --- /dev/null +++ b/raps/workloads/basic.py @@ -0,0 +1,419 @@ +import math +import random + +from raps.job import Job, job_dict +from raps.utils import ( + truncated_normalvariate_int, + determine_state, + next_arrival, + next_arrival_byconfargs, +) + +from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY + +class BasicWorkload: + + # Test for random 'reasonable' AI jobs + def randomAI(self, **kwargs): + args = kwargs.get('args', None) + jobs = [] + for i in range(args.numjobs): + draw = random.randint(0, 10) + if draw == 0: + et = random.randint(7200, 28800) + nr = random.choice([128, 256, 512, 1024, 1280, 1792, 2048]) + new_job = Job(job_dict(nodes_required=nr, + name="LLM", + account="llmUser", + end_state="Success", + id=random.randint(1, 99999), + cpu_trace=0.1, + gpu_trace=(random.uniform(0.55, 0.8) * + self.config_map[self.args.system]['GPUS_PER_NODE']), + ntx_trace=None, + nrx_trace=None, + submit_time=0, + time_limit=random.randint(43200, 43200), + start_time=0, + end_time=et, + expected_run_time=et)) + else: + new_job = Job(job_dict(nodes_required=1, + name="LLM", + account="llmUser", + end_state="Success", + id=random.randint(1, 99999), + cpu_trace=1, + gpu_trace=(0.2 * self.config_map[self.args.system]['GPUS_PER_NODE']), + ntx_trace=None, + nrx_trace=None, + submit_time=0, + time_limit=43200, + start_time=0, + end_time=7200, + expected_run_time=random.randint(60, 7200))) + jobs.append(new_job) + return jobs + + def synthetic(self, **kwargs): + args = kwargs.get('args', None) + print(args) + total_jobs = args.numjobs + orig_job_size_distribution = args.jobsize_distribution + orig_wall_time_distribution = args.walltime_distribution + orig_cpuutil_distribution = args.cpuutil_distribution + orig_gpuutil_distribution = args.gpuutil_distribution + jobs = [] + if len(args.jobsize_distribution) != 1 and sum(args.multimodal) != 1.0: + raise Exception(f"Sum of --multimodal != 1.0 : {args.multimodal} == {sum(args.multimodal)}") + for i, (jsdist, wtdist, cudist, gudist, percentage) in enumerate(zip(args.jobsize_distribution, + args.walltime_distribution, + args.cpuutil_distribution, + args.gpuutil_distribution, + args.multimodal)): + + args.numjobs = math.floor(total_jobs * percentage) + args.jobsize_distribution = jsdist + args.walltime_distribution = wtdist + args.cpuutil_distribution = cudist + args.gpuutil_distribution = 
gudist + + job_arrival_distribution_to_draw_from = self.job_arrival_distribution_draw_poisson + match args.jobsize_distribution: + case "uniform": + job_size_distribution_to_draw_from = self.job_size_distribution_draw_uniform + case "normal": + job_size_distribution_to_draw_from = self.job_size_distribution_draw_normal + case "weibull": + job_size_distribution_to_draw_from = self.job_size_distribution_draw_weibull + case _: + raise NotImplementedError(args.jobsize_distribution) + + match args.walltime_distribution: + case "weibull": + wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_weibull + case "normal": + wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_normal + case "uniform": + wall_time_distribution_to_draw_from = self.wall_time_distribution_draw_uniform + case _: + raise NotImplementedError(args.walltime_distribution) + + match args.cpuutil_distribution: + case "uniform": + cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_uniform + case "normal": + cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_normal + case "weibull": + cpu_util_distribution_to_draw_from = self.cpu_utilization_distribution_draw_weibull + case _: + raise NotImplementedError(args.cpuutil_distribution) + + match args.gpuutil_distribution: + case "uniform": + gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_uniform + case "normal": + gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_normal + case "weibull": + gpu_util_distribution_to_draw_from = self.gpu_utilization_distribution_draw_weibull + case _: + raise NotImplementedError(args.gpuutil_distribution) + + new_jobs = self.generate_jobs_from_distribution( + job_arrival_distribution_to_draw_from=job_arrival_distribution_to_draw_from, + job_size_distribution_to_draw_from=job_size_distribution_to_draw_from, + cpu_util_distribution_to_draw_from=cpu_util_distribution_to_draw_from, + gpu_util_distribution_to_draw_from=gpu_util_distribution_to_draw_from, + wall_time_distribution_to_draw_from=wall_time_distribution_to_draw_from, + args=args) + next_arrival(0, reset=True) + jobs.extend(new_jobs) + args.numjobs = total_jobs + args.jobsize_distribution = orig_job_size_distribution + args.cpuutil_distribution = orig_cpuutil_distribution + args.gpuutil_distribution = orig_gpuutil_distribution + args.walltime_distribution = orig_wall_time_distribution + return jobs + + def generate_random_jobs(self, args) -> list[list[any]]: + """ Generate random jobs with specified number of jobs. 
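+
+        Wall times and time limits are drawn from a truncated normal between
+        MIN_WALL_TIME and MAX_WALL_TIME, rounded down to whole hours (scaled
+        by `downscale`), and arrivals follow a Poisson process.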
""" + + partition = random.choice(self.partitions) + config = self.config_map[partition] + + # time_delta = args.time_delta # Unused + downscale = args.downscale + + config['MIN_WALL_TIME'] = config['MIN_WALL_TIME'] * downscale + config['MAX_WALL_TIME'] = config['MAX_WALL_TIME'] * downscale + jobs = [] + for job_index in range(args.numjobs): + # Randomly select a partition + # Get the corresponding config for the selected partition + nodes_required = random.randint(1, config['MAX_NODES_PER_JOB']) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = random.random() * config['CPUS_PER_NODE'] + gpu_util = random.random() * config['GPUS_PER_NODE'] + mu = (config['MAX_WALL_TIME'] + config['MIN_WALL_TIME']) / 2 + sigma = (config['MAX_WALL_TIME'] - config['MIN_WALL_TIME']) / 6 + wall_time = (truncated_normalvariate_int( + mu, sigma, config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) + time_limit = (truncated_normalvariate_int(mu, sigma, wall_time, + config['MAX_WALL_TIME']) // (3600 * downscale) * (3600 * downscale)) + # print(f"wall_time: {wall_time//downscale}") + # print(f"time_limit: {time_limit//downscale}") + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + priority = random.randint(0, MAX_PRIORITY) + net_tx, net_rx = None, None + + # Jobs arrive according to Poisson process + time_to_next_job = int(next_arrival_byconfargs(config, args)) + # wall_time = wall_time * downscale + # time_limit = time_limit * downscale + + job_info = job_dict(nodes_required=nodes_required, name=name, + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=time_to_next_job - 100, + time_limit=time_limit, + start_time=time_to_next_job, + end_time=time_to_next_job + wall_time, + expected_run_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time, + trace_quanta=config['TRACE_QUANTA'] * downscale, + downscale=downscale + ) + job = Job(job_info) + jobs.append(job) + return jobs + + def random(self, **kwargs): + """ Generate random workload """ + args = kwargs.get('args', None) + return self.generate_random_jobs(args=args) + + def peak(self, **kwargs): + """Peak power test for multiple partitions""" + jobs = [] + + # Iterate through each partition and get its configuration + for partition in self.partitions: + # Fetch the config for the current partition + config = self.config_map[partition] + + # Generate traces based on partition-specific configuration + cpu_util = config['CPUS_PER_NODE'] + gpu_util = config['GPUS_PER_NODE'] + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) + net_tx, net_rx = None, None + + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + # Create job info for this partition + job_info = job_dict(nodes_required=config['AVAILABLE_NODES'], + # Down nodes, therefore doesnt work list(range(config['AVAILABLE_NODES'])), + scheduled_nodes=[], + name=f"Max Test {partition}", + account=ACCT_NAMES[0], + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + time_limit=job_time + 1, + start_time=0, + end_time=job_time, + expected_run_time=job_time, + trace_time=job_time, + trace_start_time=0, + 
trace_end_time=job_time, + trace_quanta=config['TRACE_QUANTA'] + ) + job = Job(job_info) + jobs.append(job) # Add job to the list + + return jobs + + def idle(self, **kwargs): + jobs = [] + # Iterate through each partition and get its configuration + for partition in self.partitions: + # Fetch the config for the current partition + config = self.config_map[partition] + + # Generate traces based on partition-specific configuration + cpu_util, gpu_util = 0, 0 + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) + net_tx, net_rx = None, None + + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + # Create job info for this partition + job_info = job_dict( + nodes_required=config['AVAILABLE_NODES'], + name=f"Idle Test {partition}", + account=ACCT_NAMES[0], + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + scheduled_nodes=[], # list(range(config['AVAILABLE_NODES'])), + id=None, + priority=100, + partition=partition, + time_limit=job_time + 1, + submit_time=0, + start_time=0, + end_time=job_time, + expected_run_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) # Add job to the list + + return jobs + + def benchmark(self, **kwargs): + """Benchmark tests for multiple partitions""" + + # List to hold jobs for all partitions + jobs = [] + account = ACCT_NAMES[0] + # Iterate through each partition and its config + for partition in self.partitions: + # Fetch partition-specific configuration + config = self.config_map[partition] + net_tx, net_rx = None, None + + # Max test + cpu_util, gpu_util = 1, 4 + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 10800, config['TRACE_QUANTA']) + + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + + job_info = job_dict( + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes + name=f"Max Test {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=0, + end_time=job_time, + expected_run_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) + + # OpenMxP run + cpu_util, gpu_util = 0, 4 + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + + job_info = job_dict( + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes + name=f"OpenMxP {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=10800, + end_time=14200, + expected_run_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) + + # HPL run + cpu_util, gpu_util = 0.33, 0.79 * 4 # based on 24-01-18 run + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, 
config['TRACE_QUANTA']) + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + job_info = job_dict( + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes + name=f"HPL {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=14200, + end_time=17800, + expected_run_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) + + # Idle test + cpu_trace, gpu_trace = self.compute_traces(cpu_util, gpu_util, 3600, config['TRACE_QUANTA']) + job_time = len(gpu_trace) * config['TRACE_QUANTA'] + job_info = job_dict( + nodes_required=config['AVAILABLE_NODES'], + scheduled_nodes=[], # Explicit scheduled nodes will not work due to down nodes + name=f"Idle Test {partition}", + account=account, + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state='COMPLETED', + id=None, + priority=100, + partition=partition, + submit_time=0, + time_limit=job_time + 1, + start_time=17800, + end_time=21400, + expected_run_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + trace_missing_values=False, + trace_quanta=config['TRACE_QUANTA']) + job = Job(job_info) + jobs.append(job) + + return jobs diff --git a/raps/workloads/constants.py b/raps/workloads/constants.py new file mode 100644 index 0000000..2ffb39e --- /dev/null +++ b/raps/workloads/constants.py @@ -0,0 +1,13 @@ +"""Shared constants for workloads.""" + +JOB_NAMES = [ + "LAMMPS", "GROMACS", "VASP", "Quantum ESPRESSO", "NAMD", + "OpenFOAM", "WRF", "AMBER", "CP2K", "nek5000", "CHARMM", + "ABINIT", "Cactus", "Charm++", "NWChem", "STAR-CCM+", + "Gaussian", "ANSYS", "COMSOL", "PLUMED", "nekrs", + "TensorFlow", "PyTorch", "BLAST", "Spark", "GAMESS", + "ORCA", "Simulink", "MOOSE", "ELK" +] + +ACCT_NAMES = [f"ACT{i:02d}" for i in range(1, 15)] +MAX_PRIORITY = 500000 diff --git a/raps/workloads/distribution.py b/raps/workloads/distribution.py new file mode 100644 index 0000000..8f3cc6f --- /dev/null +++ b/raps/workloads/distribution.py @@ -0,0 +1,188 @@ +import math +import random + +from raps.job import Job, job_dict +from raps.utils import ( + truncated_normalvariate_int, + truncated_normalvariate_float, + truncated_weibull, + truncated_weibull_float, + determine_state, + next_arrival_byconfargs, +) + +from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY + +class DistributionWorkload: + + def job_arrival_distribution_draw_poisson(self, args, config): + return next_arrival_byconfargs(config, args) + + + def job_size_distribution_draw_uniform(self, args, config): + min_v = 1 + max_v = config['MAX_NODES_PER_JOB'] + if (args.jobsize_is_power_of is not None): + base = args.jobsize_is_power_of + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] + selection = random.randint(0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + elif (args.jobsize_is_of_degree is not None): + exp = args.jobsize_is_of_degree + possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] + selection = random.randint(0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + else: + number = 
random.randint(1, config['MAX_NODES_PER_JOB']) + return number + + + def job_size_distribution_draw_weibull(self, args, config): + min_v = 1 + max_v = config['MAX_NODES_PER_JOB'] + if (args.jobsize_is_power_of is not None): + base = args.jobsize_is_power_of + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] + scale = math.log(args.jobsize_weibull_scale, base) + shape = math.log(args.jobsize_weibull_shape, base) + selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + elif (args.jobsize_is_of_degree is not None): + exp = args.jobsize_is_of_degree + possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] + scale = math.pow(args.jobsize_weibull_scale, 1 / exp) + shape = math.pow(args.jobsize_weibull_shape, 1 / exp) + selection = truncated_weibull(scale, shape, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + else: + number = truncated_weibull(args.jobsize_weibull_scale, args.jobsize_weibull_shape, + 1, config['MAX_NODES_PER_JOB']) + return number + + + def job_size_distribution_draw_normal(self, args, config): + min_v = 1 + max_v = config['MAX_NODES_PER_JOB'] + if (args.jobsize_is_power_of is not None): + base = args.jobsize_is_power_of + possible_jobsizes = [base ** exp for exp in range(min_v, int(math.floor(math.log(max_v, base))))] + mean = math.log(args.jobsize_normal_mean, base) + stddev = math.log(args.jobsize_normal_stddev, base) # (len(possible_jobsizes) / (max_v - min_v)) + selection = truncated_normalvariate_int(mean, stddev, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection - 1] + elif (args.jobsize_is_of_degree is not None): + exp = args.jobsize_is_of_degree + possible_jobsizes = [base ** exp for base in range(min_v, int(math.floor(pow(max_v, 1 / exp))))] + mean = math.pow(args.jobsize_normal_mean, 1 / exp) + stddev = math.pow(args.jobsize_normal_stddev, 1 / exp) + selection = truncated_weibull(mean, stddev, 0, len(possible_jobsizes) - 1) + number = possible_jobsizes[selection] + else: + number = truncated_normalvariate_int( + args.jobsize_normal_mean, args.jobsize_normal_stddev, 1, config['MAX_NODES_PER_JOB']) + return number + + + def cpu_utilization_distribution_draw_uniform(self, args, config): + return random.uniform(0.0, config['CPUS_PER_NODE']) + + + def cpu_utilization_distribution_draw_normal(self, args, config): + return truncated_normalvariate_float(args.cpuutil_normal_mean, + args.cpuutil_normal_stddev, + 0.0, config['CPUS_PER_NODE']) + + + def cpu_utilization_distribution_draw_weibull(self, args, config): + return truncated_weibull_float(args.cpuutil_weibull_scale, + args.cpuutil_weibull_shape, + 0.0, config['CPUS_PER_NODE']) + + + def gpu_utilization_distribution_draw_uniform(self, args, config): + return random.uniform(0.0, config['GPUS_PER_NODE']) + + + def gpu_utilization_distribution_draw_normal(self, args, config): + return truncated_normalvariate_float(args.gpuutil_normal_mean, + args.gpuutil_normal_stddev, + 0.0, config['GPUS_PER_NODE']) + + + def gpu_utilization_distribution_draw_weibull(self, args, config): + return truncated_weibull_float(args.gpuutil_weibull_scale, + args.gpuutil_weibull_shape, + 0.0, config['GPUS_PER_NODE']) + + + def wall_time_distribution_draw_uniform(self, args, config): + return random.uniform(config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) + + + def wall_time_distribution_draw_normal(self, args, config): + return max(1, 
truncated_normalvariate_int(float(args.walltime_normal_mean), + float(args.walltime_normal_stddev), config['MIN_WALL_TIME'], + config['MAX_WALL_TIME']) / 3600 * 3600) + + + def wall_time_distribution_draw_weibull(self, args, config): + return truncated_weibull(args.walltime_weibull_scale, + args.walltime_weibull_shape, + config['MIN_WALL_TIME'], config['MAX_WALL_TIME']) + + + def generate_jobs_from_distribution(self, *, + job_arrival_distribution_to_draw_from, + job_size_distribution_to_draw_from, + cpu_util_distribution_to_draw_from, + gpu_util_distribution_to_draw_from, + wall_time_distribution_to_draw_from, + args + ) -> list[list[any]]: + jobs = [] + partition = random.choice(self.partitions) + config = self.config_map[partition] + for job_index in range(args.numjobs): + submit_time = int(job_arrival_distribution_to_draw_from(args, config)) + start_time = submit_time + nodes_required = job_size_distribution_to_draw_from(args, config) + name = random.choice(JOB_NAMES) + account = random.choice(ACCT_NAMES) + cpu_util = cpu_util_distribution_to_draw_from(args, config) + if "CORES_PER_CPU" in config: + cpu_cores_required = random.randint(0, config["CORES_PER_CPU"]) + else: + cpu_cores_required = None + gpu_util = gpu_util_distribution_to_draw_from(args, config) + if "GPUS_PER_NODE" in config: + if isinstance(gpu_util, list): + gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"], math.ceil(max(gpu_util)))) + else: + gpu_units_required = random.randint(0, max(config["GPUS_PER_NODE"], math.ceil(gpu_util))) + wall_time = wall_time_distribution_to_draw_from(args, config) + end_time = start_time + wall_time + time_limit = max(wall_time, wall_time_distribution_to_draw_from(args, config)) + end_state = determine_state(config['JOB_END_PROBS']) + cpu_trace = cpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + gpu_trace = gpu_util # self.compute_traces(cpu_util, gpu_util, wall_time, config['TRACE_QUANTA']) + priority = random.randint(0, MAX_PRIORITY) + net_tx, net_rx = None, None + job_info = job_dict(nodes_required=nodes_required, name=name, + account=account, cpu_trace=cpu_trace, + gpu_trace=gpu_trace, ntx_trace=net_tx, + nrx_trace=net_rx, end_state=end_state, + id=job_index, priority=priority, + partition=partition, + submit_time=submit_time, + time_limit=time_limit, + start_time=start_time, + end_time=end_time, + expected_run_time=wall_time, trace_time=wall_time, + trace_start_time=0, trace_end_time=wall_time, + cpu_cores_required=cpu_cores_required, + gpu_units_required=gpu_units_required, + trace_quanta=config['TRACE_QUANTA'] + ) + job = Job(job_info) + jobs.append(job) + return jobs diff --git a/raps/workloads/live.py b/raps/workloads/live.py new file mode 100644 index 0000000..974b369 --- /dev/null +++ b/raps/workloads/live.py @@ -0,0 +1,39 @@ +import math +import numpy as np +from raps.sim_config import SingleSimConfig +from raps.telemetry import Telemetry +from raps.utils import create_file_indexed +from .utils import plot_job_hist + +def continuous_job_generation(self, *, engine, timestep, jobs): + # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") + # print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") + if len(engine.queue) <= engine.continuous_workload.args.maxqueue: + new_jobs = engine.continuous_workload.generate_jobs().jobs + jobs.extend(new_jobs) + + +def run_workload(sim_config: SingleSimConfig): + args = sim_config.get_legacy_args() + args_dict = sim_config.get_legacy_args() + 
config = sim_config.system_configs[0].get_legacy() + + if sim_config.replay: + td = Telemetry(**args_dict) + jobs = td.load_from_files(sim_config.replay).jobs + else: + workload = Workload(args, config) + jobs = getattr(workload, sim_config.workload)(args=sim_config.get_legacy_args()) + plot_job_hist(jobs, + config=config, + dist_split=sim_config.multimodal, + gantt_nodes=sim_config.gantt_nodes) + + out = sim_config.get_output() + if out: + timestep_start = min([x.submit_time for x in jobs]) + timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs])) + filename = create_file_indexed('wl', path=str(out), create=False, ending="npz").split(".npz")[0] + # savez_compressed add npz itself, but create_file_indexed needs to check for .npz to find existing files + np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) + print(filename + ".npz") # To std-out to show which npz was created. diff --git a/raps/workloads/multitenant.py b/raps/workloads/multitenant.py new file mode 100644 index 0000000..61d7c32 --- /dev/null +++ b/raps/workloads/multitenant.py @@ -0,0 +1,154 @@ +import random +from raps.job import Job, job_dict +from .constants import ACCT_NAMES, MAX_PRIORITY + +class MultitenantWorkload: + + def multitenant(self, **kwargs): + """ + Generate deterministic jobs to validate multitenant scheduling & power. + + usage example: + + python main.py run-multi-part -x mit_supercloud -w multitenant + + Parameters + ---------- + mode : str + One of: + - 'ONE_JOB_PER_NODE_ALL_CORES' + - 'TWO_JOBS_PER_NODE_SPLIT' + - 'STAGGERED_JOBS_PER_NODE' + wall_time : int + Duration (seconds) of each job (default: 3600) + trace_quanta : int + Sampling interval for traces; defaults to config['TRACE_QUANTA'] + + Returns + ------- + list[dict] + List of job_dict entries. 
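+
+        Example (hypothetical invocation; assumes a configured ``Workload``
+        instance ``wl`` built from a multitenant-capable system config):
+
+            jobs = wl.multitenant(mode='STAGGERED_JOBS_PER_NODE', wall_time=1800)
+            assert all(job.nodes_required == 1 for job in jobs)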
+ """ + mode = kwargs.get('mode', 'TWO_JOBS_PER_NODE_SPLIT') + wall_time = kwargs.get('wall_time', 3600) + + jobs = [] + + for partition in self.partitions: + cfg = self.config_map[partition] + trace_quanta = kwargs.get('trace_quanta', cfg['TRACE_QUANTA']) + + cores_per_cpu = cfg.get('CORES_PER_CPU', 1) + cpus_per_node = cfg.get('CPUS_PER_NODE', 1) + cores_per_node = cores_per_cpu * cpus_per_node + gpus_per_node = cfg.get('GPUS_PER_NODE', 0) + + n_nodes = cfg['AVAILABLE_NODES'] + + def make_trace(cpu_util, gpu_util): + return self.compute_traces(cpu_util, gpu_util, wall_time, trace_quanta) + + job_id_ctr = 0 + + if mode == 'ONE_JOB_PER_NODE_ALL_CORES': + # Each node runs one job that consumes all cores/GPUs + for nid in range(n_nodes): + cpu_trace, gpu_trace = make_trace(cores_per_node, gpus_per_node) + jobs.append(Job(job_dict( + nodes_required=1, + cpu_cores_required=cores_per_node, + gpu_units_required=gpus_per_node, + name=f"MT_full_node_{partition}_{nid}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + + elif mode == 'TWO_JOBS_PER_NODE_SPLIT': + # Two jobs per node: split CPU/GPU roughly in half + for nid in range(n_nodes): + cpu_a = cores_per_node // 2 + cpu_b = cores_per_node - cpu_a + gpu_a = gpus_per_node // 2 + gpu_b = gpus_per_node - gpu_a + + for idx, (c_req, g_req, tag) in enumerate([(cpu_a, gpu_a, 'A'), + (cpu_b, gpu_b, 'B')]): + cpu_trace, gpu_trace = make_trace(c_req, g_req) + jobs.append(Job(job_dict( + nodes_required=1, # still one node; multitenant RM packs cores + cpu_cores_required=c_req, + gpu_units_required=g_req, + name=f"MT_split_node_{partition}_{nid}_{tag}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=0, + time_limit=wall_time, + start_time=0, + end_time=wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + + elif mode == 'STAGGERED_JOBS_PER_NODE': + # Three jobs per node, staggered starts: 0, wall_time/3, 2*wall_time/3 + offsets = [0, wall_time // 3, 2 * wall_time // 3] + cpu_each = cores_per_node // 3 or 1 + gpu_each = max(1, gpus_per_node // 3) if gpus_per_node else 0 + + for nid in range(n_nodes): + for k, offset in enumerate(offsets): + cpu_trace, gpu_trace = make_trace(cpu_each, gpu_each) + jobs.append(Job(job_dict( + nodes_required=1, + cpu_cores_required=cpu_each, + gpu_units_required=gpu_each, + name=f"MT_stagger_node_{partition}_{nid}_{k}", + account=random.choice(ACCT_NAMES), + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + end_state='COMPLETED', + id=job_id_ctr, + priority=random.randint(0, MAX_PRIORITY), + partition=partition, + submit_time=offset, + time_limit=wall_time, + start_time=offset, + end_time=offset + wall_time, + expected_run_time=wall_time, + trace_time=wall_time, + trace_start_time=0, + trace_end_time=wall_time, + trace_quanta=cfg['TRACE_QUANTA'] + ))) + job_id_ctr += 1 + else: + raise 
ValueError(f"Unknown multitenant mode: {mode}") + + return jobs diff --git a/raps/workloads/utils.py b/raps/workloads/utils.py new file mode 100644 index 0000000..d673ddf --- /dev/null +++ b/raps/workloads/utils.py @@ -0,0 +1,159 @@ +import math +import numpy as np +import matplotlib.pyplot as plt + + +def plot_job_hist(jobs, config=None, dist_split=None, gantt_nodes=False): + # put args.multimodal in dist_split! + split = [1.0] + num_dist = 1 + if dist_split: + num_dist = len(dist_split) + split = dist_split + + y = [y.nodes_required for y in jobs] + x = [x.expected_run_time for x in jobs] + x2 = [x.time_limit for x in jobs] + fig_m = plt.figure() + gs = fig_m.add_gridspec(30, 1) + gs0 = gs[0:20].subgridspec(500, 500, hspace=0, wspace=0) + gs1 = gs[24:].subgridspec(1, 1) + + ax_top = fig_m.add_subplot(gs0[:]) + ax_top.axis('off') + ax_top.set_title('Job Distribution') + + ax_bot = fig_m.add_subplot(gs1[:]) + ax_bot.axis('off') + ax_bot.set_title('Submit Time + Wall Time') + + # ax0 = fig_m.add_subplot(gs[:2,:]) + # ax1 = fig_m.add_subplot(gs[2:,:]) + + # gss = gridspec.GridSpec(5, 5, figure=ax0) + # fig, axs = plt.subplots(2, 2, gridspec_kw={'width_ratios': (4, 1), 'height_ratios': (1, 4)}) + axs = [] + col = [] + col.append(fig_m.add_subplot(gs0[:100, :433])) + col.append(fig_m.add_subplot(gs0[:100, 433:])) + axs.append(col.copy()) + col = [] + col.append(fig_m.add_subplot(gs0[100:, :433])) + col.append(fig_m.add_subplot(gs0[100:, 433:])) + axs.append(col.copy()) + + ax_b = fig_m.add_subplot(gs1[:, :]) + + # Create scatter plot + for i in range(len(x)): + axs[1][0].plot([x[i], x2[i]], [y[i], y[i]], color='lightblue', zorder=1) + axs[1][0].scatter(x2, y, marker='.', c='lightblue', zorder=2) + axs[1][0].scatter(x, y, zorder=3) + + cpu_util = [x.cpu_trace for x in jobs] + if isinstance(cpu_util[0], np.ndarray): + cpu_util = np.concatenate(cpu_util).ravel() + elif isinstance(cpu_util[0], list): + cpu_util = [sum(part) / len(part) for part in cpu_util] + gpu_util = [x.gpu_trace for x in jobs] + if isinstance(gpu_util[0], np.ndarray): + gpu_util = np.concatenate(gpu_util).ravel() + elif isinstance(gpu_util[0], list): + gpu_util = [sum(part) / len(part) for part in gpu_util] + if not all([x == 0 for x in gpu_util]): + axs[0][1].scatter(cpu_util, gpu_util, zorder=2, marker='.', s=0.2) + axs[0][1].hist(gpu_util, bins=100, orientation='horizontal', zorder=1, density=True, color='tab:purple') + axs[0][1].axhline(np.mean(gpu_util), color='r', linewidth=1, zorder=3) + axs[0][1].set(ylim=[0, config['GPUS_PER_NODE']]) + axs[0][1].set_ylabel("gpu util") + axs[0][1].yaxis.set_label_coords(1.15, 0.5) + axs[0][1].yaxis.set_label_position("right") + axs[0][1].yaxis.tick_right() + else: + axs[0][1].set_yticks([]) + axs[0][1].hist(cpu_util, bins=100, orientation='vertical', zorder=1, density=True, color='tab:cyan') + axs[0][1].axvline(np.mean(cpu_util), color='r', linewidth=1, zorder=3) + axs[0][1].set(xlim=[0, config['CPUS_PER_NODE']]) + axs[0][1].set_xlabel("cpu util") + axs[0][1].xaxis.set_label_coords(0.5, 1.30) + axs[0][1].xaxis.set_label_position("top") + axs[0][1].xaxis.tick_top() + axs[0][0].hist(x2, bins=max(1, math.ceil(min(100, (max(x2) - min(x))))), orientation='vertical', color='lightblue') + axs[0][0].hist(x, bins=max(1, math.ceil(min(100, (max(x2) - min(x))))), orientation='vertical') + axs[1][0].sharex(axs[0][0]) + axs[1][1].hist(y, bins=max(1, min(100, (max(y) - min(y)))), orientation='horizontal') + axs[1][0].sharey(axs[1][1]) + + # Remove ticks + axs[0][0].set_xticks([]) + 
axs[1][1].set_yticks([]) + axs[0][1].spines['top'].set_color('white') + axs[0][1].spines['right'].set_color('white') + axs[1][0].set_ylabel("nodes [N]") + axs[1][0].set_xlabel("wall time [hh:mm]") + minx_s = 0 + maxx_s = math.ceil(max(x2)) + x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] + x_label_ticks = [n * 60 for n in x_label_mins[0::60]] + x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for + (x1, x2) in [(n // 60, n % 60) for + n in x_label_mins[0::60]]] + axs[1][0].set_xticks(x_label_ticks, x_label_str) + miny = min(y) + maxy = max(y) + interval = max(1, maxy // 10) + y_ticks = np.arange(0, maxy, interval) + y_ticks[0] = miny + axs[1][0].set_yticks(y_ticks) + + axs[0][0].tick_params(axis="x", labelbottom=False) + axs[1][1].tick_params(axis="y", labelleft=False) + + # Submit_time and Wall_time + duration = [x.expected_run_time for x in jobs] + nodes_required = [x.nodes_required for x in jobs] + submit_t = [x.submit_time for x in jobs] + + offset = 0 + split_index = 0 + split_offset = math.floor(len(x) * split[split_index]) + if gantt_nodes: + if split[0] == 0.0: + ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) + split_index += 1 + for i in range(len(x)): + # ax_b.barh(i,duration[i], height=1.0, left=submit_t[i]) + ax_b.barh(offset + nodes_required[i] / 2, duration[i], height=nodes_required[i], left=submit_t[i]) + offset += nodes_required[i] + if i != len(x) - 1 and i == split_offset - 1 and split_index < len(split): + ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) + split_index += 1 + split_offset += math.floor(len(x) * split[split_index]) + # ax_b.axhline(y=(len(x)/num_dist * i)-0.5, color='red', linestyle='--',lw=0.5) + if split[-1] == 0.0: + ax_b.axhline(y=offset, color='red', linestyle='--', lw=0.5) + split_index += 1 + ax_b.set_ylabel("Jobs' acc. 
nodes") + else: + for i in range(len(x)): + ax_b.barh(i, duration[i], height=1.0, left=submit_t[i]) + for i in range(1, num_dist): + if num_dist == 1: + break + ax_b.axhline(y=(len(x) * split[split_index]) - 0.5, color='red', linestyle='--', lw=0.5) + split_index += 1 + ax_b.set_ylabel("Job ID") + # ax_b labels: + ax_b.set_xlabel("time [hh:mm]") + minx_s = 0 + maxx_s = math.ceil(max([x.expected_run_time for x in jobs]) + max([x.submit_time for x in jobs])) + x_label_mins = [n for n in np.arange(minx_s // 60, maxx_s // 60)] + x_label_ticks = [n * 60 for n in x_label_mins[0::60]] + x_label_str = [str(x1).zfill(2) + ":" + str(x2).zfill(2) for + (x1, x2) in [(n // 60, n % 60) for + n in x_label_mins[0::60]]] + + ax_b.set_xticks(x_label_ticks, x_label_str) + ax_b.yaxis.set_inverted(True) + + plt.show() -- GitLab From 16e437c328b3e586d205a11aec7620491acdab04 Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Wed, 17 Sep 2025 21:21:35 +0000 Subject: [PATCH 308/388] Simplify fastforward/start/end/time handling --- README.md | 4 +- experiments/gcloudv2.yaml | 2 +- main.py | 1 + pytest.ini | 1 + raps/constants.py | 1 + raps/cooling.py | 8 +- raps/dataloaders/adastraMI250.py | 5 +- raps/dataloaders/bluewaters.py | 6 +- raps/dataloaders/frontier.py | 4 +- raps/dataloaders/kestrel.py | 11 +- raps/dataloaders/lassen.py | 28 +- raps/dataloaders/marconi100.py | 6 +- raps/dataloaders/mit_supercloud/cli.py | 4 +- raps/dataloaders/mit_supercloud/loader.py | 10 +- raps/engine.py | 93 +++-- raps/envs/raps_env.py | 20 +- raps/run_sim.py | 13 +- raps/sim_config.py | 149 ++++--- raps/stats.py | 151 +++---- raps/ui.py | 1 + raps/utils.py | 50 ++- raps/weather.py | 10 +- raps/workloads/__init__.py | 35 +- raps/workloads/live.py | 33 -- tests/conftest.py | 3 + tests/systems/conftest.py | 373 +++++++++--------- tests/systems/test_engine.py | 39 -- tests/systems/test_engine_basic.py | 20 + tests/systems/test_main_fastforward_run.py | 29 +- tests/systems/test_main_start_run.py | 22 ++ tests/systems/test_main_time_ff_delta_run.py | 38 -- tests/systems/test_main_withdata_range_run.py | 27 ++ tests/systems/test_main_withdata_run.py | 25 +- .../test_multi_part_sim_withdata_run.py | 22 +- tests/unit/test_utils.py | 16 +- tests/util.py | 85 ++++ 36 files changed, 758 insertions(+), 587 deletions(-) mode change 100644 => 100755 main.py delete mode 100644 tests/systems/test_engine.py create mode 100644 tests/systems/test_engine_basic.py create mode 100644 tests/systems/test_main_start_run.py delete mode 100644 tests/systems/test_main_time_ff_delta_run.py create mode 100644 tests/systems/test_main_withdata_range_run.py diff --git a/README.md b/README.md index eb39cc7..90ab831 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from For Google cluster trace v2 - raps run --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --ff 600 + raps run --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --start '2011-05-02T00:10:00Z' # analyze dataset raps telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v @@ -87,7 +87,7 @@ For Lumi Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. 
 To run a network simulation, use the following command:
 
-    raps run -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --ff 365d -t 12h --arrival poisson --net
+    raps run -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --start '2019-08-22T00:00:00+00:00' -t 12h --arrival poisson --net
 
 ## Snapshot of extracted workload data
diff --git a/experiments/gcloudv2.yaml b/experiments/gcloudv2.yaml
index db8e218..9c7a700 100644
--- a/experiments/gcloudv2.yaml
+++ b/experiments/gcloudv2.yaml
@@ -1,4 +1,4 @@
 system: gcloudv2
 replay:
 - /opt/data/gcloud/v2/google_cluster_data_2011_sample
-ff: 600
+start: 2011-05-02T00:10:00Z
diff --git a/main.py b/main.py
old mode 100644
new mode 100755
index b3c03f6..b2eae93
--- a/main.py
+++ b/main.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 """
 ExaDigiT Resource Allocator & Power Simulator (RAPS)
 """
diff --git a/pytest.ini b/pytest.ini
index 08a528c..78b07aa 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -22,6 +22,7 @@ markers =
     40frontiers: System test
 
    adastraMI250: System test
+    bluewaters: System test
    frontier: System test
    fugaku: System test
    gcloudv2: System test
diff --git a/raps/constants.py b/raps/constants.py
index 53711e1..85b5e23 100644
--- a/raps/constants.py
+++ b/raps/constants.py
@@ -2,6 +2,7 @@
 RAPS Constants
 """
 from pathlib import Path
+from datetime import datetime
 
 ELLIPSES = '\u2026'
 OUTPUT_PATH = Path('simulation_results')
diff --git a/raps/cooling.py b/raps/cooling.py
index ba96e43..066c12c 100644
--- a/raps/cooling.py
+++ b/raps/cooling.py
@@ -16,7 +16,7 @@ from fmpy import read_model_description, extract
 from fmpy.fmi2 import FMU2Slave
 from datetime import timedelta
 
-from raps.policy import PolicyType
+from raps.weather import Weather
 
 
 def get_matching_variables(variables, pattern):
@@ -92,7 +92,7 @@ class ThermoFluidsModel:
         self.outputs = None
         self.unzipdir = None
         self.fmu = None
-        self.weather = None
+        self.weather: Weather | None = None
 
     def initialize(self):
         """
@@ -153,9 +153,7 @@ class ThermoFluidsModel:
             temperature = self.config['WET_BULB_TEMP']
 
         # If weather data is available
-        if engine.scheduler.policy == PolicyType.REPLAY and \
-           self.weather and self.weather.start is not None and \
-           self.weather.has_coords:
+        if self.weather and self.weather.has_coords:
             # Convert total seconds to timedelta object
             delta = timedelta(seconds=engine.current_timestep)
             target_datetime = self.weather.start + delta
diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py
index 6eec26e..8cadbfb 100644
--- a/raps/dataloaders/adastraMI250.py
+++ b/raps/dataloaders/adastraMI250.py
@@ -11,8 +11,9 @@
     # to replay with different scheduling policy
     python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --policy priority --backfill easy
 
-    # to fast-forward 60 days and replay for 1 day
-    python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --ff 60d -t 1d
+    # to run a specific time range
+    python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 \
+        --start 2024-11-01T00:00:00Z --end 2024-11-02T00:00:00Z
 
     # to analyze dataset
     python -m raps.telemetry -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 -v
diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py
index 9d5dc4d..728e2bb 100644
--- a/raps/dataloaders/bluewaters.py
+++ b/raps/dataloaders/bluewaters.py
@@ -181,7 +181,11 @@ def _parse_line(line: str, debug=False):
 def load_data(local_dataset_path, **kwargs):
     config = kwargs.get("config")
     root = Path(local_dataset_path[0])
-    day = kwargs.get("start")
+    # TODO: confirm bluewaters dates are in UTC
+    start = datetime.fromisoformat(kwargs.get('start') or "2017-03-28T00:00:00+00:00")
+    start = start.astimezone(timezone.utc)
+    # TODO: support multiple day replay
+    day = start.strftime("%Y%m%d")
     fp = root / "torque_logs" / day
     filter_str = kwargs.get("filter")
     debug = kwargs.get("debug")
diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py
index 3849731..a6ac45e 100644
--- a/raps/dataloaders/frontier.py
+++ b/raps/dataloaders/frontier.py
@@ -275,12 +275,12 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
 
         if end_time < telemetry_start:
             print("Job ends before first recorded telemetry entry:", job_id, "start:",
-                   start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.")
+                  start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.")
             continue  # skip
 
         if start_time > telemetry_end:
             print("Job starts after last recorded telemetry entry:", job_id, "start:",
-                   start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.")
+                  start_time, "end:", end_time, " Telemetry: ", len(gpu_trace), "entries.")
             continue  # skip
 
         # Throw out jobs that are not valid!
diff --git a/raps/dataloaders/kestrel.py b/raps/dataloaders/kestrel.py
index c9efd70..f15c80b 100644
--- a/raps/dataloaders/kestrel.py
+++ b/raps/dataloaders/kestrel.py
@@ -49,14 +49,10 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
         The list of parsed jobs.
     """
     config = kwargs.get('config')
-    min_time = kwargs.get('min_time', None)
     reschedule = kwargs.get('reschedule')
-    fastforward = kwargs.get('fastforward')
     validate = kwargs.get('validate')
     jid = kwargs.get('jid', '*')
 
-    if fastforward: print(f"fast-forwarding {fastforward} seconds")
-
     # Sort jobs dataframe based on values in time_start column, adjust indices after sorting
     jobs_df = jobs_df.sort_values(by='submit_time')
     jobs_df = jobs_df[(jobs_df.start_time.between(pd.to_datetime('2024-09-01T00:00:00'),
@@ -73,10 +69,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
 
     # Take earliest time as baseline reference
     # We can use the start time of the first job.
-    if min_time:
-        time_zero = min_time
-    else:
-        time_zero = jobs_df['submit_time'].min()
+    time_zero = jobs_df['submit_time'].min()
 
     num_jobs = len(jobs_df)
     print("time_zero:", time_zero, "num_jobs", num_jobs)
@@ -125,8 +118,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
             # When extracting out a single job, run one iteration past the end of the job
             time_offset = config['UI_UPDATE_FREQ']
 
-        if fastforward: time_offset -= fastforward
-
         if reschedule:  # Let the scheduler reschedule the jobs
             scheduled_nodes = None
             time_offset = next_arrival(1/config['JOB_ARRIVAL_TIME'])
diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py
index bc57a7c..8bded75 100644
--- a/raps/dataloaders/lassen.py
+++ b/raps/dataloaders/lassen.py
@@ -23,7 +23,7 @@ Usage Instructions:
     python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson
 
    # to fast-forward 365 days and replay for 1 day. This day has 2250 jobs, of which 1650 executed.
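+    # (an absolute --start timestamp now replaces the old relative --ff offset)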
-    python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --ff 365d -t 1d
+    python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --start '2019-08-22T00:00:00+00:00' -t 1d
 
     # For the network replay this command gives suitable snapshots:
     python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson  # noqa
@@ -35,10 +35,10 @@
 import uuid
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
-from datetime import timedelta
+from datetime import datetime, timedelta
 
 from ..job import job_dict, Job
-from ..utils import power_to_utilization, parse_td, WorkloadData
+from ..utils import power_to_utilization, WorkloadData
 
 
 def load_data(path, **kwargs):
@@ -61,32 +61,26 @@ def load_data_from_df(allocation_df, node_df, step_df, **kwargs):
     jid = kwargs.get('jid', '*')
     validate = kwargs.get('validate')
     verbose = kwargs.get('verbose')
-    fastforward = kwargs.get('fastforward')  # int in seconds
+    start = datetime.fromisoformat(kwargs['start']) if kwargs.get('start') else None
 
     allocation_df['job_submit_timestamp'] = pd.to_datetime(
         allocation_df['job_submit_time'], format='mixed', errors='coerce')
     allocation_df['begin_timestamp'] = pd.to_datetime(allocation_df['begin_time'], format='mixed', errors='coerce')
     allocation_df['end_timestamp'] = pd.to_datetime(allocation_df['end_time'], format='mixed', errors='coerce')
 
-    # Too large dataset! Cut by fastforward and time to simulate!
-    if fastforward is None:  # This is in seconds / int?
-        fastforward = 0
-        fastforward_timedelta = timedelta(seconds=fastforward)  # timedelta
-    else:
-        fastforward_timedelta = timedelta(seconds=fastforward)  # timedelta
-    time_to_simulate = kwargs.get('time')  # int in seconds
-    if time_to_simulate is None:  # This is a string!
-        time_to_simulate = 31536000  # a year
-        time_to_simulate_timedelta = timedelta(seconds=time_to_simulate)  # timedelta
-    else:
-        time_to_simulate_timedelta = parse_td(time_to_simulate)  # timedelta
-
     telemetry_start_timestamp = allocation_df['begin_timestamp'].min()
     telemetry_start_time = 0
     telemetry_end_timestamp = allocation_df['end_timestamp'].max()
     diff = telemetry_end_timestamp - telemetry_start_timestamp
     telemetry_end_time = int(math.ceil(diff.total_seconds()))
 
+    # The dataset is too large to replay in full; trim it by the start time and the time to simulate.
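+    # Sketch of the windowing arithmetic (illustrative dates only): if telemetry
+    # begins 2019-08-01T00:00:00Z and --start is 2019-08-22T00:00:00Z, then
+    # fastforward_timedelta is 21 days and the replayed window is
+    # [start, start + time).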
+    if start is None:
+        fastforward_timedelta = timedelta(seconds=0)
+    else:
+        fastforward_timedelta = start - telemetry_start_timestamp.tz_localize("UTC")
+    time_to_simulate_timedelta = timedelta(seconds=kwargs['time'])
+
     simulation_start_timestamp = telemetry_start_timestamp + fastforward_timedelta
     simulation_end_timestamp = simulation_start_timestamp + time_to_simulate_timedelta
diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py
index a10e1e8..6ff310b 100644
--- a/raps/dataloaders/marconi100.py
+++ b/raps/dataloaders/marconi100.py
@@ -16,7 +16,7 @@
     python main.py -f /path/to/job_table.parquet --system marconi100 --policy priority --backfill firstfit
 
     # to fast-forward 60 days and replay for 1 day
-    python main.py -f /path/to/job_table.parquet --system marconi100 --ff 60d -t 1d
+    python main.py -f /path/to/job_table.parquet --system marconi100 --start 2020-07-05T00:00:00+00:00 -t 1d
 
     # to analyze dataset
     python -m raps.telemetry -f /path/to/job_table.parquet --system marconi100 -v
@@ -64,10 +64,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
     jid = kwargs.get('jid', '*')
     debug = kwargs.get('debug')
 
-    # fastforward = kwargs.get('fastforward')
-    # if fastforward:
-    #     print(f"fast-forwarding {fastforward} seconds")
-
     # Sort jobs dataframe based on values in time_start column, adjust indices after sorting
     jobs_df = jobs_df.sort_values(by='start_time')
     jobs_df = jobs_df.reset_index(drop=True)
diff --git a/raps/dataloaders/mit_supercloud/cli.py b/raps/dataloaders/mit_supercloud/cli.py
index 0596e5c..e459209 100644
--- a/raps/dataloaders/mit_supercloud/cli.py
+++ b/raps/dataloaders/mit_supercloud/cli.py
@@ -32,8 +32,8 @@ def main():
     pl = subs.add_parser("load", parents=[common], help="Load local data into RAPS")
     pl.add_argument("path", help="Local data root")
     pl.set_defaults(func=lambda args: load_data(args.path,
-                                                start_date=args.start,
-                                                end_date=args.end,
+                                                start=args.start,
+                                                end=args.end,
                                                 partition=args.partition))
 
     args = p.parse_args()
diff --git a/raps/dataloaders/mit_supercloud/loader.py b/raps/dataloaders/mit_supercloud/loader.py
index a622965..fc16ade 100644
--- a/raps/dataloaders/mit_supercloud/loader.py
+++ b/raps/dataloaders/mit_supercloud/loader.py
@@ -119,7 +119,7 @@
 from collections import Counter
 from datetime import datetime, timezone
 
 from raps.job import job_dict, Job
-from raps.utils import summarize_ranges, next_arrival, WorkloadData
+from raps.utils import summarize_ranges, WorkloadData
 
 from .utils import proc_cpu_series, proc_gpu_series, to_epoch
 from .utils import DEFAULT_START, DEFAULT_END
@@ -236,8 +236,8 @@ def load_data(local_dataset_path, **kwargs):
     sl["__line__"] = sl.index + 2
 
     # date window
-    start_ts = to_epoch(kwargs.get("start", DEFAULT_START))
-    end_ts = to_epoch(kwargs.get("end", DEFAULT_END))
+    start_ts = to_epoch(kwargs.get("start") or DEFAULT_START)
+    end_ts = to_epoch(kwargs.get("end") or DEFAULT_END)
 
     mask = (sl.time_submit >= start_ts) & (sl.time_submit < end_ts)
     sl = sl[mask]
@@ -283,8 +283,8 @@ def load_data(local_dataset_path, **kwargs):
     # —— ERROR CATCH: no jobs in this window? ——
     if sl.empty:
         raise ValueError(
-            f"No SLURM jobs found between {kwargs.get('start_date')} and "
-            f"{kwargs.get('end_date')}. Please pick a range covered by the dataset."
+            f"No SLURM jobs found between {kwargs.get('start')} and "
+            f"{kwargs.get('end')}. Please pick a range covered by the dataset."
         )
 
     # detect GPU‐using jobs
diff --git a/raps/engine.py b/raps/engine.py
index 4fdadb2..9ae27b1 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -173,9 +173,9 @@ class Engine:
         self.avg_slowdown_history = []
         self.max_slowdown_history = []
         self.node_occupancy_history = []
-        self.downtime = Downtime(first_downtime=sim_config.downtime_first,
-                                 downtime_interval=sim_config.downtime_interval,
-                                 downtime_length=sim_config.downtime_length,
+        self.downtime = Downtime(first_downtime=sim_config.downtime_first_int,
+                                 downtime_interval=sim_config.downtime_interval_int,
+                                 downtime_length=sim_config.downtime_length_int,
                                  debug=sim_config.debug,
                                  )
 
@@ -233,33 +233,9 @@
         random.seed(sim_config.seed)
         np.random.seed(sim_config.seed + 1)
 
-        if sim_config.cooling:
-            cooling_model = ThermoFluidsModel(**system_config_dict)
-            cooling_model.initialize()
-            if sim_config.start:
-                cooling_model.weather = Weather(sim_config.start, config=system_config_dict)
-        else:
-            cooling_model = None
-
-        if sim_config.power_scope == 'node':
-            if sim_config.uncertainties:
-                power_manager = PowerManager(compute_node_power_validate_uncertainties, **system_config_dict)
-            else:
-                power_manager = PowerManager(compute_node_power_validate, **system_config_dict)
-        else:
-            if sim_config.uncertainties:
-                power_manager = PowerManager(compute_node_power_uncertainties, **system_config_dict)
-            else:
-                power_manager = PowerManager(compute_node_power, **system_config_dict)
-
-        flops_manager = FLOPSManager(
-            config=system_config_dict,
-            validate=(sim_config.power_scope == "node"),
-        )
-
         if sim_config.live and not sim_config.replay:
             td = Telemetry(**sim_config_dict)
-            workload_data = td.load_from_live_system()
+            wd = td.load_from_live_system()
         elif sim_config.replay:
             # TODO: this will have issues if running separate systems or custom systems
             partition_short = partition.split("/")[-1] if partition else None
@@ -278,31 +254,62 @@
             else:
                 replay_files = sim_config.replay
 
-            workload_data = td.load_from_files(replay_files)
+            wd = td.load_from_files(replay_files)
         else:  # Synthetic jobs
             wl = Workload(sim_config_args, system_config_dict)
-            workload_data = wl.generate_jobs()
+            wd = wl.generate_jobs()
             td = Telemetry(**sim_config_dict)
 
-        jobs = workload_data.jobs
-
-        # TODO refactor how stat/end/fastforward/time work
-        if sim_config.fastforward is not None:
-            workload_data.telemetry_start = workload_data.telemetry_start + sim_config.fastforward
-
-        if sim_config.time is not None:
-            workload_data.telemetry_end = workload_data.telemetry_start + sim_config.time
-
-        if sim_config.time_delta is not None:
-            time_delta = sim_config.time_delta
+        jobs = wd.jobs
+        if len(jobs) == 0:
+            print(f"Warning: no jobs found for {partition or 'system'}")
+
+        if sim_config.start:
+            start = sim_config.start
+            diff = start - wd.start_date
+            if diff.total_seconds() < 0:
+                raise Exception(
+                    f"{start.isoformat()} is before the data range of the workload. " +
+                    f"Workload data begins at {wd.start_date.isoformat()}"
+                )
+            wd.telemetry_start += int(diff.total_seconds())
+            wd.start_date = start
         else:
-            time_delta = 1
+            start = wd.start_date
+        start = start + sim_config.fastforward
+        wd.telemetry_end = wd.telemetry_start + sim_config.time_int
+
+        time_delta = sim_config.time_delta_int
 
         if sim_config.continuous_job_generation:
             continuous_workload = wl
         else:
             continuous_workload = None
 
+        if sim_config.cooling:
+            cooling_model = ThermoFluidsModel(**system_config_dict)
+            cooling_model.initialize()
+            if sim_config.weather:
+                cooling_model.weather = Weather(start, config=system_config_dict)
+        else:
+            cooling_model = None
+
+        if sim_config.power_scope == 'node':
+            if sim_config.uncertainties:
+                power_manager = PowerManager(compute_node_power_validate_uncertainties, **system_config_dict)
+            else:
+                power_manager = PowerManager(compute_node_power_validate, **system_config_dict)
+        else:
+            if sim_config.uncertainties:
+                power_manager = PowerManager(compute_node_power_uncertainties, **system_config_dict)
+            else:
+                power_manager = PowerManager(compute_node_power, **system_config_dict)
+
+        flops_manager = FLOPSManager(
+            config=system_config_dict,
+            validate=(sim_config.power_scope == "node"),
+        )
+
         accounts = None
         if sim_config.accounts:
             job_accounts = Accounts(jobs)
@@ -324,7 +331,7 @@
             system_config=system_config,
         )
 
-        return engine, workload_data, time_delta
+        return engine, wd, time_delta
 
     def add_running_jobs_to_queue(self, jobs_to_submit: List):
         """
diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py
index e27a6d2..ec9fcad 100644
--- a/raps/envs/raps_env.py
+++ b/raps/envs/raps_env.py
@@ -15,23 +15,23 @@ def print_stats(stats, step=0):
     """prints SB3-style stats output"""
 
     wanted_keys = {
-        "time simulated": "engine/Time Simulated",
-        "average power": "engine/Average Power",
-        "system power efficiency": "engine/System Power Efficiency",
-        "total energy consumed": "engine/Total Energy Consumed",
-        "carbon emissions": "engine/Carbon Footprint",
-        "jobs completed": "jobs/Jobs Completed",
+        "time_simulated": "engine/Time Simulated",
+        "average_power": "engine/Average Power",
+        "system_power_efficiency": "engine/System Power Efficiency",
+        "total_energy_consumed": "engine/Total Energy Consumed",
+        "carbon_emissions": "engine/Carbon Footprint",
+        "jobs_completed": "jobs/Jobs Completed",
         "throughput": "jobs/Throughput",
-        "jobs still running": "jobs/Jobs Still Running",
+        "jobs_still_running": "jobs/Jobs Still Running",
     }
 
     for section in ["engine_stats", "job_stats"]:
         if section in stats:
             for k, v in stats[section].items():
-                if k.lower() in wanted_keys:
-                    if k.lower() == "jobs still running" and isinstance(v, list):
+                if k in wanted_keys:
+                    if k == "jobs_still_running" and isinstance(v, list):
                         v = len(v)
-                    logger.record(wanted_keys[k.lower()], v)
+                    logger.record(wanted_keys[k], v)
    logger.dump(step=step)
diff --git a/raps/run_sim.py b/raps/run_sim.py
index 4a3f9b3..74ea87a 100644
--- a/raps/run_sim.py
+++ b/raps/run_sim.py
@@ -6,14 +6,12 @@ These functions just handle rendering the terminal UI and outputting results to
 import json
 import pandas as pd
 import sys
-import yaml
 import warnings
-from pathlib import Path
 from raps.ui import LayoutManager
 from raps.plotting import Plotter
 from raps.engine import Engine
 from raps.multi_part_engine import MultiPartEngine
-from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, yaml_dump, read_yaml
+from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml
 from raps.stats import (
raps.stats import ( get_engine_stats, get_job_stats, @@ -61,8 +59,7 @@ def run_sim(sim_config: SingleSimConfig): result=workload_data, args=sim_config, ) - config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) - (out / 'sim_config.yaml').write_text(config_yaml) + (out / 'sim_config.yaml').write_text(sim_config.dump_yaml()) jobs = workload_data.jobs timestep_start, timestep_end = workload_data.telemetry_start, workload_data.telemetry_end @@ -238,8 +235,7 @@ def run_parts_sim(sim_config: MultiPartSimConfig): result=workload_results[part], args=sim_config, ) - config_yaml = yaml_dump(sim_config.model_dump(mode="json", exclude_defaults=True)) - (out / 'sim_config.yaml').write_text(config_yaml) + (out / 'sim_config.yaml').write_text(sim_config.dump_yaml()) jobs = {p: w.jobs for p, w in workload_results.items()} @@ -310,5 +306,4 @@ def show_add_parser(subparsers: SubParsers): def show(sim_config: SingleSimConfig, show_defaults=False): - data = sim_config.model_dump(mode="json", exclude_defaults=not show_defaults) - print(yaml_dump(data), end="") + print(sim_config.dump_yaml(exclude_unset=not show_defaults), end='') diff --git a/raps/sim_config.py b/raps/sim_config.py index 05c078c..867dac0 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -3,15 +3,15 @@ import abc from pathlib import Path from functools import cached_property from datetime import timedelta -from typing import Literal +from typing import Literal, Annotated as A import importlib from raps.schedulers.default import PolicyType, BackfillType from raps.utils import ( - parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, parse_td, create_casename, - RAPSBaseModel, + parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, create_casename, + RAPSBaseModel, AutoAwareDatetime, SmartTimedelta, yaml_dump, ) from raps.system_config import SystemConfig, get_partition_configs, get_system_config -from pydantic import model_validator +from pydantic import model_validator, Field Distribution = Literal['uniform', 'weibull', 'normal'] @@ -21,37 +21,54 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Include the FMU cooling model """ simulate_network: bool = False """ Include network model """ + weather: bool | None = None + """ + Include weather information in the cooling model. + Defaults to True if replay, False otherwise. + """ # Simulation runtime options - fastforward: int | None = None + start: AutoAwareDatetime | None = None + """ Start of simulation """ + # Exclude end from serialization as it is redundant with time + end: A[AutoAwareDatetime | None, Field(exclude=True)] = None + """ End of simulation. Pass either `time` or `end`, not both. """ + time: SmartTimedelta = timedelta(hours=1) """ - Fast-forward by time amount (unit specified by `time_unit`, default seconds). - Can pass a string like 15s, 1m, 1h + Length of time to simulate (default seconds). + Can pass a string like 123, 27m, 3h, 7d + Pass either `time` or `end`, not both. """ - time: int | None = None + fastforward: SmartTimedelta = timedelta(seconds=0) """ - Length of time to simulate (unit specified by `time_unit`, default seconds). - Can pass a string like 123, 27m, 3h, 7d + "Fast-forward" the simulation by time amount before starting. This is just a convenience + shortcut for setting --start without having to recall the exact start date of the dataset. 
+    Can pass a string like 15s, 1m, 1h
     """
-    time_delta: int = 1
+    time_delta: SmartTimedelta = timedelta(seconds=1)
     """
-    Step size (unit specified by `time_unit`, default seconds).
+    Step size for the power simulation (default seconds).
     Can pass a string like 15s, 1m, 1h, 1ms
     """
     time_unit: timedelta = timedelta(seconds=1)
     """
-    Units all time delta ints are measured in (default seconds)
+    The base unit of the simulation, determining how often it will tick the job scheduler.
     """
 
+    @cached_property
+    def time_int(self) -> int:
+        """ Return time as an int of time_unit """
+        return int(self.time / self.time_unit)
+
+    @cached_property
+    def time_delta_int(self) -> int:
+        """ Return time_delta as an int of time_unit """
+        return int(self.time_delta / self.time_unit)
+
     @cached_property
     def downscale(self) -> int:
         return int(timedelta(seconds=1) / self.time_unit)
 
-    start: str = "2021-05-21T13:00:00-04:00"
-    """ ISO8601 start of simulation """
-    end: str = "2021-05-21T14:00:00-04:00"
-    """ ISO8601 end of simulation """
-
     numjobs: int = 100
     """ Number of jobs to schedule """
 
@@ -200,22 +217,34 @@ class SimConfig(RAPSBaseModel, abc.ABC):
     """ Path to accounts JSON file from previous run """
 
     # Downtime
-    downtime_first: int | None = None
+    downtime_first: SmartTimedelta | None = None
     """
    First downtime (unit specified by `time_unit`, default seconds).
    Can pass a string like 27m, 3h, 7d
    """
-    downtime_interval: str | None = None
+    downtime_interval: SmartTimedelta | None = None
    """
    Interval between downtimes (unit specified by `time_unit`, default seconds).
    Can pass a string like 123, 27m, 3h, 7d
    """
-    downtime_length: str | None = None
+    downtime_length: SmartTimedelta | None = None
    """
    Downtime length (unit specified by `time_unit`, default seconds).
    Can pass a string like 123, 27m, 3h, 7d
    """
 
+    @cached_property
+    def downtime_first_int(self) -> int | None:
+        return None if self.downtime_first is None else int(self.downtime_first / self.time_unit)
+
+    @cached_property
+    def downtime_interval_int(self) -> int | None:
+        return None if self.downtime_interval is None else int(self.downtime_interval / self.time_unit)
+
+    @cached_property
+    def downtime_length_int(self) -> int | None:
+        return None if self.downtime_length is None else int(self.downtime_length / self.time_unit)
+
     # Continuous Job Generation
     continuous_job_generation: bool = False
     """ Activate continuous job generation """
@@ -229,40 +258,60 @@ class SimConfig(RAPSBaseModel, abc.ABC):
     def _validate_before(cls, data):
         # This is called with the raw input, before Pydantic parses it, so data is just a dict and
         # can contain any data types.
+        data = {**data}
 
-        time_fields = [
+        # infer time_unit
+        td_fields = [
             "time_delta", "time", "fastforward",
             "downtime_first", "downtime_interval", "downtime_length",
         ]
-
-        if data.get('time_unit') is not None:
-            time_unit = parse_time_unit(data['time_unit'])
-            input_time_unit = time_unit
-        else:
+        if data.get('time_unit') is None:
             time_unit = min(
-                [infer_time_unit(data[f]) for f in time_fields if data.get(f)],
+                [infer_time_unit(data[f]) for f in td_fields if data.get(f)],
                 default=timedelta(seconds=1)
             )
-            # When "inferring" time unit interpret raw numbers as seconds.
-            # E.g. 
`-t 10 --time-delta 1ds` should be `-t 10s --time-delta 1ds` - input_time_unit = timedelta(seconds=1) - + else: + time_unit = parse_time_unit(data['time_unit']) data['time_unit'] = time_unit - for field in time_fields: - if data.get(field) is not None: - td = parse_td(data[field], input_time_unit) - data[field] = convert_to_time_unit(td, time_unit) return data @model_validator(mode="after") def _validate_after(self): + # Allow setting either start/end or start/time for backwards compatibility and convenience + if self.start and self.fastforward: + raise ValueError("start and fastforward are mutually exclusive") + + if self.end: + if not self.start: + raise ValueError("end requires start to be set") + if 'time' not in self.model_fields_set: # If time was not explicitly set + self.time = self.end - self.start + elif self.start: + self.end = self.start + self.time + + if self.start and self.start + self.time != self.end: + raise ValueError("time and end values don't match. You only need to specify one.") + + td_fields = [ + "time_delta", "time", "fastforward", + "downtime_first", "downtime_interval", "downtime_length", + ] + # Check time fields are divisible by time_unit. + for field in td_fields: + td = getattr(self, field) + if td is not None: + convert_to_time_unit(td, self.time_unit) # will throw if invalid + if not self.replay and not self.workload: self.workload = "random" if self.cooling: self.layout = "layout2" + if self.weather is None: + self.weather = self.cooling and bool(self.replay) + if self.jobsize_is_power_of is not None and self.jobsize_is_of_degree is not None: raise ValueError("jobsize_is_power_of and jobsize_is_of_degree are mutually exclusive") @@ -341,20 +390,32 @@ class SimConfig(RAPSBaseModel, abc.ABC): args_dict = self.model_dump(mode="json") args_dict['system'] = self.system_name # validate has been renamed to power_scope - args_dict['validate'] = args_dict["power_scope"] == "node" + args_dict['validate'] = self.power_scope == "node" args_dict['downscale'] = self.downscale # Convert Path objects to str - if args_dict['output']: - args_dict['output'] = str(args_dict['output']) - if args_dict['replay']: - args_dict['replay'] = [str(p) for p in args_dict['replay']] - if args_dict['accounts_json']: - args_dict['accounts_json'] = str(args_dict['accounts_json']) + if self.output: + args_dict['output'] = str(self.output) + if self.replay: + args_dict['replay'] = [str(p) for p in self.replay] + if self.accounts_json: + args_dict['accounts_json'] = str(self.accounts_json) + + args_dict["time"] = self.time_int + args_dict["time_delta"] = self.time_delta_int + args_dict["downtime_first"] = self.downtime_first_int + args_dict["downtime_interval"] = self.downtime_interval_int + args_dict["downtime_length"] = self.downtime_length_int + args_dict['start'] = self.start.astimezone().isoformat() if self.start else None + args_dict['end'] = self.end.astimezone().isoformat() if self.end else None + args_dict.pop("fastforward") # Remove fastforward from this to avoid confusion later args_dict['sim_config'] = self return args_dict + def dump_yaml(self, exclude_unset=True): + return yaml_dump(self.model_dump(mode="json", exclude_unset=exclude_unset)) + class SingleSimConfig(SimConfig, abc.ABC): system: SystemConfig | str = "frontier" @@ -397,8 +458,8 @@ SIM_SHORTCUTS = { "partitions": "x", "cooling": "c", "simulate-network": "net", - "fastforward": "ff", "time": "t", + "fastforward": "ff", "debug": "d", "numjobs": "n", "verbose": "v", diff --git a/raps/stats.py b/raps/stats.py index 
aa8610e..a420151 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -30,26 +30,26 @@ def get_engine_stats(engine: Engine): total_cost = total_energy_consumed * 1000 * engine.config.get('POWER_COST', 0) # Total cost in dollars stats = { - 'time simulated': time_simulated, + 'time_simulated': time_simulated, 'num_samples': num_samples, - 'average power': f'{average_power_mw:.4f} MW', - 'min loss': f'{min_loss_mw:.4f} MW', - 'average loss': f'{average_loss_mw:.2f} MW', - 'max loss': f'{max_loss_mw:.2f} MW', - 'system power efficiency': f'{efficiency * 100:.2f}%', - 'total energy consumed': f'{total_energy_consumed:.2f} MW-hr', - 'carbon emissions': f'{emissions:.4f} metric tons CO2', - 'total cost': f'${total_cost:.2f}' + 'average_power': average_power_mw, + 'min_loss': min_loss_mw, + 'average_loss': average_loss_mw, + 'max_loss': max_loss_mw, + 'system_power_efficiency': efficiency * 100, + 'total_energy_consumed': total_energy_consumed, + 'carbon_emissions': emissions, + 'total_cost': total_cost, } if engine.config['multitenant']: # Multitenancy Stats total_jobs_loaded = engine.total_initial_jobs # Assuming this is passed to __init__ - stats['total jobs loaded'] = total_jobs_loaded + stats['total_jobs_loaded'] = total_jobs_loaded if total_jobs_loaded > 0: - stats['jobs completed percentage'] = f"{(engine.jobs_completed / total_jobs_loaded * 100):.2f}%" + stats['jobs_completed_percentage'] = engine.jobs_completed / total_jobs_loaded * 100 else: - stats['jobs completed percentage'] = "0%" + stats['jobs_completed_percentage'] = 0 if engine.node_occupancy_history: # Calculate average concurrent jobs per node (average density across all nodes and timesteps) @@ -76,11 +76,11 @@ def get_engine_stats(engine: Engine): avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \ if count_active_timesteps_for_avg_active > 0 else 0 - stats['avg concurrent jobs per active node'] = f"{avg_jobs_per_active_node:.2f}" - stats['max concurrent jobs per node'] = max_concurrent_jobs_per_node + stats['avg_concurrent_jobs_per_active_node'] = avg_jobs_per_active_node + stats['max_concurrent_jobs_per_node'] = max_concurrent_jobs_per_node else: - stats['avg concurrent jobs per node'] = "N/A" - stats['max concurrent jobs per node'] = "N/A" + stats['avg_concurrent_jobs_per_node'] = None + stats['max_concurrent_jobs_per_node'] = None # network_stats = get_network_stats() # stats.update(network_stats) @@ -124,19 +124,19 @@ def get_network_stats(engine: Engine): else: mean_net_util = 0.0 - stats["avg network util"] = f"{mean_net_util * 100:.2f}%" + stats["avg_network_util"] = mean_net_util * 100 if engine.avg_slowdown_history: avg_job_slow = sum(engine.avg_slowdown_history) / len(engine.avg_slowdown_history) else: avg_job_slow = 1.0 - stats["avg per-job slowdown"] = f"{avg_job_slow:.2f}x" + stats["avg_per_job_slowdown"] = avg_job_slow if engine.max_slowdown_history: max_job_slow = max(engine.max_slowdown_history) else: max_job_slow = 1.0 - stats["max per-job slowdown"] = f"{max_job_slow:.2f}x" + stats["max_per_job_slowdown"] = max_job_slow return stats @@ -301,31 +301,32 @@ def get_job_stats(engine: Engine): min_nrx_u, max_nrx_u, avg_nrx_u = -1, -1, -1 job_stats = { - 'jobs completed': engine.jobs_completed, - 'throughput': f'{throughput:.2f} jobs/hour', - 'jobs still running': [job.id for job in engine.running], - 'jobs still in queue': [job.id for job in engine.queue], - 'Jobs <= 5 nodes': jobsSmall, - 'Jobs <= 50 nodes': jobsMedium, - 'Jobs <= 250 nodes': jobsLarge, - 'Jobs <= 4500 
nodes': jobsVLarge, - 'Jobs > 4500 nodes': jobsHuge, + 'jobs_total': engine.jobs_completed + len(engine.running) + len(engine.queue), + 'jobs_completed': engine.jobs_completed, + 'throughput': throughput, + 'jobs_still_running': [job.id for job in engine.running], + 'jobs_still_in_queue': [job.id for job in engine.queue], + 'jobs <= 5 nodes': jobsSmall, + 'jobs <= 50 nodes': jobsMedium, + 'jobs <= 250 nodes': jobsLarge, + 'jobs <= 4500 nodes': jobsVLarge, + 'jobs > 4500 nodes': jobsHuge, # Information on job-mix executed - 'min job size': min_job_size, - 'max job size': max_job_size, - 'average job size': avg_job_size, - 'min runtime': min_runtime, - 'max runtime': max_runtime, - 'average runtime': avg_runtime, - 'min energy': min_energy, - 'max energy': max_energy, - 'avg energy': avg_energy, - 'min edp': min_edp, - 'max edp': max_edp, - 'avg edp': avg_edp, - 'min edp^2': min_edp2, - 'max edp^2': max_edp2, - 'avg edp^2': avg_edp2, + 'min_job_size': min_job_size, + 'max_job_size': max_job_size, + 'average_job_size': avg_job_size, + 'min_runtime': min_runtime, + 'max_runtime': max_runtime, + 'average_runtime': avg_runtime, + 'min_energy': min_energy, + 'max_energy': max_energy, + 'avg_energy': avg_energy, + 'min_edp': min_edp, + 'max_edp': max_edp, + 'avg_edp': avg_edp, + 'min_edp^2': min_edp2, + 'max_edp^2': max_edp2, + 'avg_edp^2': avg_edp2, 'min_aggregate_node_hours': min_agg_node_hours, 'max_aggregate_node_hours': max_agg_node_hours, 'avg_aggregate_node_hours': avg_agg_node_hours, @@ -362,28 +363,44 @@ def print_formatted_report(engine_stats=None, scheduler_stats=None, network_stats=None ): + def print_report_section(name, data, templates): + if data: + rep_str = f"--- {name} ---" + print(rep_str) + for key, value in data.items(): + pretty_key = key.replace('_', ' ').title() + if key in templates: + pretty_value = templates[key].format(value) + elif isinstance(value, float): + pretty_value = f"{value:.2f}" + elif value is None: + pretty_value = "N/A" + else: + pretty_value = str(value) + print(f"{pretty_key}: {pretty_value}") + print(f"{'-' * len(rep_str)}\n") + print() + # Print a formatted report - if engine_stats: - rep_str = "--- Simulation Report ---" - print(f"\n{rep_str}") - for key, value in engine_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") - print(f"{'-' * len(rep_str)}\n") - if job_stats: - rep_str = "--- Job Stat Report ---" - print(f"\n{rep_str}") - for key, value in job_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") - print(f"{'-' * len(rep_str)}\n") - if scheduler_stats: - rep_str = "--- Scheduler Report ---" - print(f"\n{rep_str}") - for key, value in scheduler_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") - print(f"{'-' * len(rep_str)}\n") - if network_stats: - rep_str = "--- Network Report ---" - print(f"\n{rep_str}") - for key, value in network_stats.items(): - print(f"{key.replace('_', ' ').title()}: {value}") - print(f"{'-' * len(rep_str)}\n") + print() + print_report_section("Simulation Report", engine_stats, { + 'average_power': '{:.4f} MW', + 'min_loss': '{:.4f} MW', + 'average_loss': '{:.2f} MW', + 'max_loss': '{:.2f} MW', + 'system_power_efficiency': '{:.2f}%', + 'total_energy_consumed': '{:.2f} MW-hr', + 'carbon_emissions': '{:.4f} metric tons CO2', + 'total_cost': '${:.2f}', + }) + print_report_section("Job Stat Report", job_stats, { + 'throughput': '{:.2f} jobs/hour', + 'jobs_completed_percentage': "{:.2f}%", + }) + print_report_section("Scheduler Report", scheduler_stats, { + }) + 
print_report_section("Network Report", network_stats, { + "avg_network_util": "{:.2f}%", + "avg_per_job_slowdown": "{:.2f}x", + "max_per_job_slowdown": "{:.2f}x", + }) diff --git a/raps/ui.py b/raps/ui.py index b4234bc..3965935 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -26,6 +26,7 @@ from raps.engine import TickData, Engine MAX_ROWS = 30 + class LayoutManager: def __init__(self, layout_type, engine: Engine, total_timesteps=0, debug=None, args_dict=None, **config): self.debug = debug diff --git a/raps/utils.py b/raps/utils.py index c3c541f..4414fd9 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -6,7 +6,7 @@ generating random numbers, summarizing and expanding ranges, determining job sta """ -from datetime import timedelta +from datetime import datetime, timedelta, timezone from enum import Enum import os import hashlib @@ -21,7 +21,9 @@ import json import argparse from pathlib import Path from typing import Annotated as A, TypeVar, Callable, TypeAlias -from pydantic import BaseModel, TypeAdapter, AfterValidator, ConfigDict, AwareDatetime, ValidationError +from pydantic import ( + BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, AwareDatetime, ValidationError +) from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource import yaml from raps.job import Job @@ -533,6 +535,9 @@ def parse_td(td, unit: str | timedelta = 's') -> timedelta: if TypeAdapter(timedelta).validator.isinstance_python(td): return TypeAdapter(timedelta).validate_python(td) if isinstance(td, str): + if not pd.isna(pd.to_timedelta(td, errors="coerce")): + return pd.to_timedelta(td) + # Special case parsing for ds and cs units which pandas doesn't support re_match = re.fullmatch(r"(\d+)\s*(\w+)", td.strip()) if re_match and re_match[2] in TIME_UNITS: num_str, unit_str = re_match.groups() @@ -670,15 +675,23 @@ class ValueComparableEnum(Enum): return hash(self.value) +def normalize_tz(d: datetime): + """ Convert datetime to UTC. 
If naive, assume local time, then convert to UTC """
+    if not d.tzinfo:
+        return d.astimezone().astimezone(timezone.utc)
+    else:
+        return d.astimezone(timezone.utc)
+
+
 ExpandedPath = A[Path, AfterValidator(lambda v: Path(v).expanduser().resolve())]
 """ Type that expands ~ and environment variables in a path string """
+AutoAwareDatetime = A[datetime, AfterValidator(normalize_tz)]
+""" Datetime type wrapper, makes sure timezone is set """
 
-SmartTimedelta = A[timedelta, AfterValidator(parse_td)]
+SmartTimedelta = A[timedelta, BeforeValidator(parse_td)]
 """ Can be passed as ISO 8601 format like PT5M, or a string like 9s, or a number of seconds """
 
-T = TypeVar("T", bound=BaseModel)
-
 
 class RAPSBaseModel(BaseModel):
     """ Base Pydantic model with shared config """
@@ -687,6 +700,9 @@
     )
 
 
+T = TypeVar("T", bound=BaseModel)
+
+
 def pydantic_add_args(
     parser: argparse.ArgumentParser,
     model_cls: type[T], model_config: SettingsConfigDict | None = None,
@@ -727,7 +743,8 @@
             **(data or {}),
         )
         # Recreate model so we don't return the SettingsModel subclass
-        return model_cls.model_validate(model.model_dump())
+        # use exclude_unset so that model_fields_set is preserved as well
+        return model_cls.model_validate(model.model_dump(exclude_unset=True))
     except ValidationError as err:
         print(err)
         sys.exit(1)
@@ -738,8 +755,11 @@
 SubParsers: TypeAlias = "argparse._SubParsersAction[argparse.ArgumentParser]"
 """ Alias for the result of argparse parser.add_subparsers """
 
 
-def yaml_dump(data):
+def yaml_dump(data, header_comment=''):
     """ Dumps yaml with pretty formatting """
+    if header_comment:
+        header_comment = '\n'.join(f'# {ln}' for ln in header_comment.splitlines()) + "\n"
+
     class IndentDumper(yaml.Dumper):
         def represent_data(self, data):
             # Quote all strings with special characters to avoid confusion
@@ -755,7 +775,7 @@
             # Indent lists
             return super(IndentDumper, self).increase_indent(flow, False)
 
-    return yaml.dump(
+    return header_comment + yaml.dump(
         data,
         Dumper=IndentDumper,
         sort_keys=False,
@@ -766,10 +786,15 @@
 
 def read_yaml(config_file: str):
     """ Parses yaml file. Pass "-" to read from stdin """
-    if config_file == "-":
-        return yaml.safe_load(sys.stdin.read())
+    # Assume stdin if not terminal
+    if config_file == "-" or (not config_file and not sys.stdin.isatty()):
+        data = sys.stdin.read()
     elif config_file:
-        return yaml.safe_load(Path(config_file).read_text())
+        data = Path(config_file).read_text()
+    else:
+        data = ""
+    if data.strip():
+        return yaml.safe_load(data)
     else:
         return {}
@@ -860,7 +885,8 @@ class WorkloadData(RAPSBaseModel):
     telemetry_end: int
     # TODO: It might make more sense to make start_timestep/end_timestep always unix time, then we
     # wouldn't need this extra start_date field.
-    start_date: AwareDatetime
+    # Don't use AutoAwareDatetime here as we want to enforce dataloaders returning timezone info
+    start_date: A[AwareDatetime, AfterValidator(lambda d: d.astimezone(timezone.utc))]
 
     model_config = ConfigDict(
         arbitrary_types_allowed=True,
diff --git a/raps/weather.py b/raps/weather.py
index 8a4e138..655e8f3 100644
--- a/raps/weather.py
+++ b/raps/weather.py
@@ -7,7 +7,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
 class Weather:
-    def __init__(self, iso_string, config):
+    def __init__(self, start: datetime | None, config):
         """
         Initialize the Weather class with configuration loaded from a JSON file. 
If zip_code and country_code are provided, the coordinates (lat, lon) @@ -20,13 +20,7 @@ class Weather: self.lon = None self.weather_cache = {} # Cache for storing weather data for the entire day self.has_coords = False - self.start = None - - try: - # Convert the ISO 8601 string to a datetime object - self.start = datetime.fromisoformat(iso_string.replace("Z", "+00:00")) - except ValueError: - print("Invalid ISO 8601 datetime string specified for --start. Using default temperature instead.") + self.start = start # Retrieve coordinates if zip_code and country_code are provided if self.zip_code and self.country_code: diff --git a/raps/workloads/__init__.py b/raps/workloads/__init__.py index 080eafc..a34261a 100644 --- a/raps/workloads/__init__.py +++ b/raps/workloads/__init__.py @@ -2,15 +2,17 @@ import math import numpy as np +import pandas as pd from raps.utils import WorkloadData, SubParsers -from raps.utils import pydantic_add_args +from raps.utils import pydantic_add_args, create_file_indexed from raps.sim_config import SingleSimConfig +from raps.telemetry import Telemetry from .basic import BasicWorkload from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY from .distribution import DistributionWorkload -from .live import continuous_job_generation, run_workload +from .live import continuous_job_generation from .multitenant import MultitenantWorkload from .utils import plot_job_hist @@ -26,11 +28,12 @@ class BaseWorkload: def generate_jobs(self): jobs = getattr(self, self.args.workload)(args=self.args) timestep_end = int(math.ceil(max([job.end_time for job in jobs]))) + now = pd.Timestamp.now('UTC').floor("min").to_pydatetime() return WorkloadData( jobs=jobs, telemetry_start=0, telemetry_end=timestep_end, - start_date=self.args.start, + start_date=self.args.start if self.args.start else now, ) def compute_traces(self, @@ -73,3 +76,29 @@ def run_workload_add_parser(subparsers: SubParsers): "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults(impl=lambda args: run_workload(model_validate(args, {}))) + + +def run_workload(sim_config: SingleSimConfig): + args = sim_config.get_legacy_args() + args_dict = sim_config.get_legacy_args() + config = sim_config.system_configs[0].get_legacy() + + if sim_config.replay: + td = Telemetry(**args_dict) + jobs = td.load_from_files(sim_config.replay).jobs + else: + workload = Workload(args, config) + jobs = getattr(workload, sim_config.workload)(args=sim_config.get_legacy_args()) + plot_job_hist(jobs, + config=config, + dist_split=sim_config.multimodal, + gantt_nodes=sim_config.gantt_nodes) + + out = sim_config.get_output() + if out: + timestep_start = min([x.submit_time for x in jobs]) + timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs])) + filename = create_file_indexed('wl', path=str(out), create=False, ending="npz").split(".npz")[0] + # savez_compressed add npz itself, but create_file_indexed needs to check for .npz to find existing files + np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) + print(filename + ".npz") # To std-out to show which npz was created. 
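For reference, a snapshot written by run_workload above can be read back with plain numpy. This is a minimal sketch only, not part of the patch: load_workload_snapshot is a hypothetical helper, and the key names simply mirror the savez_compressed call shown above.

    import numpy as np

    def load_workload_snapshot(path: str):
        """Read back a .npz workload snapshot (illustrative sketch)."""
        # allow_pickle is required because the jobs array stores Python job objects
        data = np.load(path, allow_pickle=True)
        jobs = data["jobs"].tolist()
        return jobs, int(data["timestep_start"]), int(data["timestep_end"])

    # Hypothetical usage with a file produced by `main.py workload -o <dir>`:
    # jobs, t0, t1 = load_workload_snapshot("wl-0.npz")
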
diff --git a/raps/workloads/live.py b/raps/workloads/live.py index 974b369..b4f2733 100644 --- a/raps/workloads/live.py +++ b/raps/workloads/live.py @@ -1,39 +1,6 @@ -import math -import numpy as np -from raps.sim_config import SingleSimConfig -from raps.telemetry import Telemetry -from raps.utils import create_file_indexed -from .utils import plot_job_hist - def continuous_job_generation(self, *, engine, timestep, jobs): # print("if len(engine.queue) <= engine.continuous_workload.args.maxqueue:") # print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") if len(engine.queue) <= engine.continuous_workload.args.maxqueue: new_jobs = engine.continuous_workload.generate_jobs().jobs jobs.extend(new_jobs) - - -def run_workload(sim_config: SingleSimConfig): - args = sim_config.get_legacy_args() - args_dict = sim_config.get_legacy_args() - config = sim_config.system_configs[0].get_legacy() - - if sim_config.replay: - td = Telemetry(**args_dict) - jobs = td.load_from_files(sim_config.replay).jobs - else: - workload = Workload(args, config) - jobs = getattr(workload, sim_config.workload)(args=sim_config.get_legacy_args()) - plot_job_hist(jobs, - config=config, - dist_split=sim_config.multimodal, - gantt_nodes=sim_config.gantt_nodes) - - out = sim_config.get_output() - if out: - timestep_start = min([x.submit_time for x in jobs]) - timestep_end = math.ceil(max([x.submit_time for x in jobs]) + max([x.expected_run_time for x in jobs])) - filename = create_file_indexed('wl', path=str(out), create=False, ending="npz").split(".npz")[0] - # savez_compressed add npz itself, but create_file_indexed needs to check for .npz to find existing files - np.savez_compressed(filename, jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end, args=args) - print(filename + ".npz") # To std-out to show which npz was created. diff --git a/tests/conftest.py b/tests/conftest.py index 855f969..0b7d8b3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,8 @@ import shutil from glob import glob from pathlib import Path import gc +import os +from .util import PROJECT_ROOT def pytest_addoption(parser): @@ -25,6 +27,7 @@ def sim_output(): Handles cleaning up output from the sim. Can also be used even if you aren't outputing anything to run garbage collection after the sim. """ + os.chdir(PROJECT_ROOT) out = f"test-output/test-{str(uuid.uuid4())[:8]}" yield out for file in glob(f"{out}*"): diff --git a/tests/systems/conftest.py b/tests/systems/conftest.py index 2703755..31fa936 100644 --- a/tests/systems/conftest.py +++ b/tests/systems/conftest.py @@ -2,18 +2,194 @@ import pytest from tests.util import DATA_PATH +SYSTEM_CONFIGS = { + "40frontiers": { + "marks": [pytest.mark.long], # All these tests are long running as the system is large. 
+ "main": True, + "telemetry": False, + "workload": False, + "multi-part-sim": False, + "withdata": False, + "start": None, + "files": [], + "cooling": False, + "uncertainty": True, + "time": True, + "time_delta": True, + "net": False, + }, + "adastraMI250": { + "marks": [], + "main": True, + "telemetry": True, + "workload": True, + "multi-part-sim": False, + "withdata": True, + "start": "2024-09-01T02:00:00Z", + "files": ["adastraMI250/AdastaJobsMI250_15days.parquet"], + "cooling": False, + "uncertainty": True, + "time": True, + "time_delta": True, + "net": False, + }, + "bluewaters": { + "marks": [], + "main": True, + "telemetry": True, + "workload": True, + "multi-part-sim": False, + "withdata": True, + "start": "2017-03-28T02:00:00Z", + "files": ["bluewaters"], + "cooling": False, + "uncertainty": False, + "time": True, + "time_delta": True, + "net": False, + }, + "frontier": { + "marks": [], + "main": True, + "telemetry": True, + "workload": True, + "multi-part-sim": False, + "withdata": True, + "start": "2024-01-18T03:00:00Z", + "files": ["frontier/slurm/joblive/date=2024-01-18/", "frontier/jobprofile/date=2024-01-18/"], + "cooling": True, + "uncertainty": True, + "time": True, + "time_delta": True, + "net": False, + }, + "fugaku": { + "marks": [], + "main": True, + "telemetry": True, + "workload": True, + "multi-part-sim": False, + "withdata": True, + "start": "2021-04-03T02:00:00Z", + "files": ["fugaku/21_04.parquet"], + "cooling": False, + "uncertainty": False, + "time": True, + "time_delta": True, + "net": False, + }, + "gcloudv2": { + "marks": [], + "main": True, + "telemetry": True, + "workload": True, + "multi-part-sim": False, + "withdata": True, + "start": "2011-05-02T05:00:00Z", + "files": ["gcloud/v2/google_cluster_data_2011_sample"], + "cooling": False, + "uncertainty": False, + "time": True, + "time_delta": True, + "net": False, + }, + "lassen": { + "marks": [], + "main": True, + "telemetry": False, # Takes very long! 
+ "workload": False, + "multi-part-sim": False, + "withdata": True, + "start": "2019-08-22T00:00:00Z", + "files": ["lassen/Lassen-Supercomputer-Job-Dataset"], + "cooling": True, + "uncertainty": False, + "time": True, + "time_delta": True, + "net": True, + }, + "marconi100": { + "marks": [], + "main": True, + "telemetry": True, + "workload": True, + "multi-part-sim": False, + "withdata": True, + "start": "2020-05-06T07:30:00Z", + "files": ["marconi100/job_table.parquet"], + "cooling": True, + "uncertainty": False, + "time": True, + "time_delta": True, + "net": False, + }, + "mit_supercloud": { + "marks": [], + "main": False, + "telemetry": False, + "workload": False, + "multi-part-sim": True, + "withdata": True, + "start": "2021-05-22T00:00:00Z", + "files": ["mit_supercloud/202201"], + "cooling": False, + "uncertainty": False, + "time": False, + "time_delta": False, + "net": False, + "net-multi-sim": True, + }, + "setonix": { + "marks": [], + "main": False, + "telemetry": True, + "workload": False, + "multi-part-sim": True, + "withdata": False, + "files": [], + "start": None, + "cooling": False, + "uncertainty": False, + "time": False, + "time_delta": False, + "net": False, + }, + "summit": { + "marks": [], + "main": True, + "telemetry": False, + "workload": False, + "multi-part-sim": False, + "withdata": False, + "files": [], + "start": None, + "cooling": True, + "uncertainty": False, + "time": True, + "time_delta": True, + "net": False, + }, + "lumi": { + "marks": [], + "main": False, + "telemetry": False, + "workload": False, + "multi-part-sim": True, + "withdata": False, + "files": [], + "start": None, + "cooling": False, + "uncertainty": False, + "time": False, + "time_delta": False, + "net": False, + "net-multi-sim": False + }, +} + + @pytest.fixture(params=[ - pytest.param("40frontiers", marks=pytest.mark.long), # All these tests are long running as the system is large. - "adastraMI250", - "frontier", - "fugaku", - "gcloudv2", - "lassen", - "marconi100", - "mit_supercloud", - "setonix", - "summit", - "lumi" + pytest.param(k, marks=v.get('marks', [])) for k, v in SYSTEM_CONFIGS.items() ]) def system(request): return request.param @@ -33,183 +209,12 @@ def pytest_collection_modifyitems(config, items): # #Define tests to run here! @pytest.fixture def system_config(system): - # Defaults for systems not listed explicitly - default_config = {} # No defaults! 
- - configs = { - "40frontiers": { - "main": True, - "telemetry": False, - "workload": False, - "multi-part-sim": False, - "withdata": False, - "cooling": False, - "uncertainty": True, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "adastraMI250": { - "main": True, - "telemetry": True, - "workload": True, - "multi-part-sim": False, - "withdata": True, - "cooling": False, - "uncertainty": True, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "bluewaters": { - "main": True, - "telemetry": True, - "multi-part-sim": False, - "withdata": True, - "cooling": False, - "uncertainty": False, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "frontier": { - "main": True, - "telemetry": True, - "workload": True, - "multi-part-sim": False, - "withdata": True, - "cooling": True, - "uncertainty": True, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "fugaku": { - "main": True, - "telemetry": True, - "multi-part-sim": False, - "withdata": True, - "cooling": False, - "uncertainty": False, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "gcloudv2": { - "main": True, - "telemetry": True, - "multi-part-sim": False, - "withdata": True, - "cooling": False, - "uncertainty": False, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "lassen": { - "main": True, - "telemetry": False, # Takes very long! - "multi-part-sim": False, - "withdata": True, - "cooling": True, - "uncertainty": False, - "time": True, - "fastforward": True, - "time_delta": True, - "net": True, - }, - "marconi100": { - "main": True, - "telemetry": True, - "multi-part-sim": False, - "withdata": True, - "cooling": True, - "uncertainty": False, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "mit_supercloud": { - "main": False, - "telemetry": False, - "multi-part-sim": True, - "withdata": True, - "cooling": False, - "uncertainty": False, - "time": False, - "fastforward": False, - "time_delta": False, - "net": False, - "net-multi-sim": True, - }, - "setonix": { - "main": False, - "telemetry": True, - "multi-part-sim": True, - "withdata": False, - "cooling": False, - "uncertainty": False, - "time": False, - "fastforward": False, - "time_delta": False, - "net": False, - }, - "summit": { - "main": True, - "telemetry": False, - "multi-part-sim": False, - "withdata": False, - "cooling": True, - "uncertainty": False, - "time": True, - "fastforward": True, - "time_delta": True, - "net": False, - }, - "lumi": { - "main": False, - "telemetry": False, - "multi-part-sim": True, - "withdata": False, - "cooling": False, - "uncertainty": False, - "time": False, - "fastforward": False, - "time_delta": False, - "net": False, - "net-multi-sim": False - }, - - - } - return configs.get(system, default_config) + return SYSTEM_CONFIGS[system] @pytest.fixture def system_files(system): - files = { - "40frontiers": [], - "adastraMI250": ["adastraMI250/AdastaJobsMI250_15days.parquet"], - "bluewaters": ["bluewaters"], - "frontier": ["frontier/slurm/joblive/date=2024-01-18/", "frontier/jobprofile/date=2024-01-18/"], - "fugaku": ["fugaku/21_04.parquet"], - "gcloudv2": ["gcloud/v2/google_cluster_data_2011_sample"], - "lassen": ["lassen/Lassen-Supercomputer-Job-Dataset"], - "marconi100": ["marconi100/job_table.parquet"], - "mit_supercloud": ["mit_supercloud/202201"], - "setonix": [], - "summit": [], - "lumi": [] - } - - file_list = 
[DATA_PATH / f for f in files.get(system, [])] + file_list = [DATA_PATH / f for f in SYSTEM_CONFIGS[system].get('files', [])] for file in file_list: assert file.exists(), \ f"File `{file}' does not exist. does ./data exist or is RAPS_DATA_DIR set?" diff --git a/tests/systems/test_engine.py b/tests/systems/test_engine.py deleted file mode 100644 index 0404e89..0000000 --- a/tests/systems/test_engine.py +++ /dev/null @@ -1,39 +0,0 @@ -import pytest -from raps.engine import Engine -from raps.sim_config import SingleSimConfig -from raps.stats import ( - get_engine_stats, - # get_job_stats, - # get_scheduler_stats, - # get_network_stats, -) - -pytestmark = [ - pytest.mark.system, - pytest.mark.nodata -] - - -def test_engine(system, system_config, sim_output): - if not system_config.get("main", False): - pytest.skip(f"{system} does not support basic main run.") - - sim_config = SingleSimConfig.model_validate({ - "system": system, - "time": "2m", - }) - engine, workload_data, time_delta = Engine.from_sim_config(sim_config) - jobs = workload_data.jobs - timestep_start = workload_data.telemetry_start - timestep_end = workload_data.telemetry_end - ticks = list(engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)) - - assert len(ticks) == 120 - - engine_stats = get_engine_stats(engine) - # job_stats = get_job_stats(engine) - # scheduler_stats = get_scheduler_stats(engine) - # network_stats = get_network_stats(engine) - - assert engine_stats['time simulated'] == '0:02:00' - # TODO: More specific tests of values diff --git a/tests/systems/test_engine_basic.py b/tests/systems/test_engine_basic.py new file mode 100644 index 0000000..96c6253 --- /dev/null +++ b/tests/systems/test_engine_basic.py @@ -0,0 +1,20 @@ +import pytest +from ..util import run_engine + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata +] + + +def test_engine_basic(system, system_config, sim_output): + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main run.") + + engine, stats = run_engine({ + "system": system, + "time": "2m", + }) + + assert stats['tick_count'] == 120 + assert stats['engine']['time_simulated'] == '0:02:00' diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index ab19b24..9fe3216 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -1,7 +1,7 @@ import os import subprocess import pytest -from tests.util import PROJECT_ROOT +from ..util import run_engine pytestmark = [ @@ -11,23 +11,14 @@ pytestmark = [ ] -@pytest.mark.parametrize("ff_arg", [ - "0", "1", "3600", "7200", "43200", - "0s", "1s", "3600s", "7200s", "43200s", - "0m", "1m", "60m", - "0h", "1h", "6h", -]) +@pytest.mark.parametrize("ff_arg", ["0s", "1s", "3600s", "60m"]) def test_main_fastforward_run(system, system_config, ff_arg, sim_output): - if not system_config.get("fastforward", False): - pytest.skip(f"{system} does not support basic main run.") + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main even without data.") - os.chdir(PROJECT_ROOT) - result = subprocess.run([ - "python", "main.py", "run", - "-t 1", - "--fastforward", ff_arg, - "--system", system, - "--noui", - "-o", sim_output - ], capture_output=True, text=True, stdin=subprocess.DEVNULL) - assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + engine, stats = run_engine({ + "system": system, + "fastforward": ff_arg, + "time": "10s", + }) + assert 
stats['engine']['time_simulated'] == '0:00:10' diff --git a/tests/systems/test_main_start_run.py b/tests/systems/test_main_start_run.py new file mode 100644 index 0000000..19cc163 --- /dev/null +++ b/tests/systems/test_main_start_run.py @@ -0,0 +1,22 @@ +import pytest +from ..util import run_engine + + +pytestmark = [ + pytest.mark.system, + pytest.mark.nodata, +] + + +@pytest.mark.parametrize("start", [ + "2025-01-01", "2024-01-04T00:00Z", "1970-01-01T00:00:00+00:00", +]) +def test_main_start_run(system, system_config, sim_output, start): + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main even without data.") + + engine, stats = run_engine({ + "system": system, + "time": "10s", + "start": start + }) diff --git a/tests/systems/test_main_time_ff_delta_run.py b/tests/systems/test_main_time_ff_delta_run.py deleted file mode 100644 index 7424758..0000000 --- a/tests/systems/test_main_time_ff_delta_run.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import subprocess -import pytest -from tests.util import PROJECT_ROOT - - -pytestmark = [ - pytest.mark.system, - pytest.mark.nodata, - pytest.mark.time_delta -] - - -@pytest.mark.parametrize("time_arg, tdelta_arg, ff_arg", [ - ("100", "1", "103"), - ("100", "1s", "2s"), - ("100", "10s", "10s"), - ("10m", "1m", "1m"), - ("10h", "1h", "2h"), - ("10h", "3h", "1h"), - pytest.param("3d", "1d", "1d", marks=pytest.mark.long, id="1d (long)"), -], ids=["1", "1s", "10s", "1m", "1h", "3h", "1d"]) -def test_main_time_ff_delta_run(system, system_config, time_arg, tdelta_arg, - ff_arg, sim_output): - if not system_config.get("time_delta", False): - pytest.skip(f"{system} does not support time_delta run.") - - os.chdir(PROJECT_ROOT) - result = subprocess.run([ - "python", "main.py", "run", - "-t", time_arg, - "--ff", ff_arg, - "--time-delta", tdelta_arg, - "--system", system, - "--noui", - "-o", sim_output - ], capture_output=True, text=True, stdin=subprocess.DEVNULL) - assert result.returncode == 0, f"Failed on {system}: {result.stderr}" diff --git a/tests/systems/test_main_withdata_range_run.py b/tests/systems/test_main_withdata_range_run.py new file mode 100644 index 0000000..63c3e86 --- /dev/null +++ b/tests/systems/test_main_withdata_range_run.py @@ -0,0 +1,27 @@ +import pytest +from ..util import run_engine + +pytestmark = [ + pytest.mark.system, + pytest.mark.withdata, + pytest.mark.long +] + + +def test_main_withdata_range_run(system, system_config, system_files, sim_output): + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main even without data.") + if not system_config.get("withdata", False): + pytest.skip(f"{system} does not support basic main with data.") + + engine, stats = run_engine({ + "system": system, + "start": system_config['start'], + "time": "10m", + "replay": system_files, + }) + + # Check that it at least loaded some data + assert stats['tick_count'] == 10 * 60 + assert stats['job']['jobs_total'] > 0 + assert len(stats['job']['jobs_still_running']) + stats['job']['jobs_completed'] > 0 diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py index 3539db9..ed1a944 100644 --- a/tests/systems/test_main_withdata_run.py +++ b/tests/systems/test_main_withdata_run.py @@ -1,8 +1,5 @@ -import os -import subprocess import pytest -from tests.util import PROJECT_ROOT - +from ..util import run_engine pytestmark = [ pytest.mark.system, @@ -16,12 +13,14 @@ def test_main_withdata_run(system, system_config, system_files, sim_output): 
pytest.skip(f"{system} does not support basic main even without data.") if not system_config.get("withdata", False): pytest.skip(f"{system} does not support basic main with data.") - os.chdir(PROJECT_ROOT) - result = subprocess.run([ - "python", "main.py", "run", - "--time", "1m", - "--system", system, - "-f", ','.join(system_files), - "-o", sim_output - ], capture_output=True, text=True, stdin=subprocess.DEVNULL) - assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + + engine, stats = run_engine({ + "system": system, + "time": "10m", + "replay": system_files, + }) + + # Check that it at least loaded some data + assert stats['tick_count'] == 10 * 60 + assert stats['job']['jobs_total'] > 0 + assert len(stats['job']['jobs_still_running']) + stats['job']['jobs_completed'] > 0 diff --git a/tests/systems/test_multi_part_sim_withdata_run.py b/tests/systems/test_multi_part_sim_withdata_run.py index 538726c..b35e8fe 100644 --- a/tests/systems/test_multi_part_sim_withdata_run.py +++ b/tests/systems/test_multi_part_sim_withdata_run.py @@ -1,7 +1,5 @@ -import os -import subprocess import pytest -from tests.util import PROJECT_ROOT +from tests.util import run_multi_part_engine pytestmark = [ @@ -17,12 +15,12 @@ def test_multi_part_sim_withdata_run(system, system_config, system_files, sim_ou if not system_config.get("withdata", False): pytest.skip(f"{system} does not support multi-part-sim run with data.") - os.chdir(PROJECT_ROOT) - result = subprocess.run([ - "python", "main.py", "run-parts", - "--time", "1h", - "-x", f"{system}/*", - "-f", ','.join(system_files), - "-o", sim_output, - ], capture_output=True, text=True, stdin=subprocess.DEVNULL) - assert result.returncode == 0, f"Failed on {system}: {result.stderr}" + engine, stats = run_multi_part_engine({ + "start": system_config['start'], + "time": "1h", + "partitions": [system], + "replay": system_files, + }) + + times = [s['engine']['time_simulated'] for s in stats['partitions'].values()] + assert len(set(times)) == 1 # All run the same time diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index edf06fe..5eb7edb 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,6 +1,6 @@ import pytest from datetime import timedelta -from raps.utils import parse_td, convert_to_time_unit, infer_time_unit, TIME_UNITS +from raps.utils import parse_td, convert_to_time_unit, infer_time_unit, TIME_UNITS, parse_time_unit @pytest.mark.parametrize("input,expected", [ @@ -9,11 +9,25 @@ from raps.utils import parse_td, convert_to_time_unit, infer_time_unit, TIME_UNI (timedelta(minutes=1), timedelta(minutes=1)), (2, timedelta(seconds=2)), ("PT2S", timedelta(seconds=2)), + ("+1 day", timedelta(days=1)), + ("2ds", timedelta(milliseconds=200)), + ("2cs", timedelta(milliseconds=20)), + ("2ms", timedelta(milliseconds=2)), ]) def test_parse_td(input, expected): assert parse_td(input) == expected +@pytest.mark.parametrize("input,expected", [ + ("s", timedelta(seconds=1)), + ("ms", timedelta(milliseconds=1)), + ("ds", timedelta(milliseconds=100)), + ("cs", timedelta(milliseconds=10)), +]) +def test_parse_time_unit(input, expected): + assert parse_time_unit(input) == expected + + def test_parse_td_error(): with pytest.raises(ValueError): parse_td("1x") diff --git a/tests/util.py b/tests/util.py index 6ee1df7..b5ba495 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,5 +1,14 @@ import os +from typing import Any from pathlib import Path +import shlex +import json +from raps.engine import Engine +from raps.multi_part_engine 
import MultiPartEngine
+from raps.sim_config import SingleSimConfig, MultiPartSimConfig
+from raps.stats import (
+    get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats,
+)
 
 
 def find_project_root():
@@ -26,3 +35,79 @@ def requires_all_markers(request, required_markers):
     markexpr = getattr(request.config.option, "markexpr", "")
     selected = set(part.strip() for part in markexpr.split("and"))
     return required_markers.issubset(selected)
+
+
+def _get_cmd(config, sub_cmd):
+    return f"echo {shlex.quote(json.dumps(config))} | python main.py {sub_cmd} - -o none"
+
+
+def _get_stats(engine: Engine):
+    return {
+        'engine': get_engine_stats(engine),
+        'job': get_job_stats(engine),
+        'scheduler': get_scheduler_stats(engine),
+        'network': get_network_stats(engine) if engine.simulate_network else None,
+    }
+
+
+def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]]:
+    """
+    Run a simulation to completion. Returns the completed Engine and a dict containing the engine
+    stats. If include_ticks is True, the dict will also include a list of all the TickDatas (this
+    can be very large, especially if cooling is enabled!)
+    """
+    # Log command to rerun the test manually for debugging convenience
+    print(f"Command to reproduce run:\n {_get_cmd(sim_config, 'run')}")
+
+    sim_config = SingleSimConfig.model_validate(sim_config)
+    engine, workload_data, time_delta = Engine.from_sim_config(sim_config)
+    jobs = workload_data.jobs
+    timestep_start = workload_data.telemetry_start
+    timestep_end = workload_data.telemetry_end
+    gen = engine.run_simulation(jobs, timestep_start, timestep_end, time_delta)
+
+    stats = {
+        "tick_count": 0,
+        "tick_datas": [] if include_ticks else None,
+    }
+
+    for tick in gen:
+        stats['tick_count'] += 1
+        if include_ticks:
+            stats['tick_datas'].append(tick)
+
+    stats.update(_get_stats(engine))
+
+    return engine, stats
+
+
+def run_multi_part_engine(sim_config, include_ticks=False) -> tuple[MultiPartEngine, dict[str, dict[str, Any]]]:
+    """
+    Run a multi-part simulation to completion. Returns the completed Engine and a dict containing the engine
+    stats for each partition. If include_ticks is True, the dicts will also include a list of all the
+    TickDatas (this can be very large, especially if cooling is enabled!) 
+ """ + # Log command to rerun the test manually for debugging convenience + print(f"Command to reproduce run:\n {_get_cmd(sim_config, "run-parts")}") + + sim_config = MultiPartSimConfig.model_validate(sim_config) + multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ + MultiPartEngine.from_sim_config(sim_config) + jobs = {p: w.jobs for p, w in workload_results.items()} + gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) + + stats = { + "tick_count": 0, + "tick_datas": [] if include_ticks else None, + "partitions": {}, + } + + for tick in gen: + stats['tick_count'] += 1 + if include_ticks: + stats['tick_datas'].append(tick) + + for partition, engine in multi_engine.engines.items(): + stats['partitions'][partition] = _get_stats(engine) + + return multi_engine, stats -- GitLab From 2baf2b1bf24ecb90c3aea273535be400605cf1dd Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 18 Sep 2025 15:32:41 -0400 Subject: [PATCH 309/388] Fix multi-part snapshots --- raps/engine.py | 2 +- raps/telemetry.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 9ae27b1..8d7e231 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -248,7 +248,7 @@ class Engine: if len(snap_map) > 0: if partition_short not in snap_map: raise RuntimeError(f"Snapshot '{partition_short}.npz' not in {sim_config.replay[0]}") - replay_files = snap_map[partition_short] + replay_files = [snap_map[partition_short]] else: replay_files = sim_config.replay else: diff --git a/raps/telemetry.py b/raps/telemetry.py index 6d5aa19..915fc97 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -61,6 +61,10 @@ class TelemetryArgs(RAPSBaseModel): raise ValueError("Either --live or --replay is required") return self + @property + def system_name(self): + return self.system + shortcuts = { "replay": "f", @@ -208,7 +212,7 @@ class Telemetry: for file in files: print(f"Loading {file}") new_data, args_from_file = self.load_snapshot(file) - print(f"File was generated with: --system {args_from_file.system}") + print(f"File was generated with: --system {args_from_file.system_name}") if not data: data = new_data else: -- GitLab From b627c16669873c2ed878e81d24f3e8ff68351078 Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Mon, 22 Sep 2025 18:14:36 +0000 Subject: [PATCH 310/388] Simplify Engine creation --- raps/cooling.py | 4 +- raps/engine.py | 221 ++++++++++++++++++-------------------- raps/envs/raps_env.py | 8 +- raps/multi_part_engine.py | 46 +++----- raps/run_sim.py | 24 ++--- raps/sim_config.py | 3 +- raps/ui.py | 12 +-- tests/util.py | 13 +-- 8 files changed, 145 insertions(+), 186 deletions(-) diff --git a/raps/cooling.py b/raps/cooling.py index 066c12c..68a8a30 100644 --- a/raps/cooling.py +++ b/raps/cooling.py @@ -155,8 +155,8 @@ class ThermoFluidsModel: # If replay mode is on and weather data is available if self.weather and self.weather.has_coords: # Convert total seconds to timedelta object - delta = timedelta(seconds=engine.current_timestep) - target_datetime = self.weather.start + delta + delta = timedelta(seconds=engine.current_timestep - engine.timestep_start) + target_datetime = engine.start + delta # Get temperature from weather data temperature = self.weather.get_temperature(target_datetime) or self.config['WET_BULB_TEMP'] diff --git a/raps/engine.py b/raps/engine.py index 8d7e231..ec02adb 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -15,6 +15,7 @@ from raps.policy import PolicyType from raps.utils import ( 
summarize_ranges, get_current_utilization, + WorkloadData, ) from raps.resmgr import ResourceManager from raps.schedulers import load_scheduler @@ -39,7 +40,6 @@ from raps.account import Accounts from raps.downtime import Downtime from raps.weather import Weather from raps.sim_config import SimConfig -from raps.system_config import SystemConfig from bisect import bisect_right @@ -125,94 +125,7 @@ def keyboard_listener(state): class Engine: """Job scheduling simulation engine.""" - def __init__(self, *, - power_manager: PowerManager, - flops_manager: FLOPSManager, - telemetry: Telemetry, - cooling_model: ThermoFluidsModel | None = None, - jobs=None, - total_initial_jobs=0, - # Workload class to generate from for continuous generation - continuous_workload: Workload | None = None, - accounts=None, - sim_config: SimConfig, - system_config: SystemConfig, - ): - self.config = system_config.get_legacy() - self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) - self.resource_manager = ResourceManager( - total_nodes=self.config['TOTAL_NODES'], - down_nodes=self.config['DOWN_NODES'], - config=self.config - ) - # Initialize running and queue, etc. - self.running = [] - self.queue = [] - self.accounts = accounts - self.telemetry = telemetry - self.job_history_dict = [] - self.jobs_completed = 0 - self.jobs_killed = 0 - self.total_initial_jobs = total_initial_jobs - self.current_timestep = 0 - self.cooling_model = cooling_model - self.sys_power = 0 - self.power_manager = power_manager - self.flops_manager = flops_manager - self.debug = sim_config.debug - self.continuous_workload = continuous_workload - self.replay = sim_config.replay - self.downscale = sim_config.downscale # Factor to downscale the 1s timesteps (power of 10) - self.simulate_network = sim_config.simulate_network - self.sys_util_history = [] - self.scheduler_queue_history = [] - self.scheduler_running_history = [] - self.avg_net_tx = [] - self.avg_net_rx = [] - self.net_util_history = [] - self.avg_slowdown_history = [] - self.max_slowdown_history = [] - self.node_occupancy_history = [] - self.downtime = Downtime(first_downtime=sim_config.downtime_first_int, - downtime_interval=sim_config.downtime_interval_int, - downtime_length=sim_config.downtime_length_int, - debug=sim_config.debug, - ) - - # Set scheduler type - either based on config or command-line args - defaults to 'default' - if self.config['multitenant']: - scheduler_type = 'multitenant' - else: - scheduler_type = sim_config.scheduler - - policy_type = sim_config.policy - backfill_type = sim_config.backfill - - self.scheduler = load_scheduler(scheduler_type)( - config=self.config, - policy=policy_type, - bfpolicy=backfill_type, - resource_manager=self.resource_manager, - jobs=jobs - ) - if sim_config.live: - assert self.scheduler.policy != PolicyType.REPLAY, \ - "Cannot replay from a live system. Choose a scheduling policy!" 
- print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}" - f", with policy {self.scheduler.policy} " - f"and backfill {self.scheduler.bfpolicy}") - - if self.simulate_network: - available_nodes = self.resource_manager.available_nodes - self.network_model = NetworkModel( - available_nodes=available_nodes, - config=self.config, - ) - else: - self.network_model = None - - @staticmethod - def from_sim_config(sim_config: SimConfig, partition: str | None = None): + def __init__(self, sim_config: SimConfig, partition: str | None = None): if partition: system_config = sim_config.get_system_config_by_name(partition) elif len(sim_config.system_configs) > 1: @@ -234,12 +147,12 @@ class Engine: np.random.seed(sim_config.seed + 1) if sim_config.live and not sim_config.replay: - td = Telemetry(**sim_config_dict) - wd = td.load_from_live_system() + telemetry = Telemetry(**sim_config_dict) + wd = telemetry.load_from_live_system() elif sim_config.replay: # TODO: this will have issues if running separate systems or custom systems partition_short = partition.split("/")[-1] if partition else None - td = Telemetry( + telemetry = Telemetry( **sim_config_dict, partition=partition, ) @@ -254,15 +167,18 @@ class Engine: else: replay_files = sim_config.replay - wd = td.load_from_files(replay_files) + wd = telemetry.load_from_files(replay_files) else: # Synthetic jobs wl = Workload(sim_config_args, system_config_dict) wd = wl.generate_jobs() - td = Telemetry(**sim_config_dict) + telemetry = Telemetry(**sim_config_dict) jobs = wd.jobs if len(jobs) == 0: print(f"Warning no jobs found for {partition or 'system'}") + if partition and len(sim_config.system_configs) > 1: + for job in jobs: + job.partition = partition if sim_config.start: start = sim_config.start @@ -319,19 +235,94 @@ class Engine: else: accounts = job_accounts - engine = Engine( - power_manager=power_manager, - flops_manager=flops_manager, - cooling_model=cooling_model, - continuous_workload=continuous_workload, - jobs=jobs, - accounts=accounts, - telemetry=td, - sim_config=sim_config, - system_config=system_config, + self.sim_config = sim_config + self.system_config = system_config + self.config = system_config.get_legacy() + + self.start = start + self.timestep_start = wd.telemetry_start + self.timestep_end = wd.telemetry_end + self.time_delta = time_delta + + self.down_nodes = summarize_ranges(self.config['DOWN_NODES']) + self.resource_manager = ResourceManager( + total_nodes=self.config['TOTAL_NODES'], + down_nodes=self.config['DOWN_NODES'], + config=self.config ) + # Initialize running and queue, etc. 
+ self.running = [] + self.queue = [] + self.accounts = accounts + self.telemetry = telemetry + self.job_history_dict = [] + self.jobs_completed = 0 + self.jobs_killed = 0 + self.jobs = jobs + self.total_initial_jobs = len(jobs) + self.current_timestep = 0 + self.cooling_model = cooling_model + self.sys_power = 0 + self.power_manager = power_manager + self.flops_manager = flops_manager + self.debug = sim_config.debug + self.continuous_workload = continuous_workload + self.replay = sim_config.replay + self.downscale = sim_config.downscale # Factor to downscale the 1s timesteps (power of 10) + self.simulate_network = sim_config.simulate_network + self.sys_util_history = [] + self.scheduler_queue_history = [] + self.scheduler_running_history = [] + self.avg_net_tx = [] + self.avg_net_rx = [] + self.net_util_history = [] + self.avg_slowdown_history = [] + self.max_slowdown_history = [] + self.node_occupancy_history = [] + self.downtime = Downtime(first_downtime=sim_config.downtime_first_int, + downtime_interval=sim_config.downtime_interval_int, + downtime_length=sim_config.downtime_length_int, + debug=sim_config.debug, + ) + + # Set scheduler type - either based on config or command-line args - defaults to 'default' + if self.config['multitenant']: + scheduler_type = 'multitenant' + else: + scheduler_type = sim_config.scheduler + + policy_type = sim_config.policy + backfill_type = sim_config.backfill + + self.scheduler = load_scheduler(scheduler_type)( + config=self.config, + policy=policy_type, + bfpolicy=backfill_type, + resource_manager=self.resource_manager, + jobs=jobs + ) + if sim_config.live: + assert self.scheduler.policy != PolicyType.REPLAY, \ + "Cannot replay from a live system. Choose a scheduling policy!" + print(f"Using scheduler: {str(self.scheduler.__class__).split('.')[2]}" + f", with policy {self.scheduler.policy} " + f"and backfill {self.scheduler.bfpolicy}") + + if self.simulate_network: + available_nodes = self.resource_manager.available_nodes + self.network_model = NetworkModel( + available_nodes=available_nodes, + config=self.config, + ) + else: + self.network_model = None - return engine, wd, time_delta + def get_workload_data(self) -> WorkloadData: + return WorkloadData( + jobs=self.jobs[:], + telemetry_start=self.timestep_start, telemetry_end=self.timestep_end, + start_date=self.start, + ) def add_running_jobs_to_queue(self, jobs_to_submit: List): """ @@ -713,7 +704,7 @@ class Engine: self.scheduler.policy = target_policy self.scheduler.bfpolicy = target_bfpolicy - def run_simulation(self, jobs, timestep_start, timestep_end, time_delta=1, autoshutdown=False): + def run_simulation(self, autoshutdown=False): """Generator that yields after each simulation tick.""" if self.scheduler.policy == PolicyType.REPLAY: @@ -722,24 +713,26 @@ class Engine: replay = False if self.debug: - print(f"[DEBUG] run_simulation: Initial jobs count: {len(jobs)}") - if jobs: + print(f"[DEBUG] run_simulation: Initial jobs count: {len(self.jobs)}") + if self.jobs: print("[DEBUG] run_simulation: First job submit_time: " - f"{jobs[0].submit_time}, start_time: {jobs[0].start_time}") + f"{self.jobs[0].submit_time}, start_time: {self.jobs[0].start_time}") # Set times and place jobs that are currently running, onto the system. 
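(The new `get_workload_data()` accessor repackages what the deleted factory used to return, which is what keeps snapshotting working; a sketch of the snapshot round-trip, reusing `engine` and `sim_config` from the sketch above. The destination filename is illustrative:)

    # Snapshot the workload the same way run_sim.py does further below.
    wd = engine.get_workload_data()
    engine.telemetry.save_snapshot(dest="snapshot.npz", result=wd, args=sim_config)

    # WorkloadData carries the jobs plus the telemetry window and start date.
    print(len(wd.jobs), wd.telemetry_start, wd.telemetry_end, wd.start_date)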
- self.prepare_system_state(all_jobs=jobs, timestep_start=timestep_start, timestep_end=timestep_end) + self.prepare_system_state(all_jobs=self.jobs, + timestep_start=self.timestep_start, timestep_end=self.timestep_end, + ) # Process jobs in batches for better performance of timestep loop - all_jobs = jobs.copy() + all_jobs = self.jobs.copy() submit_times = [j.submit_time for j in all_jobs] cursor = 0 jobs = [] # Batch Jobs into 6h windows based on submit_time or twice the time_delta if larger - batch_window = max(60 * 60 * 6, 2 * time_delta) # at least 6h + batch_window = max(60 * 60 * 6, 2 * self.time_delta) # at least 6h - sim_state = SimulationState(time_delta) + sim_state = SimulationState(self.time_delta) # listener_thread = threading.Thread(target=keyboard_listener, args=(sim_state,), daemon=True) # listener_thread.start() @@ -751,7 +744,7 @@ class Engine: current_time_delta = sim_state.get_time_delta() - if (self.current_timestep % batch_window == 0) or (self.current_timestep == timestep_start): + if (self.current_timestep % batch_window == 0) or (self.current_timestep == self.timestep_start): # Add jobs that are within the batching window and remove them from all jobs # jobs += [job for job in all_jobs if job.submit_time <= self.current_timestep + batch_window] # all_jobs[:] = [job for job in all_jobs if job.submit_time > self.current_timestep + batch_window] diff --git a/raps/envs/raps_env.py b/raps/envs/raps_env.py index ec9fcad..e4ca8eb 100644 --- a/raps/envs/raps_env.py +++ b/raps/envs/raps_env.py @@ -59,12 +59,10 @@ class RAPSEnv(gym.Env): self.action_space = spaces.Discrete(max_jobs) def _create_engine(self): - engine, workload_data, time_delta = Engine.from_sim_config(self.sim_config) + engine = Engine(self.sim_config) engine.scheduler.env = self - self.jobs = workload_data.jobs - timestep_start = workload_data.telemetry_start - timestep_end = workload_data.telemetry_end - self.generator = engine.run_simulation(self.jobs, timestep_start, timestep_end, time_delta) + self.jobs = engine.jobs + self.generator = engine.run_simulation() return engine def reset(self, **kwargs): diff --git a/raps/multi_part_engine.py b/raps/multi_part_engine.py index 57e3e27..6332aa5 100644 --- a/raps/multi_part_engine.py +++ b/raps/multi_part_engine.py @@ -1,57 +1,37 @@ from collections.abc import Iterable from raps.engine import Engine, TickData from raps.sim_config import MultiPartSimConfig -from raps.utils import WorkloadData class MultiPartEngine: - def __init__(self, engines: dict[str, Engine], jobs: dict[str, list]): - self.partition_names = sorted(engines.keys()) - self.engines = engines - self.jobs = jobs - - @staticmethod - def from_sim_config(sim_config: MultiPartSimConfig): + def __init__(self, sim_config: MultiPartSimConfig): if sim_config.replay: root_systems = set(s.system_name.split("/")[0] for s in sim_config.system_configs) # TODO should consider how to pass separate replay values for separate systems if len(root_systems) > 1: raise ValueError("Replay for multi-system runs is not supported") - workloads_by_partition: dict[str, WorkloadData] = {} engines: dict[str, Engine] = {} - time_delta = 0 for partition in sim_config.system_configs: - name = partition.system_name - engine, workload_data, time_delta = Engine.from_sim_config( - sim_config, partition=name, - ) - for job in workload_data.jobs: - job.partition = name - workloads_by_partition[name] = workload_data - engines[name] = engine - timestep_start = min(w.telemetry_start for w in workloads_by_partition.values()) - 
timestep_end = min(w.telemetry_end for w in workloads_by_partition.values()) - - total_initial_jobs = sum(len(j.jobs) for j in workloads_by_partition.values()) + engine = Engine(sim_config, partition=partition.system_name) + engines[partition.system_name] = engine + + total_initial_jobs = sum(len(e.jobs) for e in engines.values()) for engine in engines.values(): engine.total_initial_jobs = total_initial_jobs - multi_engine = MultiPartEngine( - engines=engines, - jobs={p: w.jobs for p, w in workloads_by_partition.items()}, - ) - - return multi_engine, workloads_by_partition, timestep_start, timestep_end, time_delta + self.partition_names = sorted(engines.keys()) + self.engines = engines + first_engine = list(engines.values())[0] + self.start = first_engine.start + self.timestep_start = first_engine.timestep_start + self.timestep_end = first_engine.timestep_end - def run_simulation(self, jobs: dict, timestep_start, timestep_end, time_delta=1 - ) -> Iterable[dict[str, TickData | None]]: + def run_simulation(self) -> Iterable[dict[str, TickData | None]]: generators = [] for part in self.partition_names: - generators.append(self.engines[part].run_simulation( - jobs[part], timestep_start, timestep_end, time_delta, - )) + generators.append(self.engines[part].run_simulation()) for tick_datas in zip(*generators, strict=True): yield dict(zip(self.partition_names, tick_datas)) diff --git a/raps/run_sim.py b/raps/run_sim.py index 74ea87a..51bf6f5 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -49,37 +49,34 @@ def run_sim(sim_config: SingleSimConfig): print("Use run-parts to run multi-partition simulations") sys.exit(1) - engine, workload_data, time_delta = Engine.from_sim_config(sim_config) + engine = Engine(sim_config) out = sim_config.get_output() if out: out.mkdir(parents=True) engine.telemetry.save_snapshot( dest=str(out / 'snapshot.npz'), - result=workload_data, + result=engine.get_workload_data(), args=sim_config, ) (out / 'sim_config.yaml').write_text(sim_config.dump_yaml()) - jobs = workload_data.jobs - timestep_start, timestep_end = workload_data.telemetry_start, workload_data.telemetry_end + jobs = engine.jobs + timestep_start, timestep_end = engine.timestep_start, engine.timestep_end total_timesteps = timestep_end - timestep_start downscale = sim_config.downscale downscale_str = ""if downscale == 1 else f"/{downscale}" print(f"Simulating {len(jobs)} jobs for {total_timesteps}{downscale_str}" f" seconds from {timestep_start} to {timestep_end}.") - print(f"Simulation time delta: {time_delta}{downscale_str} s," + print(f"Simulation time delta: {engine.time_delta}{downscale_str} s," f"Telemetry trace quanta: {jobs[0].trace_quanta}{downscale_str} s.") layout_manager = LayoutManager( sim_config.layout, engine=engine, debug=sim_config.debug, total_timesteps=total_timesteps, args_dict=sim_config.get_legacy_args_dict(), **sim_config.system_configs[0].get_legacy(), ) - layout_manager.run( - jobs, - timestep_start=timestep_start, timestep_end=timestep_end, time_delta=time_delta, - ) + layout_manager.run() engine_stats = get_engine_stats(engine) job_stats = get_job_stats(engine) @@ -223,8 +220,7 @@ def run_parts_sim(sim_config: MultiPartSimConfig): UserWarning ) - multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ - MultiPartEngine.from_sim_config(sim_config) + multi_engine = MultiPartEngine(sim_config) out = sim_config.get_output() if out: @@ -232,15 +228,13 @@ def run_parts_sim(sim_config: MultiPartSimConfig): for part, engine in multi_engine.engines.items(): 
engine.telemetry.save_snapshot( dest=str(out / part.split('/')[-1]), - result=workload_results[part], + result=engine.get_workload_data(), args=sim_config, ) (out / 'sim_config.yaml').write_text(sim_config.dump_yaml()) - jobs = {p: w.jobs for p, w in workload_results.items()} - ui_update_freq = sim_config.system_configs[0].scheduler.ui_update_freq - gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) + gen = multi_engine.run_simulation() for tick_datas in gen: sys_power = 0 diff --git a/raps/sim_config.py b/raps/sim_config.py index 867dac0..c27a2ab 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -4,6 +4,7 @@ from pathlib import Path from functools import cached_property from datetime import timedelta from typing import Literal, Annotated as A +from annotated_types import Len import importlib from raps.schedulers.default import PolicyType, BackfillType from raps.utils import ( @@ -435,7 +436,7 @@ class SingleSimConfig(SimConfig, abc.ABC): class MultiPartSimConfig(SimConfig): - partitions: list[SystemConfig | str] + partitions: A[list[SystemConfig | str], Len(min_length=1)] """ List of multiple systems/partitions to run. Can be names of preconfigured systems, or paths to custom SystemConfig yaml files. diff --git a/raps/ui.py b/raps/ui.py index 3965935..6330bc9 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -556,7 +556,7 @@ class LayoutManager: data.system_util, uncertainties=uncertainties, ) - def run(self, jobs, timestep_start, timestep_end, time_delta): + def run(self): """ Runs the UI, blocking until the simulation is complete """ if not self.debug and not self.noui: context = Live(self.layout, auto_refresh=True, refresh_per_second=3) @@ -565,13 +565,11 @@ class LayoutManager: try: with context: # last_i = 0 - for i, data in enumerate(self.engine.run_simulation(jobs, - timestep_start, - timestep_end, - time_delta, - autoshutdown=True)): + for i, data in enumerate(self.engine.run_simulation(autoshutdown=True)): if data and (not self.debug and not self.noui): - self.update_full_layout(data, time_delta, timestep_start=timestep_start) + self.update_full_layout(data, + self.engine.time_delta, + timestep_start=self.engine.timestep_start) # self.update_progress_bar(i-last_i) # last_i=i if not self.debug and not self.noui: diff --git a/tests/util.py b/tests/util.py index b5ba495..4bbf8f8 100644 --- a/tests/util.py +++ b/tests/util.py @@ -60,11 +60,8 @@ def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]] print(f"Command to reproduce run:\n {_get_cmd(sim_config, "run")}") sim_config = SingleSimConfig.model_validate(sim_config) - engine, workload_data, time_delta = Engine.from_sim_config(sim_config) - jobs = workload_data.jobs - timestep_start = workload_data.telemetry_start - timestep_end = workload_data.telemetry_end - gen = engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) + engine = Engine(sim_config) + gen = engine.run_simulation() stats = { "tick_count": 0, @@ -91,10 +88,8 @@ def run_multi_part_engine(sim_config, include_ticks=False) -> tuple[MultiPartEng print(f"Command to reproduce run:\n {_get_cmd(sim_config, "run-parts")}") sim_config = MultiPartSimConfig.model_validate(sim_config) - multi_engine, workload_results, timestep_start, timestep_end, time_delta = \ - MultiPartEngine.from_sim_config(sim_config) - jobs = {p: w.jobs for p, w in workload_results.items()} - gen = multi_engine.run_simulation(jobs, timestep_start, timestep_end, time_delta) + multi_engine = MultiPartEngine(sim_config) + gen = 
multi_engine.run_simulation() stats = { "tick_count": 0, -- GitLab From 136c252e0a0999ee9d85019bcad68042a3449252 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Mon, 15 Sep 2025 15:20:59 -0400 Subject: [PATCH 311/388] More consistent usage of entrypoint script in docs --- README.md | 7 ++++--- experiments/mit-replay-24hrs.yaml | 2 +- experiments/mit-synthetic.yaml | 2 +- raps/dataloaders/adastraMI250.py | 6 +++--- raps/dataloaders/bluewaters.py | 2 +- raps/dataloaders/frontier.py | 2 +- raps/dataloaders/fugaku.py | 6 +++--- raps/dataloaders/lassen.py | 10 +++++----- raps/dataloaders/marconi100.py | 8 ++++---- raps/schedulers/fastsim.py | 2 +- scripts/marconi100-day51.sh | 8 ++++---- tests/util.py | 2 +- 12 files changed, 29 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 90ab831..9c708af 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ For MIT Supercloud raps run-parts -x mit_supercloud -w multitenant # Reinforcement learning test case - python main.py train-rl --system mit_supercloud/part-cpu -f /opt/data/mit_supercloud/202201 + raps train-rl --system mit_supercloud/part-cpu -f /opt/data/mit_supercloud/202201 For Lumi @@ -135,11 +135,12 @@ This will dump a .npz file with a randomized name, e.g. ac23db.npz. Let's rename There are three ways to modify replaying of telemetry data: 1. `--arrival`. Changing the arrival time distribution - replay cases will default to `--arrival prescribed`, where the jobs will be submitted exactly as they were submitted on the physical machine. This can be changed to `--arrival poisson` to change when the jobs arrive, which is especially useful in cases where there may be gaps in time, e.g., when the system goes down for several days, or the system is underutilized. -python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --arrival poisson + + raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --arrival poisson 2. `--policy`. Changing the way the jobs are scheduled. The `--policy` flag will be set by default to `replay` in cases where a telemetry file is provided, in which case the jobs will be scheduled according to the start times provided. Changing the `--policy` to `fcfs` or `backfill` will use the internal scheduler, e.g.: - python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h + raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --policy fcfs --backfill firstfit -t 12h 3. `--scale`. Changing the scale of each job in the telemetry data. The `--scale` flag will specify the maximum number of nodes for each job (generally set this to the max number of nodes of the smallest partition), and randomly select the number of nodes for each job from one to max nodes. This flag is useful when replaying telemetry from a larger system onto a smaller system.
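For example, to replay a trace from a larger machine while capping every job at 64 nodes (the node count here is only illustrative):

    raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR --scale 64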
diff --git a/experiments/mit-replay-24hrs.yaml b/experiments/mit-replay-24hrs.yaml index 1357886..6990069 100644 --- a/experiments/mit-replay-24hrs.yaml +++ b/experiments/mit-replay-24hrs.yaml @@ -1,4 +1,4 @@ -# python main.py run-multi-part experiments/mit-replay-24hrs.yaml +# raps run-multi-part experiments/mit-replay-24hrs.yaml partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] replay: - /opt/data/mit_supercloud/202201 diff --git a/experiments/mit-synthetic.yaml b/experiments/mit-synthetic.yaml index 5f68cd1..6a24946 100644 --- a/experiments/mit-synthetic.yaml +++ b/experiments/mit-synthetic.yaml @@ -1,3 +1,3 @@ -# python main.py run-multi-part experiments/mit-synthetic.yaml +# raps run-multi-part experiments/mit-synthetic.yaml partitions: ["mit_supercloud/part-cpu", "mit_supercloud/part-gpu"] workload: multitenant diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 8cadbfb..ed60807 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -6,13 +6,13 @@ # to simulate the dataset - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 + raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 # to replay with different scheduling policy - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --policy priority --backfill easy + raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --policy priority --backfill easy # to run a specific time range - python main.py -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 \ + raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 \ --start 2024-11-01T00:00:00Z --end 2024-11-02T00:00:00Z # to analyze dataset diff --git a/raps/dataloaders/bluewaters.py b/raps/dataloaders/bluewaters.py index 728e2bb..7b1ee1f 100644 --- a/raps/dataloaders/bluewaters.py +++ b/raps/dataloaders/bluewaters.py @@ -3,7 +3,7 @@ Blue Waters dataloader Example test case: - python main.py -f /opt/data/bluewaters --start 20170328 --system bluewaters -net + raps run -f /opt/data/bluewaters --start 20170328 --system bluewaters -net To download the necessary datasets: diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index a6ac45e..23efd2f 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -4,7 +4,7 @@ # To simulate DATEDIR="date=2024-01-18" DPATH=/path/to/data - python main.py -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR + raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR # To analyze the data python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 4ccc885..5a531fa 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -10,9 +10,9 @@ The '--arrival poisson' will compute submit times from Poisson distribution, instead of using the submit times given in F-Data. 
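(For intuition, `--arrival poisson` amounts to drawing exponential inter-arrival gaps and accumulating them into fresh submit times; a standalone sketch of the idea, with an illustrative mean gap rather than the system's configured arrival-time constant:)

    import numpy as np

    mean_gap = 1800.0  # illustrative: one submission every ~30 minutes on average
    gaps = np.random.exponential(mean_gap, size=5)
    submit_times = np.cumsum(gaps)  # replaces the submit times recorded in the trace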
- python main.py --system fugaku -f /path/to/21_04.parquet - python main.py --system fugaku -f /path/to/21_04.parquet --validate - python main.py --system fugaku -f /path/to/21_04.parquet --policy priority --backfill easy + raps run --system fugaku -f /path/to/21_04.parquet + raps run --system fugaku -f /path/to/21_04.parquet --validate + raps run --system fugaku -f /path/to/21_04.parquet --policy priority --backfill easy """ import pandas as pd from tqdm import tqdm diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 8bded75..db86513 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -14,19 +14,19 @@ Usage Instructions: git lfs pull # to analyze dataset and plot histograms - python -m raps.telemetry -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --plot + raps telemetry -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --plot # to simulate the dataset as submitted - python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen + raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen # to modify the submit times of the telemetry according to Poisson distribution - python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson + raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson # to fast-forward 365 days and replay for 1 day. This region day has 2250 jobs with 1650 jobs executed. - python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --start '2019-08-22T00:00:00+00:00' -t 1d + raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --start '2019-08-22T00:00:00+00:00' -t 1d # For the network replay this command gives suiteable snapshots: - python main.py -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson # noqa + raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson # noqa """ import math diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 6ff310b..4b3c5c6 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -9,14 +9,14 @@ Download `job_table.parquet` from https://zenodo.org/records/10127767 # to simulate the dataset - python main.py -f /path/to/job_table.parquet --system marconi100 + raps run -f /path/to/job_table.parquet --system marconi100 # to replay using differnt schedulers - python main.py -f /path/to/job_table.parquet --system marconi100 --policy fcfs --backfill easy - python main.py -f /path/to/job_table.parquet --system marconi100 --policy priority --backfill firstfit + raps run -f /path/to/job_table.parquet --system marconi100 --policy fcfs --backfill easy + raps run -f /path/to/job_table.parquet --system marconi100 --policy priority --backfill firstfit # to fast-forward 60 days and replay for 1 day - python main.py -f /path/to/job_table.parquet --system marconi100 --start 2020-07-05T00:00:00+00:00 -t 1d + raps run -f /path/to/job_table.parquet --system marconi100 --start 2020-07-05T00:00:00+00:00 -t 1d # to analyze dataset python -m raps.telemetry -f /path/to/job_table.parquet --system marconi100 -v diff --git a/raps/schedulers/fastsim.py b/raps/schedulers/fastsim.py index 855dcbd..e930a1c 100644 --- a/raps/schedulers/fastsim.py +++ b/raps/schedulers/fastsim.py @@ -10,7 +10,7 @@ from raps.sim_config import args from 
raps.system_config import get_system_config # Run with this command: -# python main.py --system kestrel -f ../data/fastsim_jobs_output.parquet --scheduler fastsim --policy priority --start 2024-09-01T00:00 --end 2024-09-15T00:00 +# raps run --system kestrel -f ../data/fastsim_jobs_output.parquet --scheduler fastsim --policy priority --start 2024-09-01T00:00 --end 2024-09-15T00:00 class Scheduler(): """ diff --git a/scripts/marconi100-day51.sh b/scripts/marconi100-day51.sh index 01da9a2..77cbe45 100644 --- a/scripts/marconi100-day51.sh +++ b/scripts/marconi100-day51.sh @@ -1,4 +1,4 @@ -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy replay -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy fcfs -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy fcfs --backfill easy -python main.py -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy priority --backfill firstfit +./main.py run -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy replay +./main.py run -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy fcfs +./main.py run -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy fcfs --backfill easy +./main.py run -f ~/data/marconi100/job_table.parquet --system marconi100 --ff 4381000 -t 61000 -o --policy priority --backfill firstfit diff --git a/tests/util.py b/tests/util.py index 4bbf8f8..46736b3 100644 --- a/tests/util.py +++ b/tests/util.py @@ -13,7 +13,7 @@ from raps.stats import ( def find_project_root(): path = Path(__file__).resolve() - while not (path / "main.py").exists(): + while not (path / "pyproject.toml").exists(): if path.parent == path: raise RuntimeError("Could not find project root.") path = path.parent -- GitLab From 66797338bfd51f50604f4710830241c05a2388fc Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Mon, 15 Sep 2025 15:31:02 -0400 Subject: [PATCH 312/388] Add dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index c009d2a..732315e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,8 @@ dependencies = [ "pydantic-settings>=2.10.1", "stable-baselines3==2.7.0", "gym==0.26.2", + "dill==0.4.0", + "argcomplete==3.6.2", "pre-commit" ] -- GitLab From 1090bf977b7212d409f671c8eddbbf1b04fa6c9e Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Mon, 15 Sep 2025 17:35:33 -0400 Subject: [PATCH 313/388] Add shell completion --- .gitignore | 1 + main.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index bf49923..5f7f2b5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ venv *.prof simulation_results/ models/*.fmu +.shell-completion-cache diff --git a/main.py b/main.py index b2eae93..ab464b0 100755 --- a/main.py +++ b/main.py @@ -1,18 +1,77 @@ #!/usr/bin/env python3 +# PYTHON_ARGCOMPLETE_OK """ ExaDigiT Resource Allocator & Power Simulator (RAPS) """ import argparse -from raps.helpers import check_python_version -from raps.run_sim import run_sim_add_parser, run_parts_sim_add_parser, show_add_parser -from raps.workloads import run_workload_add_parser -from raps.telemetry import run_telemetry_add_parser -from raps.train_rl import train_rl_add_parser +from pathlib import Path 
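(The cache introduced in this patch is simply the built `ArgumentParser` pickled with dill and gzip-compressed; a standalone sketch of that round-trip, with illustrative parser contents:)

    import argparse
    import gzip

    import dill

    parser = argparse.ArgumentParser(prog="raps")
    parser.add_argument("--system")

    # mtime=0 keeps the compressed bytes deterministic, so the cache file only
    # changes when the parser itself does.
    blob = gzip.compress(dill.dumps(parser), compresslevel=4, mtime=0)
    restored = dill.loads(gzip.decompress(blob))
    print(restored.parse_args(["--system", "frontier"]).system)  # -> frontier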
+import os +import textwrap +import copy +import gzip +import dill +import argcomplete -check_python_version() +# Implement shell completion using argcomplete +# Importing all of raps' dependencies like pandas etc can be rather slow, often taking 1-2 seconds. So for snappy shell +# completion we need avoid imports on the shell completion path. We could do this by shuffling the code around to +# create the parser without importing any heavy-weight libraries. But that would be a pain to maintain and track that +# pandas or scipy aren't accidentally imported transitively. Pandas can also be convenient to use in validating SimConfig +# etc, which is needed to build the argparser. So instead, we cache the generated argparser object so that shell +# completion can run without importing the rest of raps. +PARSER_CACHE = Path(__file__).parent / '.shell-completion-cache' + + +def shell_completion_add_parser(subparsers): + parser = subparsers.add_parser("shell-completion", description=textwrap.dedent(""" + Register shell completion for RAPS. + """).strip(), formatter_class=argparse.RawDescriptionHelpFormatter) + + # Run the command from argcomplete, this edits ~/.bash_completion to register argcomplete + def impl(args): + os.system("activate-global-python-argcomplete") + + parser.set_defaults(impl=impl) + + +def shell_complete(): + try: + parser = dill.loads(gzip.decompress(PARSER_CACHE.read_bytes())) + except Exception: + PARSER_CACHE.unlink(missing_ok=True) # delete cache if corrupted somehow + parser = argparse.ArgumentParser() + # Use a dummy parser so that autocomplete still handles sys.exit tab complete if there's no + # cache. Cache will be created on first run of `main.py` + + argcomplete.autocomplete(parser, always_complete_options=False) + + +def cache_parser(parser: argparse.ArgumentParser): + parser = copy.deepcopy(parser) + subparsers = next(a for a in parser._actions if isinstance(a, argparse._SubParsersAction)) + # Don't need to pickle the impl functions + for subparser in subparsers.choices.values(): + subparser.set_defaults(impl=lambda args: None) + + pickled = gzip.compress(dill.dumps(parser), compresslevel=4, mtime=0) + if not PARSER_CACHE.exists() or PARSER_CACHE.read_bytes() != pickled: + try: # Ignore if there's some kind of write or permission error + PARSER_CACHE.write_bytes(pickled) + except Exception: + pass def main(cli_args: list[str] | None = None): + shell_complete() # will output shell completion and sys.exit during tab complete + + from raps.helpers import check_python_version + check_python_version() + + from raps.run_sim import run_sim_add_parser, run_parts_sim_add_parser, show_add_parser + from raps.workloads import run_workload_add_parser + from raps.telemetry import run_telemetry_add_parser + from raps.train_rl import train_rl_add_parser + parser = argparse.ArgumentParser( description=""" ExaDigiT Resource Allocator & Power Simulator (RAPS) @@ -27,8 +86,9 @@ def main(cli_args: list[str] | None = None): run_workload_add_parser(subparsers) run_telemetry_add_parser(subparsers) train_rl_add_parser(subparsers) + shell_completion_add_parser(subparsers) - # TODO: move other misc scripts into here + cache_parser(parser) args = parser.parse_args(cli_args) assert args.impl, "subparsers should add an impl function to args" -- GitLab From 23402a74c7aab34ebc028b50dd801d35e3f0c67c Mon Sep 17 00:00:00 2001 From: "Maiterth, Matthias" Date: Tue, 23 Sep 2025 21:27:06 +0000 Subject: [PATCH 314/388] Update to TickData and condition handling in the simulation loop and tick. 
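tick() now returns only the per-tick metrics, as a TickReturn, and run_simulation() merges them with scheduler state into the TickData it yields, so the metric fields become Optional while job and queue state is always populated. A consumer sketch, assuming an engine constructed as in the earlier patches:

    for tick in engine.run_simulation():
        # Scheduler state is present on every yielded tick ...
        queued, running = len(tick.queue), len(tick.running)
        # ... while metric fields may be None on steps where tick() did not run.
        if tick.power_df is not None:
            print(tick.current_timestep, tick.system_util, tick.p_flops)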
--- raps/engine.py | 123 +++++++++++++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 46 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index ec02adb..67cd999 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -43,6 +43,22 @@ from raps.sim_config import SimConfig from bisect import bisect_right +@dataclasses.dataclass +class TickReturn: + """ Per-tick metric outputs computed by Engine.tick() """ + power_df: Optional[pd.DataFrame] + p_flops: Optional[float] + g_flops_w: Optional[float] + system_util: float + fmu_inputs: Optional[dict] + fmu_outputs: Optional[dict] + avg_net_tx: Optional[float] + avg_net_rx: Optional[float] + avg_net_util: Optional[float] + slowdown_per_job: float + node_occupancy: dict[int, int] + + @dataclasses.dataclass class TickData: """ Represents the state output from the simulation each tick """ @@ -55,16 +71,16 @@ class TickData: power_df: Optional[pd.DataFrame] p_flops: Optional[float] g_flops_w: Optional[float] - system_util: float + system_util: Optional[float] fmu_inputs: Optional[dict] fmu_outputs: Optional[dict] num_active_nodes: int num_free_nodes: int - avg_net_tx: float - avg_net_rx: float - avg_net_util: float - slowdown_per_job: float - node_occupancy: dict[int, int] + avg_net_tx: Optional[float] + avg_net_rx: Optional[float] + avg_net_util: Optional[float] + slowdown_per_job: Optional[float] + node_occupancy: Optional[dict[int, int]] time_delta: int @@ -130,8 +146,8 @@ class Engine: system_config = sim_config.get_system_config_by_name(partition) elif len(sim_config.system_configs) > 1: raise ValueError( - "Engine can only run single-partition simulations. Use MultiPartEngine for " + - "multi-partition simulations, or pass partition to select the partition to run." + "Engine can only run single-partition simulations. Use MultiPartEngine for " + + "multi-partition simulations, or pass partition to select the partition to run." ) else: system_config = sim_config.system_configs[0] @@ -185,8 +201,8 @@ class Engine: diff = start - wd.start_date if diff.total_seconds() < 0: raise Exception( - f"{start.isoformat()} is before data range in workload. " + - f"Workload data begins at {wd.start_date.isoformat()}" + f"{start.isoformat()} is before data range in workload. " + + f"Workload data begins at {wd.start_date.isoformat()}" ) wd.telemetry_start += int(diff.total_seconds()) wd.start_date = start @@ -366,16 +382,21 @@ class Engine: return False def prepare_timestep(self, *, replay: bool = True, jobs): + # 0 track need to reschedule # 1 identify completed jobs # 2 Check continuous job generation # 3 Simulate node failure # Defunct feature! # 4 Simulate downtime # 5 Update active and free nodes + need_reschedule = False + # 1 Identify Completed Jobs completed_jobs = [job for job in self.running if job.end_time is not None and job.end_time <= self.current_timestep] + need_reschedule = need_reschedule or (completed_jobs != []) + # Update Completed Jobs, their account, and free resources.
for job in completed_jobs: self.power_manager.set_idle(job.scheduled_nodes) @@ -393,6 +414,8 @@ class Engine: killed_jobs = [job for job in self.running if job.end_time is not None and job.start_time + job.time_limit <= self.current_timestep] + need_reschedule = need_reschedule or (killed_jobs != []) + for job in killed_jobs: self.power_manager.set_idle(job.scheduled_nodes) job.current_state = JobState.TIMEOUT @@ -419,9 +442,12 @@ class Engine: else: newly_downed_nodes = [] - need_reschedule = False + need_reschedule = need_reschedule or (newly_downed_nodes != []) + # 4 Simulate downtime - need_reschedule = self.downtime.check_and_trigger(timestep=self.current_timestep, engine=self) + downtime = self.downtime.check_and_trigger(timestep=self.current_timestep, engine=self) + + need_reschedule = need_reschedule or downtime # 5 Update active/free nodes based on core/GPU utilization if self.config['multitenant']: @@ -447,10 +473,12 @@ class Engine: self.num_active_nodes = self.config['TOTAL_NODES'] \ - len(self.resource_manager.available_nodes) \ - len(self.resource_manager.down_nodes) + if self.down_nodes != self.resource_manager.down_nodes: + need_reschedule = need_reschedule or True self.down_nodes = self.resource_manager.down_nodes # TODO This should only be managed in the resource manager! - return completed_jobs, killed_jobs, newly_downed_nodes, need_reschedule + return completed_jobs, killed_jobs, need_reschedule def complete_timestep(self, *, actively_considered_jobs: List, @@ -522,8 +550,8 @@ class Engine: if job.current_state != JobState.RUNNING: raise ValueError( - f"Job {job.id} is in running list, " + - f"but state is not RUNNING: job.state == {job.current_state}" + f"Job {job.id} is in running list, " + + f"but state is not RUNNING: job.state == {job.current_state}" ) else: # if job.state == JobState.RUNNING: # Error checks @@ -531,13 +559,13 @@ class Engine: raise Exception(f"Job exceded time limit! " f"{job.running_time} > {job.time_limit}" f"\n{job}" - f"\nCurrent timestep:{self.current_timestep-self.timestep_start} (rel)" + f"\nCurrent timestep:{self.current_timestep - self.timestep_start} (rel)" ) if replay and job.running_time > job.expected_run_time: raise Exception(f"Job should have ended in replay! 
" f" {job.running_time} > {job.expected_run_time}" f"\n{job}" - f"\nCurrent timestep:{self.current_timestep-self.timestep_start} (rel)" + f"\nCurrent timestep:{self.current_timestep - self.timestep_start} (rel)" ) # Aggregate scheduled nodes @@ -627,7 +655,7 @@ class Engine: net_rx_list=net_rx_list, slowdown_factors=slowdown_factors ) - slowdown_per_job = sum(slowdown_factors)/len(slowdown_factors) if len(slowdown_factors) != 0 else 0 + slowdown_per_job = sum(slowdown_factors) / len(slowdown_factors) if len(slowdown_factors) != 0 else 0 self.record_network_stats(avg_tx=avg_tx, avg_rx=avg_rx, avg_net=avg_net) @@ -646,29 +674,19 @@ class Engine: self.node_occupancy_history.append(node_occupancy) - tick_data = TickData( - current_timestep=self.current_timestep, - completed=None, - killed=None, - running=self.running, - queue=self.queue, - down_nodes=self.down_nodes, + return TickReturn( power_df=power_df, p_flops=pflops, g_flops_w=gflops_per_watt, system_util=system_util, fmu_inputs=cooling_inputs, fmu_outputs=cooling_outputs, - num_active_nodes=self.num_active_nodes, - num_free_nodes=self.num_free_nodes, avg_net_tx=avg_tx, avg_net_rx=avg_rx, avg_net_util=avg_net, slowdown_per_job=slowdown_per_job, node_occupancy=node_occupancy, - time_delta=time_delta ) - return tick_data def prepare_system_state(self, *, all_jobs: List, timestep_start, timestep_end): # Set engine timesteps @@ -755,18 +773,14 @@ class Engine: cursor = r # 1. Prepare Timestep: - completed_jobs, killed_jobs, newly_downed_nodes, need_reschedule = \ - self.prepare_timestep(jobs=jobs) + completed_jobs, killed_jobs, need_reschedule = self.prepare_timestep(jobs=jobs) # 2. Identify eligible jobs and add them to the queue. has_new_additions = self.add_eligible_jobs_to_queue(jobs) + need_reschedule = need_reschedule or has_new_additions # 3. Schedule jobs that are now in the queue. - if completed_jobs != [] \ - or killed_jobs != [] \ - or newly_downed_nodes != [] \ - or has_new_additions \ - or need_reschedule: + if need_reschedule: self.scheduler.schedule(self.queue, self.running, self.current_timestep, accounts=self.accounts, @@ -776,16 +790,34 @@ class Engine: print(".", end="", flush=True) # 4. Run tick only at specified time_delta - if 0 == (self.current_timestep % current_time_delta) \ - and ((current_time_delta == 1 - and self.current_timestep % self.config['POWER_UPDATE_FREQ'] == 0) - or (current_time_delta != 1 or self.downscale != 1) - ): - tick_data = self.tick(time_delta=current_time_delta, replay=replay) - tick_data.completed = completed_jobs - tick_data.killed = completed_jobs + if 0 == (self.current_timestep % current_time_delta): + tick_return = self.tick(time_delta=current_time_delta, replay=replay) else: - tick_data = None + pass + + # Yield TickData here! + yield TickData( + current_timestep=self.current_timestep, + completed=completed_jobs, + killed=killed_jobs, + running=self.running, + queue=self.queue, + down_nodes=self.down_nodes, + power_df=tick_return.power_df, + p_flops=tick_return.p_flops, + g_flops_w=tick_return.g_flops_w, + system_util=tick_return.system_util, + fmu_inputs=tick_return.fmu_inputs, + fmu_outputs=tick_return.fmu_outputs, + num_active_nodes=self.num_active_nodes, + num_free_nodes=self.num_free_nodes, + avg_net_rx=tick_return.avg_net_rx, + avg_net_tx=tick_return.avg_net_tx, + avg_net_util=tick_return.avg_net_util, + slowdown_per_job=tick_return.slowdown_per_job, + node_occupancy=tick_return.node_occupancy, + time_delta=self.time_delta + ) # 5. 
Complete the timestep simulation_done = self.complete_timestep(actively_considered_jobs=jobs, @@ -795,7 +827,6 @@ class Engine: cursor=cursor) if simulation_done: break - yield tick_data def get_job_history_dict(self): return self.job_history_dict -- GitLab From e670db3c83265c87b8adbcce4dcaffbfcfc819f7 Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Tue, 30 Sep 2025 19:29:54 +0000 Subject: [PATCH 315/388] Misc changes for the simulation server --- .gitignore | 2 +- Makefile | 8 +++ config/adastraMI250.yaml | 2 +- config/frontier.yaml | 2 +- config/lassen.yaml | 2 +- config/marconi100.yaml | 2 +- config/summit.yaml | 2 +- main.py | 6 +- raps/__init__.py | 16 +++++ raps/constants.py | 1 - raps/raps_config.py | 4 +- raps/run_sim.py | 8 +-- raps/sim_config.py | 59 +++++++++++----- raps/stats.py | 41 ++++++++--- raps/system_config.py | 41 ++++++----- raps/telemetry.py | 4 +- raps/train_rl.py | 4 +- raps/utils.py | 81 ++++++++++++++++++---- tests/systems/test_main_fastforward_run.py | 2 - tests/util.py | 17 +---- 20 files changed, 213 insertions(+), 91 deletions(-) diff --git a/.gitignore b/.gitignore index 5f7f2b5..ed10fab 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,5 @@ venv *.npz *.prof simulation_results/ -models/*.fmu +models/fmu-models .shell-completion-cache diff --git a/Makefile b/Makefile index a2f4211..d66f02c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,6 @@ #Makefile +SHELL := /bin/bash + .PHONY: pip run docker_build docker_run all: pip @@ -19,3 +21,9 @@ docker_build: docker_run: docker run --platform linux/amd64 -it $(IMAGE_NAME) +fetch-fmu-models: + if [ ! -d ./models/fmu-models ]; then \ + git clone git@code.ornl.gov:exadigit/fmu-models.git ./models/fmu-models; \ + else \ + git -C ./models/fmu-models pull; \ + fi diff --git a/config/adastraMI250.yaml b/config/adastraMI250.yaml index c7b95b8..88f68e9 100644 --- a/config/adastraMI250.yaml +++ b/config/adastraMI250.yaml @@ -90,7 +90,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_path: "../models/fmu-models/Frontier/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/frontier.yaml b/config/frontier.yaml index 3102f31..84891c1 100644 --- a/config/frontier.yaml +++ b/config/frontier.yaml @@ -63,7 +63,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/Simulator_olcf5_base.fmu" + fmu_path: "../models/fmu-models/Frontier/Simulator_olcf5_base.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/lassen.yaml b/config/lassen.yaml index 640c55e..594479d 100644 --- a/config/lassen.yaml +++ b/config/lassen.yaml @@ -56,7 +56,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '94550' country_code: US - fmu_path: "models/POWER9CSM/fmus/lassen.fmu" + fmu_path: "../models/POWER9CSM/fmus/lassen.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/config/marconi100.yaml b/config/marconi100.yaml index 797153e..0e66a7e 100644 --- a/config/marconi100.yaml +++ b/config/marconi100.yaml @@ -52,7 +52,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '30170' country_code: IT - fmu_path: "models/POWER9CSM/fmus/marconi100.fmu" + fmu_path: "../models/POWER9CSM/fmus/marconi100.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git 
a/config/summit.yaml b/config/summit.yaml index 8dc6fe3..7b2b5fe 100644 --- a/config/summit.yaml +++ b/config/summit.yaml @@ -52,7 +52,7 @@ cooling: wet_bulb_temp: 290.0 zip_code: '37831' country_code: US - fmu_path: "models/POWER9CSM/fmus/summit.fmu" + fmu_path: "../models/POWER9CSM/fmus/summit.fmu" fmu_column_mapping: T_sec_r_C: "Rack Return Temperature (°C)" T_sec_s_C: "Rack Supply Temperature (°C)" diff --git a/main.py b/main.py index ab464b0..18ecd9a 100755 --- a/main.py +++ b/main.py @@ -16,9 +16,9 @@ import argcomplete # Importing all of raps' dependencies like pandas etc can be rather slow, often taking 1-2 seconds. So for snappy shell # completion we need avoid imports on the shell completion path. We could do this by shuffling the code around to # create the parser without importing any heavy-weight libraries. But that would be a pain to maintain and track that -# pandas or scipy aren't accidentally imported transitively. Pandas can also be convenient to use in validating SimConfig -# etc, which is needed to build the argparser. So instead, we cache the generated argparser object so that shell -# completion can run without importing the rest of raps. +# pandas or scipy aren't accidentally imported transitively. Pandas can also be convenient to use in validating +# SimConfig etc, which is needed to build the argparser. So instead, we cache the generated argparser object so that +# shell completion can run without importing the rest of raps. PARSER_CACHE = Path(__file__).parent / '.shell-completion-cache' diff --git a/raps/__init__.py b/raps/__init__.py index e69de29..a7f3523 100644 --- a/raps/__init__.py +++ b/raps/__init__.py @@ -0,0 +1,16 @@ +from .sim_config import SimConfig, SingleSimConfig, MultiPartSimConfig +from .system_config import ( + SystemConfig, SystemCoolingConfig, SystemNetworkConfig, SystemPowerConfig, SystemSchedulerConfig, + SystemSystemConfig, SystemUqConfig, +) +from raps.schedulers.default import PolicyType, BackfillType +from .engine import Engine +from .multi_part_engine import MultiPartEngine + +__all__ = [ + "SimConfig", "SingleSimConfig", "MultiPartSimConfig", + "SystemConfig", "SystemCoolingConfig", "SystemNetworkConfig", "SystemPowerConfig", "SystemSchedulerConfig", + "SystemSystemConfig", "SystemUqConfig", + "PolicyType", "BackfillType", + "Engine", "MultiPartEngine", +] diff --git a/raps/constants.py b/raps/constants.py index 85b5e23..53711e1 100644 --- a/raps/constants.py +++ b/raps/constants.py @@ -2,7 +2,6 @@ RAPS Constants """ from pathlib import Path -from datetime import datetime ELLIPSES = '\u2026' OUTPUT_PATH = Path('simulation_results') diff --git a/raps/raps_config.py b/raps/raps_config.py index 6eddca8..d1e1385 100644 --- a/raps/raps_config.py +++ b/raps/raps_config.py @@ -1,5 +1,5 @@ from pathlib import Path -from raps.utils import ExpandedPath +from raps.utils import ResolvedPath from pydantic_settings import BaseSettings, SettingsConfigDict, YamlConfigSettingsSource ROOT_DIR = Path(__file__).parent.parent @@ -13,7 +13,7 @@ class RapsConfig(BaseSettings): # We'll be using SimConfig in the simulation server and those settings aren't applicable there, # so it makes sense to keep SimConfig scoped to the logical operation of the sim. 
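(The relative `fmu_path` values above resolve against the yaml file that declares them because `ResolvedPath` accepts a `base_path` through Pydantic's validation context, per the `raps/utils.py` hunk further below; a minimal sketch of that mechanism, with an illustrative model and paths:)

    from pydantic import BaseModel
    from raps.utils import ResolvedPath

    class Demo(BaseModel):
        fmu_path: ResolvedPath

    demo = Demo.model_validate(
        {"fmu_path": "../models/fmu-models/Frontier/Simulator_olcf5_base.fmu"},
        context={"base_path": "config"},  # resolve relative to the config/ directory
    )
    print(demo.fmu_path)  # absolute path under <cwd>/models/fmu-models/...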
- system_config_dir: ExpandedPath = ROOT_DIR / 'config' + system_config_dir: ResolvedPath = ROOT_DIR / 'config' """ Directory containing system configuration files """ model_config = SettingsConfigDict( diff --git a/raps/run_sim.py b/raps/run_sim.py index 51bf6f5..aa2d9d9 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -11,7 +11,7 @@ from raps.ui import LayoutManager from raps.plotting import Plotter from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine -from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml +from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml_parsed from raps.stats import ( get_engine_stats, get_job_stats, @@ -38,7 +38,7 @@ def run_sim_add_parser(subparsers: SubParsers): "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( - impl=lambda args: run_sim(model_validate(args, read_yaml(args.config_file))) + impl=lambda args: run_sim(model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file))) ) @@ -209,7 +209,7 @@ def run_parts_sim_add_parser(subparsers: SubParsers): "cli_shortcuts": SIM_SHORTCUTS, }) parser.set_defaults( - impl=lambda args: run_parts_sim(model_validate(args, read_yaml(args.config_file))) + impl=lambda args: run_parts_sim(model_validate(args, read_yaml_parsed(MultiPartSimConfig, args.config_file))) ) @@ -293,7 +293,7 @@ def show_add_parser(subparsers: SubParsers): }) def impl(args): - sim_config = model_validate(args, read_yaml(args.config_file)) + sim_config = model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file)) show(sim_config, show_defaults=args.show_defaults) parser.set_defaults(impl=impl) diff --git a/raps/sim_config.py b/raps/sim_config.py index c27a2ab..2f9bc44 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -1,6 +1,7 @@ import argparse import abc from pathlib import Path +import pandas as pd from functools import cached_property from datetime import timedelta from typing import Literal, Annotated as A @@ -8,10 +9,12 @@ from annotated_types import Len import importlib from raps.schedulers.default import PolicyType, BackfillType from raps.utils import ( - parse_time_unit, convert_to_time_unit, infer_time_unit, ExpandedPath, create_casename, + parse_time_unit, convert_to_time_unit, infer_time_unit, ResolvedPath, create_casename, RAPSBaseModel, AutoAwareDatetime, SmartTimedelta, yaml_dump, ) -from raps.system_config import SystemConfig, get_partition_configs, get_system_config +from raps.system_config import ( + SystemConfig, get_partition_configs, get_system_config, list_systems, resolve_system_reference, +) from pydantic import model_validator, Field Distribution = Literal['uniform', 'weibull', 'normal'] @@ -79,7 +82,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): seed: int | None = None """ Set RNG seed for deterministic simulation """ - output: ExpandedPath | Literal['none'] | None = None + output: ResolvedPath | Literal['none'] | None = None """ Where to output power, cooling, and loss models for later analysis. If omitted it will output to raps-output- by default. 
@@ -112,7 +115,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): imtype: Literal["png", "svg", "jpg", "pdf", "eps"] = "png" """ Plot image type """ - replay: list[ExpandedPath] | None = None + replay: list[ResolvedPath] | None = None """ Either: path/to/joblive path/to/jobprofile OR filename.npz """ encrypt: bool = False @@ -214,7 +217,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): # Accounts accounts: bool = False - accounts_json: ExpandedPath | None = None + accounts_json: ResolvedPath | None = None """ Path to accounts JSON file from previous run """ # Downtime @@ -283,6 +286,11 @@ class SimConfig(RAPSBaseModel, abc.ABC): if self.start and self.fastforward: raise ValueError("start and fastforward are mutually exclusive") + if self.start: + self.start = pd.Timestamp(self.start).floor(self.time_unit).to_pydatetime() + if self.end: + self.end = pd.Timestamp(self.end).floor(self.time_unit).to_pydatetime() + if self.end: if not self.start: raise ValueError("end requires start to be set") @@ -419,20 +427,32 @@ class SimConfig(RAPSBaseModel, abc.ABC): class SingleSimConfig(SimConfig, abc.ABC): - system: SystemConfig | str = "frontier" - """ - Name of the system to simulate, e.g "frontier". Can also be a path to a yaml file containing - the SystemConfig. You can also make modificiations to the SystemConfig on the CLI using - `--system.base`, e.g. `--system.base frontier --system.cooling.fmu-path path/to/my.fmu` - """ + # Dynamic help string + system: A[SystemConfig | str, Field(description=f""" + Name of the system to simulate or a path to a yaml file containing the SystemConfig. + + You can also make modifications to the SystemConfig on the CLI using `--system.base`, e.g + `--system.base frontier --system.cooling.fmu-path path/to/my.fmu`. + + Built-in systems: {', '.join(list_systems())} + """)] = "frontier" + + @model_validator(mode="after") + def _validate_system(self, info): + self.system = resolve_system_reference(self.system, info) + try: + self._system_configs = [get_system_config(self.system)] + except FileNotFoundError as e: + raise ValueError(str(e)) + return self @property def system_name(self) -> str: return self.system_configs[0].system_name - @cached_property + @property def system_configs(self) -> list[SystemConfig]: - return [get_system_config(self.system)] + return self._system_configs class MultiPartSimConfig(SimConfig): @@ -442,6 +462,15 @@ class MultiPartSimConfig(SimConfig): to custom SystemConfig yaml files. 
""" + @model_validator(mode="after") + def _validate_partitions(self, info): + self.partitions = [resolve_system_reference(p, info) for p in self.partitions] + try: + self._multi_partition_system_config = get_partition_configs(self.partitions) + except FileNotFoundError as e: + raise ValueError(str(e)) + return self + @property def system_name(self) -> str: return self._multi_partition_system_config.system_name @@ -450,10 +479,6 @@ class MultiPartSimConfig(SimConfig): def system_configs(self) -> list[SystemConfig]: return self._multi_partition_system_config.partitions - @cached_property - def _multi_partition_system_config(self): - return get_partition_configs(self.partitions) - SIM_SHORTCUTS = { "partitions": "x", diff --git a/raps/stats.py b/raps/stats.py index a420151..924a696 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -13,8 +13,11 @@ from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss from .engine import Engine -def get_engine_stats(engine: Engine): - """ Return engine statistics """ +def get_engine_stats(engine: Engine, *, fast = False): + """ + Return engine statistics + Setting `fast = False` excludes some stats that are more expensive to calculate. + """ timesteps = engine.current_timestep - engine.timestep_start num_samples = len(engine.power_manager.history) if engine.power_manager else 0 time_simulated = convert_seconds_to_hhmmss(timesteps / engine.downscale) @@ -51,7 +54,7 @@ def get_engine_stats(engine: Engine): else: stats['jobs_completed_percentage'] = 0 - if engine.node_occupancy_history: + if not fast and engine.node_occupancy_history: # Calculate average concurrent jobs per node (average density across all nodes and timesteps) total_jobs_running_timesteps = 0 max_concurrent_jobs_per_node = 0 @@ -72,12 +75,12 @@ def get_engine_stats(engine: Engine): sum_jobs_per_active_node += sum(active_nodes_in_timestep) / len(active_nodes_in_timestep) count_active_timesteps_for_avg_active += 1 - # Average jobs per *active* node (user's desired "1" type) - avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \ - if count_active_timesteps_for_avg_active > 0 else 0 + # Average jobs per *active* node (user's desired "1" type) + avg_jobs_per_active_node = (sum_jobs_per_active_node / count_active_timesteps_for_avg_active) \ + if count_active_timesteps_for_avg_active > 0 else 0 - stats['avg_concurrent_jobs_per_active_node'] = avg_jobs_per_active_node - stats['max_concurrent_jobs_per_node'] = max_concurrent_jobs_per_node + stats['avg_concurrent_jobs_per_active_node'] = avg_jobs_per_active_node + stats['max_concurrent_jobs_per_node'] = max_concurrent_jobs_per_node else: stats['avg_concurrent_jobs_per_node'] = None stats['max_concurrent_jobs_per_node'] = None @@ -358,6 +361,15 @@ def get_job_stats(engine: Engine): return job_stats +def get_stats(engine: Engine, *, fast = False): + return { + 'engine': get_engine_stats(engine, fast = fast), + 'job': get_job_stats(engine), + 'scheduler': get_scheduler_stats(engine), + 'network': get_network_stats(engine) if engine.simulate_network else {}, + } + + def print_formatted_report(engine_stats=None, job_stats=None, scheduler_stats=None, @@ -404,3 +416,16 @@ def print_formatted_report(engine_stats=None, "avg_per_job_slowdown": "{:.2f}x", "max_per_job_slowdown": "{:.2f}x", }) + + +def get_gauge_limits(engine: Engine): + """For setting max values in dashboard gauges""" + peak_flops = engine.flops_manager.get_rpeak() + peak_power = engine.power_manager.get_peak_power() + 
gflops_per_watt_max = peak_flops / 1E9 / peak_power + + return { + 'peak_flops': peak_flops, + 'peak_power': peak_power, + 'g_flops_w_peak': gflops_per_watt_max + } diff --git a/raps/system_config.py b/raps/system_config.py index bd405be..5253e0b 100644 --- a/raps/system_config.py +++ b/raps/system_config.py @@ -7,9 +7,11 @@ from functools import cached_property import yaml from pydantic import ( model_validator, field_validator, model_serializer, SerializationInfo, - SerializerFunctionWrapHandler, + SerializerFunctionWrapHandler, ValidationInfo, +) +from raps.utils import ( + RAPSBaseModel, deep_merge, deep_subtract_dicts, is_yaml_file, ResolvedPath, validate_resolved_path, ) -from raps.utils import RAPSBaseModel, deep_merge, deep_subtract_dicts from raps.raps_config import raps_config # Define Pydantic models for the config to handle parsing and validation @@ -130,7 +132,7 @@ class SystemCoolingConfig(RAPSBaseModel): wet_bulb_temp: float zip_code: str | None = None country_code: str | None = None - fmu_path: str + fmu_path: ResolvedPath fmu_column_mapping: dict[str, str] w_htwps_key: str w_ctwps_key: str @@ -179,10 +181,12 @@ class SystemConfig(RAPSBaseModel): network: SystemNetworkConfig | None = None @model_validator(mode="before") - def _load_base(cls, data): + def _load_base(cls, data, info: ValidationInfo): if isinstance(data, dict) and data.get("base"): - base = get_system_config(data['base']) - data = deep_merge(base.model_dump(mode='json'), data) + data['base'] = resolve_system_reference(data['base'], info) + base_model = get_system_config(data['base']) + base_data = base_model.model_dump(mode='json', exclude_unset=True) + data = deep_merge(base_data, data) return data @model_serializer(mode='wrap') @@ -263,13 +267,12 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: """ if isinstance(system, SystemConfig): # Just pass system through if its already parsed return system - - if system in list_systems(): + elif is_yaml_file(system): + config_path = Path(system) + system_name = config_path.stem + else: config_path = raps_config.system_config_dir / f"{system}.yaml" system_name = system - else: - config_path = Path(system).resolve() - system_name = config_path.stem if not config_path.is_file(): raise FileNotFoundError(f'"{system}" not found. 
Valid systems are: {list_systems()}') @@ -277,10 +280,8 @@ def get_system_config(system: str | SystemConfig) -> SystemConfig: "system_name": system_name, # You can override system_name in the yaml as well **yaml.safe_load(config_path.read_text()), } - base = str(config.get('base', '')) - if base.endswith(".yaml"): - config['base'] = str(config_path.parent / base) # path relative to yaml - return SystemConfig.model_validate(config) + # Pass context so paths in the SystemConfig can be resolved relative to the yaml file + return SystemConfig.model_validate(config, context={'base_path': config_path.parent}) def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitionSystemConfig: @@ -304,7 +305,7 @@ def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitio matched_systems = fnmatch.filter(systems, pat) combined_system_name.extend(s.split("/")[0] for s in matched_systems) elif Path(pat).is_dir(): - matched_systems = sorted(Path(pat).glob("*.yaml")) + matched_systems = sorted([str(s) for s in Path(pat).glob("*.yaml")]) combined_system_name.append(Path(pat).name) else: matched_systems = sorted(glob.glob(pat)) @@ -322,3 +323,11 @@ def get_partition_configs(partitions: list[str | SystemConfig]) -> MultiPartitio system_name=combined_system_name, partitions=parsed_configs, ) + + +def resolve_system_reference(system: str | SystemConfig, info: ValidationInfo): + """ If system is a yaml path, resolve it as a path. Otherwise leave it as a string """ + if isinstance(system, str) and is_yaml_file(system): + return str(validate_resolved_path(system, info)) + else: + return system diff --git a/raps/telemetry.py b/raps/telemetry.py index 915fc97..b7f29b7 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -28,7 +28,7 @@ from raps.plotting import ( plot_network_histogram ) from raps.utils import ( - next_arrival_byconfargs, pydantic_add_args, SubParsers, ExpandedPath, WorkloadData, RAPSBaseModel, + next_arrival_byconfargs, pydantic_add_args, SubParsers, ResolvedPath, WorkloadData, RAPSBaseModel, ) @@ -36,7 +36,7 @@ from raps.utils import ( class TelemetryArgs(RAPSBaseModel): jid: str = '*' """ Replay job id """ - replay: list[ExpandedPath] | None = None + replay: list[ResolvedPath] | None = None """ path/to/joblive path/to/jobprofile -or- filename.npz (overrides --workload option) """ plot: list[Literal["jobs", "nodes"]] | None = None is_results_file: bool = False diff --git a/raps/train_rl.py b/raps/train_rl.py index eac4172..d6ddd42 100644 --- a/raps/train_rl.py +++ b/raps/train_rl.py @@ -1,5 +1,5 @@ from raps.sim_config import SingleSimConfig, SIM_SHORTCUTS -from raps.utils import SubParsers, pydantic_add_args, read_yaml +from raps.utils import SubParsers, pydantic_add_args, read_yaml_parsed def train_rl_add_parser(subparsers: SubParsers): @@ -16,7 +16,7 @@ def train_rl_add_parser(subparsers: SubParsers): }) def impl(args): - model = model_validate(args, read_yaml(args.config_file)) + model = model_validate(args, read_yaml_parsed(SingleSimConfig, args.config_file)) model.scheduler = "rl" train_rl(model) parser.set_defaults(impl=impl) diff --git a/raps/utils.py b/raps/utils.py index 4414fd9..de565d4 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -20,12 +20,14 @@ import uuid import json import argparse from pathlib import Path -from typing import Annotated as A, TypeVar, Callable, TypeAlias +from typing import Annotated as A, TypeVar, TypeAlias, Protocol from pydantic import ( - BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, 
AwareDatetime, ValidationError + BaseModel, TypeAdapter, AfterValidator, BeforeValidator, ConfigDict, AwareDatetime, ValidationError, + ValidationInfo, ) -from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource +from pydantic_settings import BaseSettings, SettingsConfigDict, CliApp, CliSettingsSource, SettingsError import yaml +from yaml import YAMLError from raps.job import Job @@ -683,8 +685,27 @@ def normalize_tz(d: datetime): return d.astimezone(timezone.utc) -ExpandedPath = A[Path, AfterValidator(lambda v: Path(v).expanduser().resolve())] -""" Type that that expands ~ and environment variables in a path string """ +def validate_resolved_path(path: str | Path, info: ValidationInfo): + context = info.context or {} + path = Path(path).expanduser() + if context.get('base_path'): + base_path = Path(context["base_path"]).expanduser().resolve() + else: + base_path = Path.cwd() + path = (base_path / path).resolve() + # This is used on the simulation server to block reading arbitrary files + if context.get("force_under_base_path"): + if not path.is_relative_to(base_path): + raise ValueError(f"{path} is not under {base_path}") + return path + +ResolvedPath = A[Path, AfterValidator(validate_resolved_path)] +""" +Resolve a path, and expand ~ in the path string. +Paths can be resolved relative to specific path instead of cwd by passing +`context={"base_path": "my/path"}` in model_validate(). +""" + AutoAwareDatetime = A[datetime, AfterValidator(normalize_tz)] """ Datetime type wrapper, makes sure timezone is set """ @@ -700,13 +721,18 @@ class RAPSBaseModel(BaseModel): ) -T = TypeVar("T", bound=BaseModel) +T = TypeVar("T", bound=BaseModel, covariant=True) + + +class ModelArgsValidator(Protocol[T]): + def __call__(self, args: argparse.Namespace, init_data: dict | None = None) -> T: + ... def pydantic_add_args( parser: argparse.ArgumentParser, model_cls: type[T], model_config: SettingsConfigDict | None = None, -) -> Callable[[argparse.Namespace, dict | None], T]: +) -> ModelArgsValidator[T]: """ Add arguments to the parser from the model. Returns a function that can be used to parse the model from the argparse args. @@ -735,20 +761,20 @@ def pydantic_add_args( cli_settings_source = CliSettingsSource(SettingsModel, root_parser=parser) - def model_validate_args(args: argparse.Namespace, data: dict | None = None): + def model_args_validator(args: argparse.Namespace, init_data: dict | None = None): try: model = CliApp.run(SettingsModel, cli_args=args, cli_settings_source=cli_settings_source, - **(data or {}), + **(init_data or {}), ) # Recreate model so we don't return the SettingsModel subclass # use exclude_unset so that model_field_set is preserved as well return model_cls.model_validate(model.model_dump(exclude_unset=True)) - except ValidationError as err: + except (ValidationError, SettingsError) as err: print(err) sys.exit(1) - return model_validate_args + return model_args_validator SubParsers: TypeAlias = "argparse._SubParsersAction[argparse.ArgumentParser]" @@ -784,7 +810,7 @@ def yaml_dump(data, header_comment=''): ) -def read_yaml(config_file: str): +def read_yaml(config_file: str | None) -> dict: """ Parses yaml file. 
Pass "-" to read from stdin """ # Assume stdin if not terminal if config_file == "-" or (not config_file and not sys.stdin.isatty()): @@ -794,9 +820,36 @@ def read_yaml(config_file: str): else: data = "" if data.strip(): - return yaml.safe_load(data) + result = yaml.safe_load(data) else: - return {} + result = {} + if not isinstance(result, dict): + raise ValueError("Expected yaml document to contain a top-level mapping") + return result + + +def read_yaml_parsed(cls: type[T], config_file = None) -> dict: + """ + Like read_yaml, but parses the input to resolve paths etc. + Exits on error after printing message (for use in the CLI) + """ + try: + yaml_data = read_yaml(config_file) + if yaml_data: + # Resolve paths in yaml relative to the yaml file + base_path = Path(config_file).parent if config_file and config_file != "-" else None + model = cls.model_validate(yaml_data, context={"base_path": base_path}) + yaml_data = model.model_dump(mode='json', exclude_unset=True) + except (ValidationError, ValueError, YAMLError) as err: + print(f'Failed to parse yaml "{config_file}"') + print(err) + sys.exit(1) + return yaml_data + + +def is_yaml_file(path: str | Path): + """ Return true if the path is .yaml, .yml, or .json """ + return Path(path).suffix in ['.yaml', '.yml', '.json'] class WorkloadData(RAPSBaseModel): diff --git a/tests/systems/test_main_fastforward_run.py b/tests/systems/test_main_fastforward_run.py index 9fe3216..5a3b38f 100644 --- a/tests/systems/test_main_fastforward_run.py +++ b/tests/systems/test_main_fastforward_run.py @@ -1,5 +1,3 @@ -import os -import subprocess import pytest from ..util import run_engine diff --git a/tests/util.py b/tests/util.py index 46736b3..20c9054 100644 --- a/tests/util.py +++ b/tests/util.py @@ -4,11 +4,9 @@ from pathlib import Path import shlex import json from raps.engine import Engine +from raps.stats import get_stats from raps.multi_part_engine import MultiPartEngine from raps.sim_config import SingleSimConfig, MultiPartSimConfig -from raps.stats import ( - get_engine_stats, get_job_stats, get_scheduler_stats, get_network_stats, -) def find_project_root(): @@ -41,15 +39,6 @@ def _get_cmd(config, sub_cmd): return f"echo {shlex.quote(json.dumps(config))} | python main.py {sub_cmd} - -o none" -def _get_stats(engine: Engine): - return { - 'engine': get_engine_stats(engine), - 'job': get_job_stats(engine), - 'scheduler': get_scheduler_stats(engine), - 'network': get_network_stats(engine) if engine.simulate_network else None, - } - - def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]]: """ Run a simulation to completion. 
Returns the completed Engine and a dict containing the engine @@ -73,7 +62,7 @@ def run_engine(sim_config, include_ticks=False) -> tuple[Engine, dict[str, Any]] if include_ticks: stats['tick_datas'].append(tick) - stats.update(_get_stats(engine)) + stats.update(get_stats(engine)) return engine, stats @@ -103,6 +92,6 @@ def run_multi_part_engine(sim_config, include_ticks=False) -> tuple[MultiPartEng stats['tick_datas'].append(tick) for partition, engine in multi_engine.engines.items(): - stats['partitions'][partition] = _get_stats(engine) + stats['partitions'][partition] = engine.get_stats() return multi_engine, stats -- GitLab From 7511a022d39a5b19f39e6b61bc575156cc664d63 Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Tue, 7 Oct 2025 14:17:42 +0000 Subject: [PATCH 316/388] Some more tweaks for simulation server and dashboard --- pyproject.toml | 1 + raps/sim_config.py | 28 +++++++++++++++------------- tests/util.py | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 732315e..8cc586d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "gym==0.26.2", "dill==0.4.0", "argcomplete==3.6.2", + "pyzmq==27.1.0", "pre-commit" ] diff --git a/raps/sim_config.py b/raps/sim_config.py index 2f9bc44..350c873 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -15,7 +15,7 @@ from raps.utils import ( from raps.system_config import ( SystemConfig, get_partition_configs, get_system_config, list_systems, resolve_system_reference, ) -from pydantic import model_validator, Field +from pydantic import model_validator, Field, BeforeValidator Distribution = Literal['uniform', 'weibull', 'normal'] @@ -25,7 +25,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Include the FMU cooling model """ simulate_network: bool = False """ Include network model """ - weather: bool | None = None + weather: bool = False """ Include weather information in the cooling model. Defaults to True if replay, False otherwise. @@ -54,7 +54,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): Step size for the power simulation (default seconds). Can pass a string like 15s, 1m, 1h, 1ms """ - time_unit: timedelta = timedelta(seconds=1) + time_unit: A[timedelta, BeforeValidator(parse_time_unit)] = timedelta(seconds=1) """ The base unit of the simulation, determining how often it will tick the job scheduler. """ @@ -134,7 +134,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Grab data from live system. """ # Workload arguments (TODO split into separate model) - workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant'] | None = None + workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] @@ -223,18 +223,15 @@ class SimConfig(RAPSBaseModel, abc.ABC): # Downtime downtime_first: SmartTimedelta | None = None """ - First downtime (unit specified by `time_unit`, default seconds). - Can pass a string like 27m, 3h, 7d + First downtime. Can pass a string like 27m, 3h, 7d """ downtime_interval: SmartTimedelta | None = None """ - Interval between downtimes (unit specified by `time_unit`, default seconds). - Can pass a string like 123, 27m, 3h, 7d + Interval between downtimes. Can pass a string like 123, 27m, 3h, 7d """ downtime_length: SmartTimedelta | None = None """ - Downtime length (unit specified by `time_unit`, default seconds). - Can pass a string like 123, 27m, 3h, 7d + Downtime length. 
Can pass a string like 123, 27m, 3h, 7d """ @cached_property @@ -269,6 +266,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): "time_delta", "time", "fastforward", "downtime_first", "downtime_interval", "downtime_length", ] + # infer time unit from other timedelta fields if it wasn't set explicitly if data.get('time_unit') is None: time_unit = min( [infer_time_unit(data[f]) for f in td_fields if data.get(f)], @@ -312,13 +310,17 @@ class SimConfig(RAPSBaseModel, abc.ABC): if td is not None: convert_to_time_unit(td, self.time_unit) # will throw if invalid - if not self.replay and not self.workload: - self.workload = "random" + if "workload" not in self.model_fields_set and self.replay: + self.workload = "replay" # default to replay if --replay is set + if self.workload == "replay" and not self.replay: + raise ValueError('--replay must be set when workload type is "replay"') + elif self.workload != "replay" and self.replay: + raise ValueError('workload must be either omitted or "replay" when --replay is set') if self.cooling: self.layout = "layout2" - if self.weather is None: + if 'weather' not in self.model_fields_set: self.weather = self.cooling and bool(self.replay) if self.jobsize_is_power_of is not None and self.jobsize_is_of_degree is not None: diff --git a/tests/util.py b/tests/util.py index 20c9054..4082d4f 100644 --- a/tests/util.py +++ b/tests/util.py @@ -92,6 +92,6 @@ def run_multi_part_engine(sim_config, include_ticks=False) -> tuple[MultiPartEng stats['tick_datas'].append(tick) for partition, engine in multi_engine.engines.items(): - stats['partitions'][partition] = engine.get_stats() + stats['partitions'][partition] = get_stats(engine) return multi_engine, stats -- GitLab From 481fb0a8f69e70ce19548bd89d7906f7a8b43a5a Mon Sep 17 00:00:00 2001 From: "Hines, Jesse" Date: Thu, 9 Oct 2025 20:28:20 +0000 Subject: [PATCH 317/388] Add RunningStats helper --- raps/stats.py | 86 ++++++++++++++++++++++++++++-- tests/systems/test_engine_basic.py | 28 ++++++++++ 2 files changed, 109 insertions(+), 5 deletions(-) diff --git a/raps/stats.py b/raps/stats.py index 924a696..e5824a9 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -13,10 +13,9 @@ from .utils import sum_values, min_value, max_value, convert_seconds_to_hhmmss from .engine import Engine -def get_engine_stats(engine: Engine, *, fast = False): +def get_engine_stats(engine: Engine): """ Return engine statistics - Setting `fast = False` excludes some stats that are more expensive to calculate. 
""" timesteps = engine.current_timestep - engine.timestep_start num_samples = len(engine.power_manager.history) if engine.power_manager else 0 @@ -54,7 +53,7 @@ def get_engine_stats(engine: Engine, *, fast = False): else: stats['jobs_completed_percentage'] = 0 - if not fast and engine.node_occupancy_history: + if engine.node_occupancy_history: # Calculate average concurrent jobs per node (average density across all nodes and timesteps) total_jobs_running_timesteps = 0 max_concurrent_jobs_per_node = 0 @@ -361,9 +360,9 @@ def get_job_stats(engine: Engine): return job_stats -def get_stats(engine: Engine, *, fast = False): +def get_stats(engine: Engine): return { - 'engine': get_engine_stats(engine, fast = fast), + 'engine': get_engine_stats(engine), 'job': get_job_stats(engine), 'scheduler': get_scheduler_stats(engine), 'network': get_network_stats(engine) if engine.simulate_network else {}, @@ -429,3 +428,80 @@ def get_gauge_limits(engine: Engine): 'peak_power': peak_power, 'g_flops_w_peak': gflops_per_watt_max } + + +class RunningStats: + """ + Calculate a subset of the stats in as "running totals" for each engine tick. This is much more + efficient than calling get_engine_stats() repeatedly. + """ + # TODO: maybe should combine this and get_engine_stats logic? + @staticmethod + def _running_stats(engine: Engine): + # Infinite generator used for the RunningStats logic + def running_sum_values(values, last_value, last_index): + return last_value + sum_values(values[last_index:]) + + def running_min_value(values, last_value, last_index): + if last_index < len(values): + new_min = min_value(values[last_index:]) + rtrn = new_min if last_value is None else min(new_min, last_value) + else: + rtrn = last_value # No change + return rtrn + + def running_max_value(values, last_value, last_index): + if last_index < len(values): + new_max = max_value(values[last_index:]) + return new_max if last_value is None else max(new_max, last_value) + else: + return last_value # No change + + last_power_index = 0 + power_sum = 0 + last_loss_index = 0 + loss_sum = 0 + loss_min = None + loss_max = None + + while True: + timesteps = engine.current_timestep - engine.timestep_start + throughput = engine.jobs_completed / timesteps * 3600 if timesteps != 0 else 0 # Jobs per hour + num_samples = len(engine.power_manager.history) if engine.power_manager else 0 + + power_sum = running_sum_values(engine.power_manager.history, power_sum, last_power_index) + average_power_mw = power_sum / num_samples / 1000 if num_samples else 0 + last_power_index = len(engine.power_manager.history) + + loss_sum = running_sum_values(engine.power_manager.loss_history, loss_sum, last_loss_index) + average_loss_mw = loss_sum / num_samples / 1000 if num_samples else 0 + loss_min = running_min_value(engine.power_manager.loss_history, loss_min, last_loss_index) + min_loss_mw = loss_min / 1000 if num_samples else 0 + loss_max = running_max_value(engine.power_manager.loss_history, loss_max, last_loss_index) + max_loss_mw = loss_max / 1000 if num_samples else 0 + last_loss_index = len(engine.power_manager.loss_history) + + loss_fraction = average_loss_mw / average_power_mw if average_power_mw else 0 + efficiency = 1 - loss_fraction if loss_fraction else 0 + total_energy_consumed = average_power_mw * timesteps / 3600 if timesteps else 0 # MW-hr + carbon_emissions = total_energy_consumed * 852.3 / 2204.6 / efficiency if efficiency else 0 + total_cost = total_energy_consumed * 1000 * engine.config.get('POWER_COST', 0) # Total cost in dollars + + yield { 
+ "throughput": throughput, + "num_samples": num_samples, + "average_power": average_power_mw, + "min_loss": min_loss_mw, + "average_loss": average_loss_mw, + "max_loss": max_loss_mw, + "system_power_efficiency": efficiency * 100, + "total_energy_consumed": total_energy_consumed, + "carbon_emissions": carbon_emissions, + "total_cost": total_cost, + } + + def __init__(self, engine: Engine): + self._gen = RunningStats._running_stats(engine) + + def get_stats(self) -> dict: + return next(self._gen) diff --git a/tests/systems/test_engine_basic.py b/tests/systems/test_engine_basic.py index 96c6253..32b4221 100644 --- a/tests/systems/test_engine_basic.py +++ b/tests/systems/test_engine_basic.py @@ -1,5 +1,8 @@ import pytest from ..util import run_engine +from raps.engine import Engine +from raps.sim_config import SingleSimConfig +from raps.stats import get_engine_stats, get_job_stats, RunningStats pytestmark = [ pytest.mark.system, @@ -18,3 +21,28 @@ def test_engine_basic(system, system_config, sim_output): assert stats['tick_count'] == 120 assert stats['engine']['time_simulated'] == '0:02:00' + + +def test_engine_stats(system, system_config, sim_output): + if not system_config.get("main", False): + pytest.skip(f"{system} does not support basic main run.") + + engine = Engine(SingleSimConfig.model_validate({ + "system": system, + "time": "2m", + })) + gen = engine.run_simulation() + running_stats = RunningStats(engine) + + for tick in gen: + stats = running_stats.get_stats() + stats = running_stats.get_stats() + + final_stats = { + **get_engine_stats(engine), + **get_job_stats(engine), + } + + # Confirm the running stats match up with the final stat computation + for stat in stats.keys(): + assert pytest.approx(stats[stat]) == final_stats[stat], f"stat {stat}" -- GitLab From 82f348a2c759261f6f0046fc63027e9c3d43e960 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 14 Oct 2025 11:22:59 -0400 Subject: [PATCH 318/388] OCIZettascale10 Rough sketch with what ExaDigit can model. This is a rough estimate according to https://www.hpcwire.com/off-the-wire/oracle-unveils-next-gen-oracle-cloud-infrastructure-zettascale10-cluster-for-ai/ Getting the overal numbers in the ballpark, while the Kyber + Kyber Side Car is not currently direcly modelled. Grain of salts included. The ballpark is ok. Run with: python main.py run --system OCIZettascale10 --workload randomAI --continuous-job-generation --- config/OCIZettascale10.yaml | 59 +++++++++++++++++++++++++++++++++++++ raps/sim_config.py | 3 +- raps/workloads/basic.py | 17 ++++++----- raps/workloads/live.py | 2 +- 4 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 config/OCIZettascale10.yaml diff --git a/config/OCIZettascale10.yaml b/config/OCIZettascale10.yaml new file mode 100644 index 0000000..7497164 --- /dev/null +++ b/config/OCIZettascale10.yaml @@ -0,0 +1,59 @@ +system: + num_cdus: 2778 # 800,000 Vera Rubin total + racks_per_cdu: 3 + nodes_per_rack: 72 # 600kW # like NV72 + chassis_per_rack: 1 + nodes_per_blade: 1 + switches_per_chassis: 72 # Chassis concept is Cray => NV72? + nics_per_node: 1 # Most likely 4 + rectifiers_per_chassis: 1 # power / losses will be set to zero as this is unknown + nodes_per_rectifier: 1 # power / losses will be set to zero as this is unknown + #missing_racks: + down_nodes: [] + cpus_per_node: 1 + gpus_per_node: 4 # Chiplets 4? 
+ cpu_peak_flops: 2048000000000.0 # Insignificant + gpu_peak_flops: 15000000000000000000.0 # 15EFlops/s FP4 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 200 # 200 == 4* 50 + power_gpu_max: 2200 # 2kW per node = 4*525 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 0 + sivoc_efficiency: 1.00 + rectifier_loss_constant: 0 + rectifier_efficiency: 1.00 + power_cost: 0.094 +scheduler: + job_arrival_time: 1 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 9000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 +uq: + power_gpu_uncertainty: 0.05 + power_cpu_uncertainty: 0.05 + power_mem_uncertainty: 0.05 + power_nic_uncertainty: 0.05 + power_nvme_uncertainty: 0.05 + power_cdus_uncertainty: 0.05 + power_node_uncertainty: 0.002 + power_switch_uncertainty: 0.05 + rectifier_power_uncertainty: 0.05 diff --git a/raps/sim_config.py b/raps/sim_config.py index 350c873..da3541c 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -134,7 +134,8 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Grab data from live system. """ # Workload arguments (TODO split into separate model) - workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay'] = "random" + workload: Literal['random', 'benchmark', 'peak', 'idle', + 'synthetic', 'multitenant', 'replay', 'randomAI'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] diff --git a/raps/workloads/basic.py b/raps/workloads/basic.py index 208af54..dbe2dfd 100644 --- a/raps/workloads/basic.py +++ b/raps/workloads/basic.py @@ -11,6 +11,7 @@ from raps.utils import ( from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY + class BasicWorkload: # Test for random 'reasonable' AI jobs @@ -19,17 +20,17 @@ class BasicWorkload: jobs = [] for i in range(args.numjobs): draw = random.randint(0, 10) - if draw == 0: + if draw != 0: et = random.randint(7200, 28800) nr = random.choice([128, 256, 512, 1024, 1280, 1792, 2048]) new_job = Job(job_dict(nodes_required=nr, - name="LLM", + name="LLM Production", account="llmUser", end_state="Success", id=random.randint(1, 99999), cpu_trace=0.1, - gpu_trace=(random.uniform(0.55, 0.8) * - self.config_map[self.args.system]['GPUS_PER_NODE']), + gpu_trace=(random.uniform(0.55, 0.8) + * self.config_map[self.args.system]['GPUS_PER_NODE']), ntx_trace=None, nrx_trace=None, submit_time=0, @@ -38,8 +39,10 @@ class BasicWorkload: end_time=et, expected_run_time=et)) else: - new_job = Job(job_dict(nodes_required=1, - name="LLM", + et = random.randint(300, 7200) + nr = random.choice([1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 128]) + new_job = Job(job_dict(nodes_required=nr, + name="User-Test LLM", account="llmUser", end_state="Success", id=random.randint(1, 99999), @@ -50,7 +53,7 @@ class BasicWorkload: submit_time=0, time_limit=43200, start_time=0, - end_time=7200, + end_time=et, expected_run_time=random.randint(60, 7200))) jobs.append(new_job) return jobs diff --git a/raps/workloads/live.py b/raps/workloads/live.py index b4f2733..4a468af 100644 --- a/raps/workloads/live.py +++ b/raps/workloads/live.py @@ -1,4 +1,4 @@ -def continuous_job_generation(self, *, engine, timestep, jobs): +def continuous_job_generation(*, engine, timestep, jobs): # print("if len(engine.queue) <= 
engine.continuous_workload.args.maxqueue:") # print(f"if {len(engine.queue)} <= {engine.continuous_workload.args.maxqueue}:") if len(engine.queue) <= engine.continuous_workload.args.maxqueue: -- GitLab From c91a819000611981d90b8dd50d1b5e9c2af4e9e2 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 17 Sep 2025 23:20:07 -0400 Subject: [PATCH 319/388] Initial config and dataloader for philly traces (dataloader not yet working) --- README.md | 8 +++ config/philly/2-gpu.yaml | 51 ++++++++++++++++ config/philly/8-gpu.yaml | 51 ++++++++++++++++ raps/dataloaders/philly.py | 116 +++++++++++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 config/philly/2-gpu.yaml create mode 100644 config/philly/8-gpu.yaml create mode 100644 raps/dataloaders/philly.py diff --git a/README.md b/README.md index 9c708af..92c68e0 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,14 @@ For MIT Supercloud # Reinforcement learning test case raps train-rl --system mit_supercloud/part-cpu -f /opt/data/mit_supercloud/202201 +Microsoft Azure - 2017 Philly Traces + + # Synthetic + python main.py run-parts -x philly -w multitenant + + # Telemetry replay + python main.py run-parts -x philly -f /opt/data/philly/trace-data + For Lumi # Synthetic test for Lumi: diff --git a/config/philly/2-gpu.yaml b/config/philly/2-gpu.yaml new file mode 100644 index 0000000..0622605 --- /dev/null +++ b/config/philly/2-gpu.yaml @@ -0,0 +1,51 @@ +system: + num_cdus: 1 + racks_per_cdu: 1 + nodes_per_rack: 321 + chassis_per_rack: 3 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + cores_per_cpu: 20 + gpus_per_node: 2 + cpu_peak_flops: 1248000000000.0 + gpu_peak_flops: 7800000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 75 + power_gpu_max: 300 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nvme: 30 + power_nic: 20 + power_cdu: 8473.47 + power_switch: 250 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + multitenant: true + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 20 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 192 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/philly/8-gpu.yaml b/config/philly/8-gpu.yaml new file mode 100644 index 0000000..aae80ee --- /dev/null +++ b/config/philly/8-gpu.yaml @@ -0,0 +1,51 @@ +system: + num_cdus: 1 + racks_per_cdu: 1 + nodes_per_rack: 231 + chassis_per_rack: 3 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + cores_per_cpu: 20 + gpus_per_node: 8 + cpu_peak_flops: 1248000000000.0 + gpu_peak_flops: 7800000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 75 + power_gpu_max: 300 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nvme: 30 + power_nic: 20 + power_cdu: 8473.47 + power_switch: 250 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + multitenant: true + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 20 + 
min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 192 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py new file mode 100644 index 0000000..08c02e1 --- /dev/null +++ b/raps/dataloaders/philly.py @@ -0,0 +1,116 @@ +import os +import json +import csv +import datetime +import pandas as pd +import warnings +from raps.job import Job + +DATE_FORMAT_STR = "%Y-%m-%d %H:%M:%S" + +def parse_date(s): + if not s or s == "None": + return None + # strip possible timezone labels like "PST"/"PDT" + s = s.replace(" PST", "").replace(" PDT", "") + return datetime.datetime.strptime(s, DATE_FORMAT_STR) + +def load_data(files, **kwargs): + """ + Load Philly trace into ExaDigiT Job objects. + + Args: + files (list[str]): A list with one directory path (e.g., ['/opt/data/philly/trace-data']). + + Returns: + list[Job] + """ + assert len(files) == 1, "Expecting a single directory path" + trace_dir = files[0] + + # --- 1. Machine list --- + machine_file = os.path.join(trace_dir, "cluster_machine_list") + machines = {} + with open(machine_file) as f: + reader = csv.DictReader(f) + for row in reader: + mid = row["machineId"] + machines[mid] = { + "num_gpus": int(row[" number of GPUs"]), + "gpu_mem": row[" single GPU mem"].strip() + } + + # --- 2. CPU util --- + cpu_file = os.path.join(trace_dir, "cluster_cpu_util") + cpu_util = pd.read_csv(cpu_file) + # cpu_util has columns: time, machine_id, cpu_util + + # --- 3. GPU util --- + gpu_file = os.path.join(trace_dir, "cluster_gpu_util") + + with warnings.catch_warnings(record=True) as wlist: + gpu_util = pd.read_csv( + gpu_file, + engine="python", + on_bad_lines="skip" + ) + + if wlist: + warnings.warn( + f"cluster_gpu_util: skipped {len(wlist)} malformed lines while reading {gpu_file}", + UserWarning + ) + + # --- 4. Job log --- + job_file = os.path.join(trace_dir, "cluster_job_log") + with open(job_file) as f: + job_log = json.load(f) + + jobs = [] + for raw in job_log: + jobid = raw.get("jobid") + user = raw.get("user") + status = raw.get("status") + submitted = parse_date(raw.get("submitted_time")) + + attempts = raw.get("attempts", []) + start, end = None, None + if attempts: + start = parse_date(attempts[0].get("start_time")) + end = parse_date(attempts[-1].get("end_time")) + + wall_time = None + if start and end: + wall_time = (end - start).total_seconds() + + # Which machines did this job run on? 
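+ # Each attempt's "detail" list holds per-machine records; the "ip" field
+ # doubles as the machine id matched against the utilization traces, and the
+ # job's GPU count is the sum over each record's "gpus" list.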
+ machine_ids = [] + gpus = 0 + if attempts and "detail" in attempts[0]: + for detail in attempts[0]["detail"]: + mid = detail["ip"] + machine_ids.append(mid) + gpus += len(detail.get("gpus", [])) + + # Collect utilization traces for each machine this job touched + job_cpu = cpu_util[cpu_util["machine_id"].isin(machine_ids)] + job_gpu = gpu_util[gpu_util["machineId"].isin(machine_ids)] + + job = Job( + job_id=jobid, + name=f"philly-{jobid}", + user=user, + nodes_required=len(machine_ids) if machine_ids else None, + wall_time=wall_time, + start_time=start, + end_time=end, + queue_time=submitted, + scheduled_nodes=machine_ids, + cpu_trace=job_cpu if not job_cpu.empty else None, + gpu_trace=job_gpu if not job_gpu.empty else None, + priority=None, + end_state=status + ) + jobs.append(job) + + return jobs -- GitLab From f5b9fc4124c8eb29390ee55ccd26789a9469a092 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 18 Sep 2025 12:09:31 -0400 Subject: [PATCH 320/388] Add in all the args for job_dict call --- raps/dataloaders/philly.py | 64 ++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 08c02e1..1601eee 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -4,7 +4,7 @@ import csv import datetime import pandas as pd import warnings -from raps.job import Job +from raps.job import job_dict, Job DATE_FORMAT_STR = "%Y-%m-%d %H:%M:%S" @@ -96,21 +96,51 @@ def load_data(files, **kwargs): job_cpu = cpu_util[cpu_util["machine_id"].isin(machine_ids)] job_gpu = gpu_util[gpu_util["machineId"].isin(machine_ids)] - job = Job( - job_id=jobid, - name=f"philly-{jobid}", - user=user, - nodes_required=len(machine_ids) if machine_ids else None, - wall_time=wall_time, - start_time=start, - end_time=end, - queue_time=submitted, - scheduled_nodes=machine_ids, - cpu_trace=job_cpu if not job_cpu.empty else None, - gpu_trace=job_gpu if not job_gpu.empty else None, - priority=None, - end_state=status - ) - jobs.append(job) + print("***", len(machine_ids), machine_ids) + + if machine_ids: + + job = job_dict( + # Core identity + id=jobid, + name=f"philly-{jobid}", + account=user if user else "unknown", # Philly log has user + + # Partition & priority + nodes_required=len(machine_ids) if machine_ids else 0, + partition=0, + priority=0, + + # Resource requests + cpu_cores_required=0, # Philly logs don’t track cores + gpu_units_required=gpus, # we can count GPUs from attempts + allocated_cpu_cores=0, + allocated_gpu_units=gpus, + + # State + end_state=status, + + # Traces + cpu_trace=job_cpu if not job_cpu.empty else None, + gpu_trace=job_gpu if not job_gpu.empty else None, + ntx_trace=None, + nrx_trace=None, + + # Timing + submit_time=submitted.timestamp() if submitted else 0, + start_time=start.timestamp() if start else 0, + end_time=end.timestamp() if end else 0, + time_limit=0, + expected_run_time=wall_time if wall_time else 0, + current_run_time=0, + trace_time=None, + trace_start_time=None, + trace_end_time=None, + trace_quanta=None, + trace_missing_values=False, + downscale=1 + ) + print(job) + jobs.append(Job(job)) return jobs -- GitLab From 55d02cebaa08179aa73a7ff4554b04e10c6b6c5d Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 18 Sep 2025 12:28:54 -0400 Subject: [PATCH 321/388] Shift jobs relative to earliest submitted time --- raps/dataloaders/philly.py | 144 +++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 52 deletions(-) diff --git 
a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 1601eee..399c546 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -4,6 +4,7 @@ import csv import datetime import pandas as pd import warnings +from tqdm import tqdm from raps.job import job_dict, Job DATE_FORMAT_STR = "%Y-%m-%d %H:%M:%S" @@ -66,81 +67,120 @@ def load_data(files, **kwargs): with open(job_file) as f: job_log = json.load(f) - jobs = [] + # --- First pass: find earliest submit time --- + earliest_submit = None for raw in job_log: + submitted = raw.get("submitted_time") + if submitted is None or submitted == "None": + continue + + # Philly uses either string dates or epoch ints + if isinstance(submitted, (int, float)): + t = int(submitted) + else: + t = parse_date(submitted).timestamp() + + if earliest_submit is None or t < earliest_submit: + earliest_submit = t + + if earliest_submit is None: + raise ValueError("No valid submitted_time found in Philly traces") + + # --- Second pass: build jobs --- + jobs = [] + for raw in tqdm(job_log, desc="Building Job objects"): jobid = raw.get("jobid") user = raw.get("user") status = raw.get("status") - submitted = parse_date(raw.get("submitted_time")) + + # Submitted time + submitted = raw.get("submitted_time") + if isinstance(submitted, (int, float)): + submitted = datetime.datetime.fromtimestamp(int(submitted)) + else: + submitted = parse_date(submitted) attempts = raw.get("attempts", []) start, end = None, None if attempts: - start = parse_date(attempts[0].get("start_time")) - end = parse_date(attempts[-1].get("end_time")) + st = attempts[0].get("start_time") + et = attempts[-1].get("end_time") + + if isinstance(st, (int, float)): + start = datetime.datetime.fromtimestamp(int(st)) + elif st: + start = parse_date(st) + + if isinstance(et, (int, float)): + end = datetime.datetime.fromtimestamp(int(et)) + elif et: + end = parse_date(et) wall_time = None if start and end: wall_time = (end - start).total_seconds() # Which machines did this job run on? 
- machine_ids = [] - gpus = 0 + machine_ids, gpus = [], 0 if attempts and "detail" in attempts[0]: for detail in attempts[0]["detail"]: mid = detail["ip"] machine_ids.append(mid) gpus += len(detail.get("gpus", [])) - # Collect utilization traces for each machine this job touched + # Collect utilization traces job_cpu = cpu_util[cpu_util["machine_id"].isin(machine_ids)] job_gpu = gpu_util[gpu_util["machineId"].isin(machine_ids)] - print("***", len(machine_ids), machine_ids) - if machine_ids: - - job = job_dict( - # Core identity - id=jobid, - name=f"philly-{jobid}", - account=user if user else "unknown", # Philly log has user - - # Partition & priority - nodes_required=len(machine_ids) if machine_ids else 0, - partition=0, - priority=0, - - # Resource requests - cpu_cores_required=0, # Philly logs don’t track cores - gpu_units_required=gpus, # we can count GPUs from attempts - allocated_cpu_cores=0, - allocated_gpu_units=gpus, - - # State - end_state=status, - - # Traces - cpu_trace=job_cpu if not job_cpu.empty else None, - gpu_trace=job_gpu if not job_gpu.empty else None, - ntx_trace=None, - nrx_trace=None, - - # Timing - submit_time=submitted.timestamp() if submitted else 0, - start_time=start.timestamp() if start else 0, - end_time=end.timestamp() if end else 0, - time_limit=0, - expected_run_time=wall_time if wall_time else 0, - current_run_time=0, - trace_time=None, - trace_start_time=None, - trace_end_time=None, - trace_quanta=None, - trace_missing_values=False, - downscale=1 - ) - print(job) - jobs.append(Job(job)) + # Shift times relative to earliest_submit + submit_time = submitted.timestamp() - earliest_submit if submitted else None + start_time = start.timestamp() - earliest_submit if start else None + end_time = end.timestamp() - earliest_submit if end else None + + if not submit_time or not start_time or not end_time: + warnings.warn( + f"skipped {jobid} b/c missing submit_time, start_time, or end_time", + UserWarning + ) + + + if submit_time and start_time and end_time: + + job = job_dict( + id=jobid, + name=f"philly-{jobid}", + account=user if user else "unknown", + + nodes_required=len(machine_ids), + partition=0, + priority=0, + + cpu_cores_required=0, + gpu_units_required=gpus, + allocated_cpu_cores=0, + allocated_gpu_units=gpus, + + end_state=status, + + cpu_trace=job_cpu if not job_cpu.empty else None, + gpu_trace=job_gpu if not job_gpu.empty else None, + ntx_trace=None, + nrx_trace=None, + + submit_time=submit_time, + start_time=start_time, + end_time=end_time, + time_limit=0, + expected_run_time=wall_time if wall_time else 0, + current_run_time=0, + trace_time=None, + trace_start_time=None, + trace_end_time=None, + trace_quanta=None, + trace_missing_values=False, + downscale=1 + ) + jobs.append(Job(job)) return jobs -- GitLab From a3a9a9d843d469c0ec4b58826d2edd3d10ca96d8 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 18 Sep 2025 16:16:15 -0400 Subject: [PATCH 322/388] Work on getting cpu and gpu traces working correctly... 
but not yet --- raps/dataloaders/philly.py | 107 +++++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 22 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 399c546..0e9eaa8 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -1,11 +1,13 @@ import os import json import csv -import datetime import pandas as pd import warnings + +from datetime import datetime, timezone from tqdm import tqdm from raps.job import job_dict, Job +from raps.utils import WorkloadData DATE_FORMAT_STR = "%Y-%m-%d %H:%M:%S" @@ -14,7 +16,7 @@ def parse_date(s): return None # strip possible timezone labels like "PST"/"PDT" s = s.replace(" PST", "").replace(" PDT", "") - return datetime.datetime.strptime(s, DATE_FORMAT_STR) + return datetime.strptime(s, DATE_FORMAT_STR) def load_data(files, **kwargs): """ @@ -41,10 +43,17 @@ def load_data(files, **kwargs): "gpu_mem": row[" single GPU mem"].strip() } + # build node → index mapping + node_mapping = {mid: idx for idx, mid in enumerate(sorted(machines.keys()))} + # --- 2. CPU util --- cpu_file = os.path.join(trace_dir, "cluster_cpu_util") cpu_util = pd.read_csv(cpu_file) # cpu_util has columns: time, machine_id, cpu_util + cpu_util["time"] = cpu_util["time"].str.replace(" PST","").str.replace(" PDT","") + #cpu_util["time"] = pd.to_datetime(cpu_util["time"], format="%Y-%m-%d %H:%M:%S") + # now cpu_util has: time (datetime), machine_id, cpu_util + cpu_util["time"] = cpu_util["time"].apply(parse_date) # --- 3. GPU util --- gpu_file = os.path.join(trace_dir, "cluster_gpu_util") @@ -55,20 +64,33 @@ def load_data(files, **kwargs): engine="python", on_bad_lines="skip" ) - if wlist: warnings.warn( f"cluster_gpu_util: skipped {len(wlist)} malformed lines while reading {gpu_file}", UserWarning ) + # Convert time to datetime + gpu_util["time"] = pd.to_datetime(gpu_util["time"], errors="coerce").dt.tz_localize(None) + + # Identify GPU columns + gpu_cols = [c for c in gpu_util.columns if c.startswith("gpu")] + + # Collapse per row: sum all GPU utilizations and divide by 100 + gpu_util["gpu_util"] = gpu_util[gpu_cols].sum(axis=1) / 100.0 + + # Keep only collapsed util plus metadata + gpu_util = gpu_util[["time", "machineId", "gpu_util"]] + + print("Sample GPU util after preprocess:", gpu_util.head()) + # --- 4. 
Job log --- job_file = os.path.join(trace_dir, "cluster_job_log") with open(job_file) as f: job_log = json.load(f) # --- First pass: find earliest submit time --- - earliest_submit = None + start_ts = None for raw in job_log: submitted = raw.get("submitted_time") if submitted is None or submitted == "None": @@ -80,14 +102,15 @@ def load_data(files, **kwargs): else: t = parse_date(submitted).timestamp() - if earliest_submit is None or t < earliest_submit: - earliest_submit = t + if start_ts is None or t < start_ts: + start_ts = t - if earliest_submit is None: + if start_ts is None: raise ValueError("No valid submitted_time found in Philly traces") + # --- Second pass: build jobs --- - jobs = [] + jobs_list = [] for raw in tqdm(job_log, desc="Building Job objects"): jobid = raw.get("jobid") user = raw.get("user") @@ -96,7 +119,7 @@ def load_data(files, **kwargs): # Submitted time submitted = raw.get("submitted_time") if isinstance(submitted, (int, float)): - submitted = datetime.datetime.fromtimestamp(int(submitted)) + submitted = datetime.fromtimestamp(int(submitted)) else: submitted = parse_date(submitted) @@ -107,12 +130,12 @@ def load_data(files, **kwargs): et = attempts[-1].get("end_time") if isinstance(st, (int, float)): - start = datetime.datetime.fromtimestamp(int(st)) + start = datetime.fromtimestamp(int(st)) elif st: start = parse_date(st) if isinstance(et, (int, float)): - end = datetime.datetime.fromtimestamp(int(et)) + end = datetime.fromtimestamp(int(et)) elif et: end = parse_date(et) @@ -128,15 +151,44 @@ def load_data(files, **kwargs): machine_ids.append(mid) gpus += len(detail.get("gpus", [])) - # Collect utilization traces - job_cpu = cpu_util[cpu_util["machine_id"].isin(machine_ids)] - job_gpu = gpu_util[gpu_util["machineId"].isin(machine_ids)] + # CPU utilization traces + if machine_ids and start and end: + mask = ( + cpu_util["machine_id"].isin(machine_ids) & + (cpu_util["time"] >= start) & + (cpu_util["time"] <= end) + ) + job_cpu = cpu_util.loc[mask].copy() + + # Aggregate across machines if >1 machine + if len(machine_ids) > 1: + job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() + + print("Job", jobid) + print("machine_ids from job:", machine_ids[:5]) + print("gpu_util machineId sample:", gpu_util["machineId"].unique()[:5]) + print("start, end:", start, end) + print("gpu_util time range:", gpu_util["time"].min(), gpu_util["time"].max()) + + # GPU utilization traces + job_gpu = None + if machine_ids and start and end: + mask = ( + gpu_util["machineId"].isin(machine_ids) & + (gpu_util["time"] >= start) & + (gpu_util["time"] <= end) + ) + job_gpu = gpu_util.loc[mask].copy() + + # Aggregate across machines if >1 machine + if len(machine_ids) > 1: + job_gpu = job_gpu.groupby("time")["gpu_util"].sum().reset_index() if machine_ids: - # Shift times relative to earliest_submit - submit_time = submitted.timestamp() - earliest_submit if submitted else None - start_time = start.timestamp() - earliest_submit if start else None - end_time = end.timestamp() - earliest_submit if end else None + # Shift times relative to start_ts + submit_time = submitted.timestamp() - start_ts if submitted else None + start_time = start.timestamp() - start_ts if start else None + end_time = end.timestamp() - start_ts if end else None if not submit_time or not start_time or not end_time: warnings.warn( @@ -144,6 +196,7 @@ def load_data(files, **kwargs): UserWarning ) + scheduled_nodes = [node_mapping[mid] for mid in machine_ids if mid in node_mapping] if submit_time and start_time and 
end_time: @@ -162,9 +215,10 @@ def load_data(files, **kwargs): allocated_gpu_units=gpus, end_state=status, + scheduled_nodes=scheduled_nodes, - cpu_trace=job_cpu if not job_cpu.empty else None, - gpu_trace=job_gpu if not job_gpu.empty else None, + cpu_trace=job_cpu, + gpu_trace=job_gpu, ntx_trace=None, nrx_trace=None, @@ -181,6 +235,15 @@ def load_data(files, **kwargs): trace_missing_values=False, downscale=1 ) - jobs.append(Job(job)) + jobs_list.append(Job(job)) + + print(job) + + # Find max end timestamp across jobs + end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) - return jobs + return WorkloadData( + jobs=jobs_list, + telemetry_start=0, telemetry_end=int(end_ts - start_ts), + start_date=datetime.fromtimestamp(start_ts, timezone.utc), + ) -- GitLab From de2aba18e8ebbc755d192bc93cc56df27adc11d1 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 18 Sep 2025 23:32:14 -0400 Subject: [PATCH 323/388] Fix some issues so that both partitions work --- raps/dataloaders/philly.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 0e9eaa8..789633f 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -30,6 +30,10 @@ def load_data(files, **kwargs): """ assert len(files) == 1, "Expecting a single directory path" trace_dir = files[0] + config = kwargs.get('config') + gpus_per_node = config.get("GPUS_PER_NODE") + if gpus_per_node is None: + raise ValueError("Must pass gpus_per_node (2 or 8)") # --- 1. Machine list --- machine_file = os.path.join(trace_dir, "cluster_machine_list") @@ -43,8 +47,17 @@ def load_data(files, **kwargs): "gpu_mem": row[" single GPU mem"].strip() } - # build node → index mapping - node_mapping = {mid: idx for idx, mid in enumerate(sorted(machines.keys()))} + partition_machines = { + mid: info for mid, info in machines.items() + if info["num_gpus"] == gpus_per_node + } + + # Build node → index mapping for this partition + node_mapping = {mid: idx for idx, mid in enumerate(sorted(partition_machines.keys()))} + max_nodes = len(node_mapping) + + # Assign partition ID (e.g. 0 for 2-GPU, 1 for 8-GPU) + partition_id = 0 if gpus_per_node == 2 else 1 # --- 2. CPU util --- cpu_file = os.path.join(trace_dir, "cluster_cpu_util") @@ -82,7 +95,7 @@ def load_data(files, **kwargs): # Keep only collapsed util plus metadata gpu_util = gpu_util[["time", "machineId", "gpu_util"]] - print("Sample GPU util after preprocess:", gpu_util.head()) + #print("Sample GPU util after preprocess:", gpu_util.head()) # --- 4. 
Job log --- job_file = os.path.join(trace_dir, "cluster_job_log") @@ -108,10 +121,9 @@ def load_data(files, **kwargs): if start_ts is None: raise ValueError("No valid submitted_time found in Philly traces") - # --- Second pass: build jobs --- jobs_list = [] - for raw in tqdm(job_log, desc="Building Job objects"): + for raw in tqdm(job_log[:1000], desc="Building Job objects"): jobid = raw.get("jobid") user = raw.get("user") status = raw.get("status") @@ -164,11 +176,11 @@ def load_data(files, **kwargs): if len(machine_ids) > 1: job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() - print("Job", jobid) - print("machine_ids from job:", machine_ids[:5]) - print("gpu_util machineId sample:", gpu_util["machineId"].unique()[:5]) - print("start, end:", start, end) - print("gpu_util time range:", gpu_util["time"].min(), gpu_util["time"].max()) + #print("Job", jobid) + #print("machine_ids from job:", machine_ids[:5]) + #print("gpu_util machineId sample:", gpu_util["machineId"].unique()[:5]) + #print("start, end:", start, end) + #print("gpu_util time range:", gpu_util["time"].min(), gpu_util["time"].max()) # GPU utilization traces job_gpu = None @@ -206,7 +218,7 @@ def load_data(files, **kwargs): account=user if user else "unknown", nodes_required=len(machine_ids), - partition=0, + partition=partition_id, priority=0, cpu_cores_required=0, -- GitLab From 90fe862830dfbff162b33a37698866e3335ab9eb Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 29 Sep 2025 01:49:16 +0300 Subject: [PATCH 324/388] Work towards support for reading philly traces that are already parsed into daily files --- raps/dataloaders/philly.py | 168 ++++++++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 48 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 789633f..aa1a8ae 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -1,15 +1,98 @@ import os +import glob import json import csv import pandas as pd import warnings -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from tqdm import tqdm from raps.job import job_dict, Job from raps.utils import WorkloadData DATE_FORMAT_STR = "%Y-%m-%d %H:%M:%S" +DEFAULT_START = "2017-10-03T00:00" +DEFAULT_END = "2017-10-04T00:00" + +def to_epoch(ts_str): + if ts_str is None: + return None + if isinstance(ts_str, (int, float)): + return int(ts_str) + if "T" in ts_str: + dt = datetime.fromisoformat(ts_str) + else: + dt = datetime.strptime(ts_str, DATE_FORMAT_STR) + return int(dt.timestamp()) + +def parse_timestamp(val): + """ + Convert Philly job log timestamps to datetime. + Handles integers (epoch) and strings with PST/PDT. + Returns datetime or None. + """ + if val is None or val == "None": + return None + if isinstance(val, (int, float)): + return datetime.fromtimestamp(int(val), tz=timezone.utc).replace(tzinfo=None) + if isinstance(val, str): + val = val.replace(" PST", "").replace(" PDT", "") + try: + return datetime.strptime(val, DATE_FORMAT_STR).replace(tzinfo=None) + except ValueError: + return None + return None + +def load_gpu_traces_by_dayXX(gpu_trace_dir, machine_ids, job_start_dt, job_end_dt): + """ + Load GPU utilization for specific machines and time range, + using preprocessed per-day CSVs (gpu_by_day/). 
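+ Returns a DataFrame with columns [time, machineId, gpu_util]; empty if no
+ per-day files overlap the requested window.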
+ """ + dfs = [] + current = job_start_dt.date() + while current <= job_end_dt.date(): + day_file = os.path.join(gpu_trace_dir, f"{current}.csv") + if os.path.exists(day_file): + df = pd.read_csv( + day_file, + names=["time", "machineId", "gpu_util"], + parse_dates=["time"], + on_bad_lines="skip" + ) + df = df[df["machineId"].isin(machine_ids)] + df = df[(df["time"] >= job_start_dt) & (df["time"] <= job_end_dt)] + if not df.empty: + dfs.append(df) + current += timedelta(days=1) + + if dfs: + return pd.concat(dfs, ignore_index=True) + return pd.DataFrame(columns=["time", "machineId", "gpu_util"]) + + +def load_gpu_traces_by_day(trace_dir, start_dt, end_dt): + """Load GPU traces only for the days between start_dt and end_dt.""" + gpu_dir = os.path.join(trace_dir, "dist/gpu_by_day") + frames = [] + + current = start_dt.date() + while current <= end_dt.date(): + daily_file = os.path.join(gpu_dir, f"{current}.csv") + if os.path.exists(daily_file): + df = pd.read_csv( + daily_file, + names=["time", "machineId", "gpu_util"], # no header in daily CSVs + parse_dates=["time"] + ) + frames.append(df) + else: + print(f"⚠ No trace file for {current}") + current += timedelta(days=1) + + if not frames: + return pd.DataFrame(columns=["time", "machineId", "gpu_util"]) + + return pd.concat(frames, ignore_index=True) def parse_date(s): if not s or s == "None": @@ -28,8 +111,12 @@ def load_data(files, **kwargs): Returns: list[Job] """ + # extract --start from kwargs + start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) + end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) assert len(files) == 1, "Expecting a single directory path" trace_dir = files[0] + gpu_trace_dir = os.path.join(files[0], "dist", "gpu_by_day") config = kwargs.get('config') gpus_per_node = config.get("GPUS_PER_NODE") if gpus_per_node is None: @@ -62,46 +149,36 @@ def load_data(files, **kwargs): # --- 2. CPU util --- cpu_file = os.path.join(trace_dir, "cluster_cpu_util") cpu_util = pd.read_csv(cpu_file) - # cpu_util has columns: time, machine_id, cpu_util cpu_util["time"] = cpu_util["time"].str.replace(" PST","").str.replace(" PDT","") - #cpu_util["time"] = pd.to_datetime(cpu_util["time"], format="%Y-%m-%d %H:%M:%S") - # now cpu_util has: time (datetime), machine_id, cpu_util cpu_util["time"] = cpu_util["time"].apply(parse_date) # --- 3. GPU util --- - gpu_file = os.path.join(trace_dir, "cluster_gpu_util") + start_dt = datetime.fromtimestamp(start_ts, tz=timezone.utc) + end_dt = datetime.fromtimestamp(end_ts, tz=timezone.utc) - with warnings.catch_warnings(record=True) as wlist: - gpu_util = pd.read_csv( - gpu_file, - engine="python", - on_bad_lines="skip" - ) - if wlist: - warnings.warn( - f"cluster_gpu_util: skipped {len(wlist)} malformed lines while reading {gpu_file}", - UserWarning - ) - - # Convert time to datetime - gpu_util["time"] = pd.to_datetime(gpu_util["time"], errors="coerce").dt.tz_localize(None) - - # Identify GPU columns - gpu_cols = [c for c in gpu_util.columns if c.startswith("gpu")] + gpu_trace_dir = os.path.join(trace_dir, "dist", "gpu_by_day") - # Collapse per row: sum all GPU utilizations and divide by 100 - gpu_util["gpu_util"] = gpu_util[gpu_cols].sum(axis=1) / 100.0 - - # Keep only collapsed util plus metadata - gpu_util = gpu_util[["time", "machineId", "gpu_util"]] - - #print("Sample GPU util after preprocess:", gpu_util.head()) + # For each job: + gpu_trace = load_gpu_traces_by_day(gpu_trace_dir, start_dt, end_dt) + job_gpu = load_gpu_traces_by_day(gpu_trace_dir, start_dt, end_dt) # --- 4. 
Job log --- job_file = os.path.join(trace_dir, "cluster_job_log") with open(job_file) as f: job_log = json.load(f) + # Filter job_log to only jobs matching the partition's gpus_per_node + if gpus_per_node is not None: + filtered_log = [] + for raw in job_log: + attempts = raw.get("attempts", []) + if attempts and "detail" in attempts[0]: + # Count GPUs from the first detail + gpus = sum(len(detail.get("gpus", [])) for detail in attempts[0]["detail"]) + if gpus > 0 and (gpus % gpus_per_node == 0): + filtered_log.append(raw) + job_log = filtered_log + # --- First pass: find earliest submit time --- start_ts = None for raw in job_log: @@ -176,25 +253,19 @@ def load_data(files, **kwargs): if len(machine_ids) > 1: job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() - #print("Job", jobid) - #print("machine_ids from job:", machine_ids[:5]) - #print("gpu_util machineId sample:", gpu_util["machineId"].unique()[:5]) - #print("start, end:", start, end) - #print("gpu_util time range:", gpu_util["time"].min(), gpu_util["time"].max()) + # --- absolute datetimes (used for filtering traces) --- + submitted_dt = parse_timestamp(raw.get("submitted_time")) - # GPU utilization traces - job_gpu = None - if machine_ids and start and end: - mask = ( - gpu_util["machineId"].isin(machine_ids) & - (gpu_util["time"] >= start) & - (gpu_util["time"] <= end) - ) - job_gpu = gpu_util.loc[mask].copy() + mask = ( + (gpu_trace["machineId"].isin(machine_ids)) & + (gpu_trace["time"] >= start_dt) & + (gpu_trace["time"] <= end_dt) + ) + job_gpu = gpu_trace.loc[mask].copy() - # Aggregate across machines if >1 machine - if len(machine_ids) > 1: - job_gpu = job_gpu.groupby("time")["gpu_util"].sum().reset_index() + print(f" job_gpu shape after filtering: {job_gpu.shape}") + if job_gpu.empty: + print(" ⚠ No GPU rows matched this job") if machine_ids: # Shift times relative to start_ts @@ -229,7 +300,7 @@ def load_data(files, **kwargs): end_state=status, scheduled_nodes=scheduled_nodes, - cpu_trace=job_cpu, + cpu_trace=0, gpu_trace=job_gpu, ntx_trace=None, nrx_trace=None, @@ -252,7 +323,8 @@ def load_data(files, **kwargs): print(job) # Find max end timestamp across jobs - end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) + #end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) + end_ts = 3600 return WorkloadData( jobs=jobs_list, -- GitLab From abb0ed312cc2ca8fe0fcb6bf4f54ce6c632cb158 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 29 Sep 2025 02:53:16 +0300 Subject: [PATCH 325/388] Get gpu trace working but exit after first job - still lots of work to do --- README.md | 2 +- raps/dataloaders/philly.py | 86 +++++++++++++++----------------------- 2 files changed, 34 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 92c68e0..3684594 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Microsoft Azure - 2017 Philly Traces python main.py run-parts -x philly -w multitenant # Telemetry replay - python main.py run-parts -x philly -f /opt/data/philly/trace-data + python main.py run-parts -x philly -f /opt/data/philly/trace-data --start 2017-10-03T00:00 --end 2017-10-04T00:00 For Lumi diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index aa1a8ae..8747e16 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -43,41 +43,14 @@ def parse_timestamp(val): return None return None -def load_gpu_traces_by_dayXX(gpu_trace_dir, machine_ids, job_start_dt, job_end_dt): - """ - Load GPU utilization for specific machines and time 
range, - using preprocessed per-day CSVs (gpu_by_day/). - """ - dfs = [] - current = job_start_dt.date() - while current <= job_end_dt.date(): - day_file = os.path.join(gpu_trace_dir, f"{current}.csv") - if os.path.exists(day_file): - df = pd.read_csv( - day_file, - names=["time", "machineId", "gpu_util"], - parse_dates=["time"], - on_bad_lines="skip" - ) - df = df[df["machineId"].isin(machine_ids)] - df = df[(df["time"] >= job_start_dt) & (df["time"] <= job_end_dt)] - if not df.empty: - dfs.append(df) - current += timedelta(days=1) - - if dfs: - return pd.concat(dfs, ignore_index=True) - return pd.DataFrame(columns=["time", "machineId", "gpu_util"]) - - def load_gpu_traces_by_day(trace_dir, start_dt, end_dt): """Load GPU traces only for the days between start_dt and end_dt.""" - gpu_dir = os.path.join(trace_dir, "dist/gpu_by_day") frames = [] current = start_dt.date() + while current <= end_dt.date(): - daily_file = os.path.join(gpu_dir, f"{current}.csv") + daily_file = os.path.join(trace_dir, f"{current}.csv") if os.path.exists(daily_file): df = pd.read_csv( daily_file, @@ -114,6 +87,7 @@ def load_data(files, **kwargs): # extract --start from kwargs start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) + assert len(files) == 1, "Expecting a single directory path" trace_dir = files[0] gpu_trace_dir = os.path.join(files[0], "dist", "gpu_by_day") @@ -153,15 +127,12 @@ def load_data(files, **kwargs): cpu_util["time"] = cpu_util["time"].apply(parse_date) # --- 3. GPU util --- - start_dt = datetime.fromtimestamp(start_ts, tz=timezone.utc) - end_dt = datetime.fromtimestamp(end_ts, tz=timezone.utc) + PDT = timezone(timedelta(hours=-7)) + start_dt = datetime.fromtimestamp(start_ts, tz=PDT) + end_dt = datetime.fromtimestamp(end_ts, tz=PDT) gpu_trace_dir = os.path.join(trace_dir, "dist", "gpu_by_day") - # For each job: - gpu_trace = load_gpu_traces_by_day(gpu_trace_dir, start_dt, end_dt) - job_gpu = load_gpu_traces_by_day(gpu_trace_dir, start_dt, end_dt) - # --- 4. 
Job log --- job_file = os.path.join(trace_dir, "cluster_job_log") with open(job_file) as f: @@ -241,31 +212,38 @@ def load_data(files, **kwargs): gpus += len(detail.get("gpus", [])) # CPU utilization traces - if machine_ids and start and end: - mask = ( - cpu_util["machine_id"].isin(machine_ids) & - (cpu_util["time"] >= start) & - (cpu_util["time"] <= end) - ) - job_cpu = cpu_util.loc[mask].copy() - - # Aggregate across machines if >1 machine - if len(machine_ids) > 1: - job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() + #if machine_ids and start and end: + # mask = ( + # cpu_util["machine_id"].isin(machine_ids) & + # (cpu_util["time"] >= start) & + # (cpu_util["time"] <= end) + # ) + # job_cpu = cpu_util.loc[mask].copy() + # + # # Aggregate across machines if >1 machine + # if len(machine_ids) > 1: + # job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() # --- absolute datetimes (used for filtering traces) --- submitted_dt = parse_timestamp(raw.get("submitted_time")) + print("***", machine_ids, start, end) + + gpu_trace = load_gpu_traces_by_day(gpu_trace_dir, start, end) + mask = ( (gpu_trace["machineId"].isin(machine_ids)) & - (gpu_trace["time"] >= start_dt) & - (gpu_trace["time"] <= end_dt) + (gpu_trace["time"] >= start) & + (gpu_trace["time"] <= end) ) - job_gpu = gpu_trace.loc[mask].copy() + #job_gpu = gpu_trace.loc[mask].copy() + #job_gpu_series = job_gpu["gpu_util"].tolist() + #job_gpu_series = (job_gpu["gpu_util"].to_numpy() * 0.01).tolist() + job_gpu_trace = (gpu_trace.loc[mask, "gpu_util"].to_numpy() * 0.01).tolist() - print(f" job_gpu shape after filtering: {job_gpu.shape}") - if job_gpu.empty: - print(" ⚠ No GPU rows matched this job") + #print(f" job_gpu shape after filtering: {job_gpu_trace.shape}") + #if job_gpu_trace.empty: + # print(" ⚠ No GPU rows matched this job") if machine_ids: # Shift times relative to start_ts @@ -301,7 +279,7 @@ def load_data(files, **kwargs): scheduled_nodes=scheduled_nodes, cpu_trace=0, - gpu_trace=job_gpu, + gpu_trace=job_gpu_trace, ntx_trace=None, nrx_trace=None, @@ -321,6 +299,8 @@ def load_data(files, **kwargs): jobs_list.append(Job(job)) print(job) + + exit() # Find max end timestamp across jobs #end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) -- GitLab From 89a03fe92fd80c59f54de480ca249cb0e2334024 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 29 Sep 2025 02:57:55 +0300 Subject: [PATCH 326/388] Remove the job_log[:1000] slice to use full job_log --- raps/dataloaders/philly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 8747e16..8d6f1fe 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -171,7 +171,7 @@ def load_data(files, **kwargs): # --- Second pass: build jobs --- jobs_list = [] - for raw in tqdm(job_log[:1000], desc="Building Job objects"): + for raw in tqdm(job_log, desc="Building Job objects"): jobid = raw.get("jobid") user = raw.get("user") status = raw.get("status") -- GitLab From 273cd768c0f274002f0bec1c0e389f2b019993e7 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 30 Sep 2025 00:40:51 +0300 Subject: [PATCH 327/388] Get gpu utilization fully working correctly --- raps/dataloaders/philly.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 8d6f1fe..473f426 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -210,6 +210,9 @@ def 
load_data(files, **kwargs): mid = detail["ip"] machine_ids.append(mid) gpus += len(detail.get("gpus", [])) + + num_nodes = len(machine_ids) + gpus_per_node = gpus // num_nodes # CPU utilization traces #if machine_ids and start and end: @@ -227,8 +230,6 @@ def load_data(files, **kwargs): # --- absolute datetimes (used for filtering traces) --- submitted_dt = parse_timestamp(raw.get("submitted_time")) - print("***", machine_ids, start, end) - gpu_trace = load_gpu_traces_by_day(gpu_trace_dir, start, end) mask = ( @@ -236,14 +237,8 @@ def load_data(files, **kwargs): (gpu_trace["time"] >= start) & (gpu_trace["time"] <= end) ) - #job_gpu = gpu_trace.loc[mask].copy() - #job_gpu_series = job_gpu["gpu_util"].tolist() - #job_gpu_series = (job_gpu["gpu_util"].to_numpy() * 0.01).tolist() - job_gpu_trace = (gpu_trace.loc[mask, "gpu_util"].to_numpy() * 0.01).tolist() - - #print(f" job_gpu shape after filtering: {job_gpu_trace.shape}") - #if job_gpu_trace.empty: - # print(" ⚠ No GPU rows matched this job") + # Convert traces from percent to fraction of gpus_per_node, e.g., 8 gpus at 100% is 8, at 50% is 4, etc. + job_gpu_trace = (gpu_trace.loc[mask, "gpu_util"].to_numpy() * 0.01 * gpus_per_node).tolist() if machine_ids: # Shift times relative to start_ts @@ -271,9 +266,7 @@ def load_data(files, **kwargs): priority=0, cpu_cores_required=0, - gpu_units_required=gpus, - allocated_cpu_cores=0, - allocated_gpu_units=gpus, + gpu_units_required=gpus_per_node, end_state=status, scheduled_nodes=scheduled_nodes, -- GitLab From ad076e7a65147c334494ee5f8277c1269067dbc2 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 30 Sep 2025 01:52:56 +0300 Subject: [PATCH 328/388] Get cpu traces also working (currently just single job test case) --- raps/dataloaders/philly.py | 110 +++++++++++++++++++++++++++++-------- 1 file changed, 88 insertions(+), 22 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 473f426..d8b065e 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -43,10 +43,9 @@ def parse_timestamp(val): return None return None -def load_gpu_traces_by_day(trace_dir, start_dt, end_dt): - """Load GPU traces only for the days between start_dt and end_dt.""" +def load_traces_by_day(trace_dir, start_dt, end_dt, colname): + """Load CPU or GPU traces between start_dt and end_dt.""" frames = [] - current = start_dt.date() while current <= end_dt.date(): @@ -54,19 +53,81 @@ def load_gpu_traces_by_day(trace_dir, start_dt, end_dt): if os.path.exists(daily_file): df = pd.read_csv( daily_file, - names=["time", "machineId", "gpu_util"], # no header in daily CSVs - parse_dates=["time"] + names=["time", "machineId", colname], # no header in daily CSVs + dtype={"machineId": str, colname: str}, # avoid DtypeWarning ) + + # Normalize time column (strip PST/PDT, parse datetime) + df["time"] = df["time"].str.replace(" PST", "").str.replace(" PDT", "") + df["time"] = pd.to_datetime(df["time"], errors="coerce", format=DATE_FORMAT_STR) + + # Convert util column to numeric (NA/invalid → NaN) + df[colname] = pd.to_numeric(df[colname], errors="coerce") + frames.append(df) else: print(f"⚠ No trace file for {current}") current += timedelta(days=1) if not frames: - return pd.DataFrame(columns=["time", "machineId", "gpu_util"]) + return pd.DataFrame(columns=["time", "machineId", colname]) return pd.concat(frames, ignore_index=True) +#def load_traces_by_day(trace_dir, start_dt, end_dt, colname): +# """Load CPU or GPU traces between start_dt and end_dt. 
+# +# Args: +# trace_dir (str): Directory containing daily CSV files. +# start_dt (datetime): Start datetime. +# end_dt (datetime): End datetime. +# colname (str): Name of the utilization column (e.g., 'cpu_util' or 'gpu_util'). +# """ +# frames = [] +# current = start_dt.date() +# +# while current <= end_dt.date(): +# daily_file = os.path.join(trace_dir, f"{current}.csv") +# if os.path.exists(daily_file): +# df = pd.read_csv( +# daily_file, +# names=["time", "machineId", colname], # no header in daily CSVs +# parse_dates=["time"] +# ) +# frames.append(df) +# else: +# print(f"⚠ No trace file for {current}") +# current += timedelta(days=1) +# +# if not frames: +# return pd.DataFrame(columns=["time", "machineId", colname]) +# +# return pd.concat(frames, ignore_index=True) + +#def load_gpu_traces_by_day(trace_dir, start_dt, end_dt): +# """Load GPU traces only for the days between start_dt and end_dt.""" +# frames = [] +# +# current = start_dt.date() +# +# while current <= end_dt.date(): +# daily_file = os.path.join(trace_dir, f"{current}.csv") +# if os.path.exists(daily_file): +# df = pd.read_csv( +# daily_file, +# names=["time", "machineId", "gpu_util"], # no header in daily CSVs +# parse_dates=["time"] +# ) +# frames.append(df) +# else: +# print(f"⚠ No trace file for {current}") +# current += timedelta(days=1) +# +# if not frames: +# return pd.DataFrame(columns=["time", "machineId", "gpu_util"]) +# +# return pd.concat(frames, ignore_index=True) + def parse_date(s): if not s or s == "None": return None @@ -131,6 +192,7 @@ def load_data(files, **kwargs): start_dt = datetime.fromtimestamp(start_ts, tz=PDT) end_dt = datetime.fromtimestamp(end_ts, tz=PDT) + cpu_trace_dir = os.path.join(trace_dir, "dist", "cpu_by_day") gpu_trace_dir = os.path.join(trace_dir, "dist", "gpu_by_day") # --- 4. 
Job log --- @@ -214,23 +276,27 @@ def load_data(files, **kwargs): num_nodes = len(machine_ids) gpus_per_node = gpus // num_nodes - # CPU utilization traces - #if machine_ids and start and end: - # mask = ( - # cpu_util["machine_id"].isin(machine_ids) & - # (cpu_util["time"] >= start) & - # (cpu_util["time"] <= end) - # ) - # job_cpu = cpu_util.loc[mask].copy() - # - # # Aggregate across machines if >1 machine - # if len(machine_ids) > 1: - # job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() - # --- absolute datetimes (used for filtering traces) --- submitted_dt = parse_timestamp(raw.get("submitted_time")) - gpu_trace = load_gpu_traces_by_day(gpu_trace_dir, start, end) + # CPU utilization traces + cpu_trace = load_traces_by_day(cpu_trace_dir, start, end, "cpu_util") + + mask = ( + (cpu_trace["machineId"].isin(machine_ids)) & + (cpu_trace["time"] >= start) & + (cpu_trace["time"] <= end) + ) + job_cpu = cpu_trace.loc[mask].copy() + + # Aggregate across machines if >1 machine + if len(machine_ids) > 1: + job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() + + job_cpu_trace = (job_cpu["cpu_util"].to_numpy() * 0.01).tolist() + + # Extract GPU utilization traces + gpu_trace = load_traces_by_day(gpu_trace_dir, start, end, "gpu_util") mask = ( (gpu_trace["machineId"].isin(machine_ids)) & @@ -265,13 +331,13 @@ def load_data(files, **kwargs): partition=partition_id, priority=0, - cpu_cores_required=0, + cpu_cores_required=1, gpu_units_required=gpus_per_node, end_state=status, scheduled_nodes=scheduled_nodes, - cpu_trace=0, + cpu_trace=job_cpu_trace, gpu_trace=job_gpu_trace, ntx_trace=None, nrx_trace=None, -- GitLab From a83a5aaa287e327adcb47226da4775c41ad11f54 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 30 Sep 2025 02:22:18 +0300 Subject: [PATCH 329/388] A few more fixes and cleanups to try to get working for a single day --- raps/dataloaders/philly.py | 85 +++++++++++--------------------------- 1 file changed, 23 insertions(+), 62 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index d8b065e..840f566 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -74,60 +74,6 @@ def load_traces_by_day(trace_dir, start_dt, end_dt, colname): return pd.concat(frames, ignore_index=True) -#def load_traces_by_day(trace_dir, start_dt, end_dt, colname): -# """Load CPU or GPU traces between start_dt and end_dt. -# -# Args: -# trace_dir (str): Directory containing daily CSV files. -# start_dt (datetime): Start datetime. -# end_dt (datetime): End datetime. -# colname (str): Name of the utilization column (e.g., 'cpu_util' or 'gpu_util'). 
-# """ -# frames = [] -# current = start_dt.date() -# -# while current <= end_dt.date(): -# daily_file = os.path.join(trace_dir, f"{current}.csv") -# if os.path.exists(daily_file): -# df = pd.read_csv( -# daily_file, -# names=["time", "machineId", colname], # no header in daily CSVs -# parse_dates=["time"] -# ) -# frames.append(df) -# else: -# print(f"⚠ No trace file for {current}") -# current += timedelta(days=1) -# -# if not frames: -# return pd.DataFrame(columns=["time", "machineId", colname]) -# -# return pd.concat(frames, ignore_index=True) - -#def load_gpu_traces_by_day(trace_dir, start_dt, end_dt): -# """Load GPU traces only for the days between start_dt and end_dt.""" -# frames = [] -# -# current = start_dt.date() -# -# while current <= end_dt.date(): -# daily_file = os.path.join(trace_dir, f"{current}.csv") -# if os.path.exists(daily_file): -# df = pd.read_csv( -# daily_file, -# names=["time", "machineId", "gpu_util"], # no header in daily CSVs -# parse_dates=["time"] -# ) -# frames.append(df) -# else: -# print(f"⚠ No trace file for {current}") -# current += timedelta(days=1) -# -# if not frames: -# return pd.DataFrame(columns=["time", "machineId", "gpu_util"]) -# -# return pd.concat(frames, ignore_index=True) - def parse_date(s): if not s or s == "None": return None @@ -188,9 +134,11 @@ def load_data(files, **kwargs): cpu_util["time"] = cpu_util["time"].apply(parse_date) # --- 3. GPU util --- - PDT = timezone(timedelta(hours=-7)) - start_dt = datetime.fromtimestamp(start_ts, tz=PDT) - end_dt = datetime.fromtimestamp(end_ts, tz=PDT) + #PDT = timezone(timedelta(hours=-7)) + #start_dt = datetime.fromtimestamp(start_ts, tz=PDT) + #end_dt = datetime.fromtimestamp(end_ts, tz=PDT) + start_dt = datetime.fromtimestamp(start_ts) # naive datetime + end_dt = datetime.fromtimestamp(end_ts) cpu_trace_dir = os.path.join(trace_dir, "dist", "cpu_by_day") gpu_trace_dir = os.path.join(trace_dir, "dist", "gpu_by_day") @@ -279,8 +227,15 @@ def load_data(files, **kwargs): # --- absolute datetimes (used for filtering traces) --- submitted_dt = parse_timestamp(raw.get("submitted_time")) + # Clamp to global CLI window - this should be fixed later to include the actual + # trace start and end times (trace_start_time? and trace_end_time?) + #job_start = max(start, start_dt) if start else start_dt + #job_end = min(end, end_dt) if end else end_dt + job_start = start + job_end = end + # CPU utilization traces - cpu_trace = load_traces_by_day(cpu_trace_dir, start, end, "cpu_util") + cpu_trace = load_traces_by_day(cpu_trace_dir, job_start, job_end, "cpu_util") mask = ( (cpu_trace["machineId"].isin(machine_ids)) & @@ -293,10 +248,11 @@ def load_data(files, **kwargs): if len(machine_ids) > 1: job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() + # Convert from percentage to fraction job_cpu_trace = (job_cpu["cpu_util"].to_numpy() * 0.01).tolist() # Extract GPU utilization traces - gpu_trace = load_traces_by_day(gpu_trace_dir, start, end, "gpu_util") + gpu_trace = load_traces_by_day(gpu_trace_dir, job_start, job_end, "gpu_util") mask = ( (gpu_trace["machineId"].isin(machine_ids)) & @@ -304,7 +260,14 @@ def load_data(files, **kwargs): (gpu_trace["time"] <= end) ) # Convert traces from percent to fraction of gpus_per_node, e.g., 8 gpus at 100% is 8, at 50% is 4, etc. 
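
As a standalone illustration of this unit conversion (toy values, not the module's real
frames), mean utilization in percent scales into fractional GPU units via gpus_per_node;
the hunk that follows aggregates across machines before scaling:

    # Toy stand-in for the per-day frames; machineId/gpu_util match the daily CSVs.
    import pandas as pd

    gpus_per_node = 8
    job_gpu = pd.DataFrame({
        "time": pd.to_datetime(["2017-10-03 00:00:00", "2017-10-03 00:00:00",
                                "2017-10-03 00:01:00", "2017-10-03 00:01:00"]),
        "machineId": ["m1", "m2", "m1", "m2"],
        "gpu_util": [100.0, 50.0, 25.0, 75.0],  # percent, as stored in the traces
    })

    # Mean across the job's machines at each timestep, then percent -> GPU units
    job_gpu = job_gpu.groupby("time")["gpu_util"].mean().reset_index()
    print((job_gpu["gpu_util"].to_numpy() * 0.01 * gpus_per_node).tolist())
    # [6.0, 4.0]: a 75% mean on an 8-GPU node is 6 GPU-equivalents, 50% is 4
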
-        job_gpu_trace = (gpu_trace.loc[mask, "gpu_util"].to_numpy() * 0.01 * gpus_per_node).tolist()
+        job_gpu = gpu_trace.loc[mask].copy()
+
+        # Aggregate across machines if >1 machine
+        if len(machine_ids) > 1:
+            job_gpu = job_gpu.groupby("time")["gpu_util"].mean().reset_index()
+
+        job_gpu_trace = (job_gpu["gpu_util"].to_numpy() * 0.01 * gpus_per_node).tolist()
+
 
         if machine_ids:
             # Shift times relative to start_ts
@@ -359,8 +322,6 @@ def load_data(files, **kwargs):
 
         print(job)
 
-        exit()
-
     # Find max end timestamp across jobs
     #end_ts = max(j.end_time for j in jobs_list if j.end_time is not None)
     end_ts = 3600
-- 
GitLab


From f709929b4df255dbf04739297e80c95500144d89 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 30 Sep 2025 02:25:15 +0300
Subject: [PATCH 330/388] Add scripts/parse_philly_traces.py

---
 scripts/parse_philly_traces.py | 63 ++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 scripts/parse_philly_traces.py

diff --git a/scripts/parse_philly_traces.py b/scripts/parse_philly_traces.py
new file mode 100644
index 0000000..5693e4e
--- /dev/null
+++ b/scripts/parse_philly_traces.py
@@ -0,0 +1,63 @@
+import os
+import sys
+from datetime import datetime
+from tqdm import tqdm
+
+if len(sys.argv) < 2:
+    print("Usage: python parse_by_day.py <input_file>")
+    sys.exit(1)
+
+input_file = sys.argv[1]
+
+with open(input_file) as f:
+    total_lines = sum(1 for _ in f) - 1
+
+with open(input_file) as f:
+    header = f.readline().strip().split(",")
+    print("Header:", header)
+
+    # detect file type from header
+    is_cpu = "cpu_util" in [h.lower() for h in header]
+
+    # pick output dir name based on file type
+    output_dir = "cpu_by_day" if is_cpu else "gpu_by_day"
+    os.makedirs(output_dir, exist_ok=True)
+
+    #for i, line in enumerate(f, 1):
+    for line in tqdm(f, total=total_lines, desc="Processing lines"):
+        parts = line.strip().split(",")
+
+        if len(parts) < 3:
+            continue
+
+        raw_time = parts[0].replace(" PST", "").replace(" PDT", "")
+        try:
+            ts = datetime.strptime(raw_time, "%Y-%m-%d %H:%M:%S")
+        except ValueError:
+            continue
+
+        machine_id = parts[1]
+
+        if is_cpu:
+            try:
+                value = float(parts[2])
+            except ValueError:
+                value = 0.0
+            label = "cpu_util"
+        else:
+            utils = []
+            for v in parts[2:]:
+                try:
+                    utils.append(float(v))
+                except ValueError:
+                    pass
+            value = sum(utils) / max(1, len([u for u in utils if u > 0]))
+            label = "gpu_util"
+
+        day_str = ts.strftime("%Y-%m-%d")
+        out_path = os.path.join(output_dir, f"{day_str}.csv")
+
+        with open(out_path, "a") as out:
+            if out.tell() == 0:  # only write header if file is new
+                out.write(f"time,machine_id,{label}\n")
+            out.write(f"{ts},{machine_id},{value:.3f}\n")
-- 
GitLab


From 9512db297653fe800b1c1b4af79221569ec81de5 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 30 Sep 2025 23:41:37 +0300
Subject: [PATCH 331/388] Add configs for perlmutter and selene

---
 config/perlmutter.yaml | 51 ++++++++++++++++++++++++++++++
 config/selene.yaml     | 51 ++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 config/perlmutter.yaml
 create mode 100644 config/selene.yaml

diff --git a/config/perlmutter.yaml b/config/perlmutter.yaml
new file mode 100644
index 0000000..8863a36
--- /dev/null
+++ b/config/perlmutter.yaml
@@ -0,0 +1,51 @@
+system:
+  num_cdus: 36
+  racks_per_cdu: 3
+  nodes_per_rack: 128
+  rectifiers_per_rack: 32
+  chassis_per_rack: 8
+  nodes_per_blade: 2
+  switches_per_chassis: 4
+  nics_per_node: 4
+  rectifiers_per_chassis: 4
+  nodes_per_rectifier: 4
+  missing_racks: []
+  down_nodes:
[] + cpus_per_node: 1 + gpus_per_node: 4 + cpu_peak_flops: 3580000000000.0 + gpu_peak_flops: 9700000000000.0 + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 300 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + seed: 42 + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 diff --git a/config/selene.yaml b/config/selene.yaml new file mode 100644 index 0000000..8f42bf6 --- /dev/null +++ b/config/selene.yaml @@ -0,0 +1,51 @@ +system: + num_cdus: 20 + racks_per_cdu: 7 + nodes_per_rack: 4 + rectifiers_per_rack: 32 + chassis_per_rack: 4 + nodes_per_blade: 2 + switches_per_chassis: 4 + nics_per_node: 4 + rectifiers_per_chassis: 4 + nodes_per_rectifier: 4 + missing_racks: [] + down_nodes: [] + cpus_per_node: 2 + gpus_per_node: 8 + cpu_peak_flops: 3481000000000.0 + gpu_peak_flops: 624000000000000.0 # BF8 performance + cpu_fp_ratio: 0.667 + gpu_fp_ratio: 0.667 +power: + power_gpu_idle: 88 + power_gpu_max: 400 + power_cpu_idle: 90 + power_cpu_max: 280 + power_mem: 74.26 + power_nic: 20 + power_nvme: 30 + power_switch: 250 + power_cdu: 8473.47 + power_update_freq: 15 + rectifier_peak_threshold: 13670 + sivoc_loss_constant: 13 + sivoc_efficiency: 0.98 + rectifier_loss_constant: 17 + rectifier_efficiency: 0.96 + power_cost: 0.094 +scheduler: + seed: 42 + job_arrival_time: 900 + mtbf: 11 + trace_quanta: 15 + min_wall_time: 3600 + max_wall_time: 43200 + ui_update_freq: 900 + max_nodes_per_job: 3000 + job_end_probs: + COMPLETED: 0.63 + FAILED: 0.13 + CANCELLED: 0.12 + TIMEOUT: 0.11 + NODE_FAIL: 0.01 -- GitLab From 26912590eae6f002c8e69bade78439d264d9226f Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 1 Oct 2025 17:58:54 +0300 Subject: [PATCH 332/388] Fix issue with end time of simulation for philly traces --- raps/dataloaders/philly.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 840f566..636d943 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -321,10 +321,15 @@ def load_data(files, **kwargs): jobs_list.append(Job(job)) print(job) - - # Find max end timestamp across jobs - #end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) - end_ts = 3600 + +# if len(jobs_list) >= 5: +# break + + # Find max end timestamp across jobs, relative to first job + end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) + + # Absolute end_ts + end_ts = start_ts + end_ts return WorkloadData( jobs=jobs_list, -- GitLab From 02506f01196d9e037dccda665c45f0b7f3a6a969 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 1 Oct 2025 19:43:02 +0300 Subject: [PATCH 333/388] Got philly working and scheduling on both partitions, but only by setting start_time to 0 --- raps/dataloaders/philly.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 636d943..247d967 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -274,6 +274,9 @@ def 
load_data(files, **kwargs): submit_time = submitted.timestamp() - start_ts if submitted else None start_time = start.timestamp() - start_ts if start else None end_time = end.timestamp() - start_ts if end else None + #submit_time = submitted.timestamp() + #start_time = start.timestamp() + #end_time = end.timestamp() if not submit_time or not start_time or not end_time: warnings.warn( @@ -305,31 +308,37 @@ def load_data(files, **kwargs): ntx_trace=None, nrx_trace=None, - submit_time=submit_time, - start_time=start_time, + submit_time=0, #submit_time, + start_time=0, #start_time, end_time=end_time, - time_limit=0, + time_limit=end_time, #0, expected_run_time=wall_time if wall_time else 0, current_run_time=0, trace_time=None, - trace_start_time=None, - trace_end_time=None, - trace_quanta=None, + trace_start_time=0, #None, + trace_end_time=end_time, #None, + trace_quanta=60, trace_missing_values=False, downscale=1 ) - jobs_list.append(Job(job)) + if job_cpu_trace and job_gpu_trace: + jobs_list.append(Job(job)) - print(job) + #print(job) + print(start_ts, job["start_time"], job["end_time"]) -# if len(jobs_list) >= 5: -# break + if len(jobs_list) >= 20: + break # Find max end timestamp across jobs, relative to first job - end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) + #start_ts = min(j.start_time for j in jobs_list if j.start_time is not None) + #end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) # Absolute end_ts - end_ts = start_ts + end_ts + #end_ts = start_ts + end_ts + end_ts = start_ts + 43200 + + print("***", start_ts, end_ts) return WorkloadData( jobs=jobs_list, -- GitLab From 80b299962648ea65569f5b0f0dddf059ea24620e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 1 Oct 2025 20:58:26 +0300 Subject: [PATCH 334/388] Get it fully working now with proper start times (note manually implemented 2baf2b1b here) --- raps/dataloaders/philly.py | 62 ++++++++++++++++++++-------------- raps/engine.py | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 25 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 247d967..a0cc2af 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -134,10 +134,7 @@ def load_data(files, **kwargs): cpu_util["time"] = cpu_util["time"].apply(parse_date) # --- 3. 
GPU util --- - #PDT = timezone(timedelta(hours=-7)) - #start_dt = datetime.fromtimestamp(start_ts, tz=PDT) - #end_dt = datetime.fromtimestamp(end_ts, tz=PDT) - start_dt = datetime.fromtimestamp(start_ts) # naive datetime + start_dt = datetime.fromtimestamp(start_ts) end_dt = datetime.fromtimestamp(end_ts) cpu_trace_dir = os.path.join(trace_dir, "dist", "cpu_by_day") @@ -148,6 +145,20 @@ def load_data(files, **kwargs): with open(job_file) as f: job_log = json.load(f) + # --- First pass: filter jobs by date range --- + filtered_log = [] + for raw in job_log: + submitted = raw.get("submitted_time") + if submitted is None or submitted == "None": + continue + if isinstance(submitted, (int, float)): + submitted_dt = datetime.fromtimestamp(int(submitted)) + else: + submitted_dt = parse_date(submitted) + if submitted_dt and start_dt <= submitted_dt <= end_dt: + filtered_log.append(raw) + job_log = filtered_log + # Filter job_log to only jobs matching the partition's gpus_per_node if gpus_per_node is not None: filtered_log = [] @@ -162,6 +173,15 @@ def load_data(files, **kwargs): # --- First pass: find earliest submit time --- start_ts = None + + ### debug + print("num jobs found", len(job_log)) + for job in job_log[:100]: + print(f"Job {job['jobid']}:") + for attempt in job["attempts"]: + print(" Start:", attempt["start_time"]) + ### end debug + for raw in job_log: submitted = raw.get("submitted_time") if submitted is None or submitted == "None": @@ -176,6 +196,9 @@ def load_data(files, **kwargs): if start_ts is None or t < start_ts: start_ts = t + # debug + print(f"Job {job['jobid']}: submit_time {submitted}, start_ts: {start_ts}") + if start_ts is None: raise ValueError("No valid submitted_time found in Philly traces") @@ -222,6 +245,8 @@ def load_data(files, **kwargs): gpus += len(detail.get("gpus", [])) num_nodes = len(machine_ids) + if num_nodes == 0: + continue gpus_per_node = gpus // num_nodes # --- absolute datetimes (used for filtering traces) --- @@ -274,15 +299,13 @@ def load_data(files, **kwargs): submit_time = submitted.timestamp() - start_ts if submitted else None start_time = start.timestamp() - start_ts if start else None end_time = end.timestamp() - start_ts if end else None - #submit_time = submitted.timestamp() - #start_time = start.timestamp() - #end_time = end.timestamp() if not submit_time or not start_time or not end_time: warnings.warn( f"skipped {jobid} b/c missing submit_time, start_time, or end_time", UserWarning ) + continue scheduled_nodes = [node_mapping[mid] for mid in machine_ids if mid in node_mapping] @@ -308,10 +331,11 @@ def load_data(files, **kwargs): ntx_trace=None, nrx_trace=None, - submit_time=0, #submit_time, - start_time=0, #start_time, + submit_time=submit_time, + start_time=start_time, end_time=end_time, - time_limit=end_time, #0, + #time_limit=end_time - start_time, + time_limit=end_time, expected_run_time=wall_time if wall_time else 0, current_run_time=0, trace_time=None, @@ -323,22 +347,10 @@ def load_data(files, **kwargs): ) if job_cpu_trace and job_gpu_trace: jobs_list.append(Job(job)) + else: + tqdm.write(f"skipping {job['id']} b/c either no cpu or gpu trace") - #print(job) - print(start_ts, job["start_time"], job["end_time"]) - - if len(jobs_list) >= 20: - break - - # Find max end timestamp across jobs, relative to first job - #start_ts = min(j.start_time for j in jobs_list if j.start_time is not None) - #end_ts = max(j.end_time for j in jobs_list if j.end_time is not None) - - # Absolute end_ts - #end_ts = start_ts + end_ts - end_ts = start_ts + 
43200 - - print("***", start_ts, end_ts) + tqdm.write(f"abs start time: {start_ts} rel job start: {job['start_time']} rel job end: {job['end_time']}") return WorkloadData( jobs=jobs_list, diff --git a/raps/engine.py b/raps/engine.py index 67cd999..6e3ad19 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -340,6 +340,75 @@ class Engine: start_date=self.start, ) + if sim_config.live and not sim_config.replay: + td = Telemetry(**sim_config_dict) + workload_data = td.load_from_live_system() + elif sim_config.replay: + # TODO: this will have issues if running separate systems or custom systems + partition_short = partition.split("/")[-1] if partition else None + td = Telemetry( + **sim_config_dict, + partition=partition, + ) + if partition: + snap_map = {p.stem: p for p in sim_config.replay[0].glob("*.npz")} + if len(snap_map) > 0: + if partition_short not in snap_map: + raise RuntimeError(f"Snapshot '{partition_short}.npz' not in {sim_config.replay[0]}") + replay_files = [snap_map[partition_short]] + else: + replay_files = sim_config.replay + else: + replay_files = sim_config.replay + + workload_data = td.load_from_files(replay_files) + else: # Synthetic jobs + wl = Workload(sim_config_args, system_config_dict) + workload_data = wl.generate_jobs() + td = Telemetry(**sim_config_dict) + + jobs = workload_data.jobs + + # TODO refactor how stat/end/fastforward/time work + if sim_config.fastforward is not None: + workload_data.telemetry_start = workload_data.telemetry_start + sim_config.fastforward + + if sim_config.time is not None: + workload_data.telemetry_end = workload_data.telemetry_start + sim_config.time + + if sim_config.time_delta is not None: + time_delta = sim_config.time_delta + else: + time_delta = 1 + + if sim_config.continuous_job_generation: + continuous_workload = wl + else: + continuous_workload = None + + accounts = None + if sim_config.accounts: + job_accounts = Accounts(jobs) + if sim_config.accounts_json: + loaded_accounts = Accounts.from_json_filename(sim_config.accounts_json) + accounts = Accounts.merge(loaded_accounts, job_accounts) + else: + accounts = job_accounts + + engine = Engine( + power_manager=power_manager, + flops_manager=flops_manager, + cooling_model=cooling_model, + continuous_workload=continuous_workload, + jobs=jobs, + accounts=accounts, + telemetry=td, + sim_config=sim_config, + system_config=system_config, + ) + + return engine, workload_data, time_delta + def add_running_jobs_to_queue(self, jobs_to_submit: List): """ Modifies jobs_to_submit and self.queue -- GitLab From cdff947d5fc3198a1dc00c887f38979475742b81 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 1 Oct 2025 22:40:32 +0300 Subject: [PATCH 335/388] Add docstring to top of file and work on dataloader performance enhancements --- raps/dataloaders/philly.py | 190 +++++++++++++++++++++++++------------ 1 file changed, 128 insertions(+), 62 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index a0cc2af..990a72f 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -1,9 +1,75 @@ +""" +Main reference to Philly traces: + + Jeon, Myeongjae, et al. "Analysis of Large-Scale Multi-Tenant GPU clusters for DNN training workloads." + 2019 USENIX Annual Technical Conference (USENIX ATC 19). 2019. 
+ https://www.usenix.org/system/files/atc19-jeon.pdf + +The repository is available here: + + https://github.com/msr-fiddle/philly-traces + +The data portion of the repo can be downloaded using one of the following methods: + + git clone https://github.com/msr-fiddle/philly-traces.git + cd philly-traces + git lfs pull + + wget https://github.com/msr-fiddle/philly-traces/raw/master/trace-data.tar.gz + + curl -L -o trace-data.tar.gz https://github.com/msr-fiddle/philly-traces/raw/master/trace-data.tar.gz + +Once the file is downloaded, assuming its in /opt/data/philly/trace-data directory: + + /opt/data/philly/trace-data/trace-data.tar.gz + + cd /opt/data/philly/trace-data + + run `tar xvfz trace-data.tar.gz` which will unpack the following files: + + cluster_cpu_util 1.5G + cluster_gpu_util 2.8G + cluster_mem_util 2.2G + cluster_job_log 37M + cluster_machine_list 8K + + then run the following: + + python /path/to/raps/scripts/parse_philly_traces.py cluster_cpu_util + python /path/to/raps/scripts/parse_philly_traces.py cluster_gpu_util + + this will parse these two files into two directories, cpu_by_day and gpu_by_day, + creating one file for each day and adding the lines for that day into the files. + + sanity checks: + + wc -l cluster_cpu_util + 45028261 cluster_cpu_util + wc -l cpu_by_day/*.csv + 45350898 total + + wc -l cluster_gpu_util + 44750641 cluster_gpu_util + wc -l gpu_by_day/*.csv + 44750640 total + +Running a replay simulation: + + python main.py run-parts -x philly -f /opt/data/philly/trace-data --start 2017-10-03T00:00 --end 2017-10-04T00:00 + +One the dataloader has been run at least once, it will dump npz files into a directory, so +they can be replayed again without having to go through the expensive extractoin process, using e.g.: + + python main.py run-parts -x philly -f raps-output-5efefa3 + +Note: it is possible to run simulations for an user-defined length of time between 10/3/2017 to 12/15/2017. + +""" import os import glob import json import csv import pandas as pd -import warnings from datetime import datetime, timezone, timedelta from tqdm import tqdm @@ -45,7 +111,7 @@ def parse_timestamp(val): def load_traces_by_day(trace_dir, start_dt, end_dt, colname): """Load CPU or GPU traces between start_dt and end_dt.""" - frames = [] + traces = {} current = start_dt.date() while current <= end_dt.date(): @@ -64,15 +130,15 @@ def load_traces_by_day(trace_dir, start_dt, end_dt, colname): # Convert util column to numeric (NA/invalid → NaN) df[colname] = pd.to_numeric(df[colname], errors="coerce") - frames.append(df) + traces[current] = df else: print(f"⚠ No trace file for {current}") current += timedelta(days=1) - if not frames: - return pd.DataFrame(columns=["time", "machineId", colname]) + if not traces: + return {} - return pd.concat(frames, ignore_index=True) + return traces def parse_date(s): if not s or s == "None": @@ -91,13 +157,16 @@ def load_data(files, **kwargs): Returns: list[Job] """ + debug = kwargs.get("debug") + print("started reading of philly traces... 
please be patient...", flush=True) + # extract --start from kwargs start_ts = to_epoch(kwargs.get("start", DEFAULT_START)) end_ts = to_epoch(kwargs.get("end", DEFAULT_END)) assert len(files) == 1, "Expecting a single directory path" trace_dir = files[0] - gpu_trace_dir = os.path.join(files[0], "dist", "gpu_by_day") + gpu_trace_dir = os.path.join(files[0], "gpu_by_day") config = kwargs.get('config') gpus_per_node = config.get("GPUS_PER_NODE") if gpus_per_node is None: @@ -127,19 +196,10 @@ def load_data(files, **kwargs): # Assign partition ID (e.g. 0 for 2-GPU, 1 for 8-GPU) partition_id = 0 if gpus_per_node == 2 else 1 - # --- 2. CPU util --- - cpu_file = os.path.join(trace_dir, "cluster_cpu_util") - cpu_util = pd.read_csv(cpu_file) - cpu_util["time"] = cpu_util["time"].str.replace(" PST","").str.replace(" PDT","") - cpu_util["time"] = cpu_util["time"].apply(parse_date) - # --- 3. GPU util --- start_dt = datetime.fromtimestamp(start_ts) end_dt = datetime.fromtimestamp(end_ts) - cpu_trace_dir = os.path.join(trace_dir, "dist", "cpu_by_day") - gpu_trace_dir = os.path.join(trace_dir, "dist", "gpu_by_day") - # --- 4. Job log --- job_file = os.path.join(trace_dir, "cluster_job_log") with open(job_file) as f: @@ -174,14 +234,6 @@ def load_data(files, **kwargs): # --- First pass: find earliest submit time --- start_ts = None - ### debug - print("num jobs found", len(job_log)) - for job in job_log[:100]: - print(f"Job {job['jobid']}:") - for attempt in job["attempts"]: - print(" Start:", attempt["start_time"]) - ### end debug - for raw in job_log: submitted = raw.get("submitted_time") if submitted is None or submitted == "None": @@ -196,12 +248,15 @@ def load_data(files, **kwargs): if start_ts is None or t < start_ts: start_ts = t - # debug - print(f"Job {job['jobid']}: submit_time {submitted}, start_ts: {start_ts}") - if start_ts is None: raise ValueError("No valid submitted_time found in Philly traces") + # --- Pre-load all traces for the given date range --- + cpu_trace_dir = os.path.join(trace_dir, "cpu_by_day") + gpu_trace_dir = os.path.join(trace_dir, "gpu_by_day") + all_cpu_traces = load_traces_by_day(cpu_trace_dir, start_dt, end_dt, "cpu_util") + all_gpu_traces = load_traces_by_day(gpu_trace_dir, start_dt, end_dt, "gpu_util") + # --- Second pass: build jobs --- jobs_list = [] for raw in tqdm(job_log, desc="Building Job objects"): @@ -252,46 +307,59 @@ def load_data(files, **kwargs): # --- absolute datetimes (used for filtering traces) --- submitted_dt = parse_timestamp(raw.get("submitted_time")) - # Clamp to global CLI window - this should be fixed later to include the actual - # trace start and end times (trace_start_time? and trace_end_time?) 
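
The day-keyed frames that load_traces_by_day now returns make per-job extraction a
dictionary walk instead of repeated CSV reads. A rough sketch of that windowing pattern,
with hypothetical one-row frames standing in for real daily data:

    # date -> DataFrame, the shape load_traces_by_day returns after this patch.
    from datetime import date, datetime, timedelta
    import pandas as pd

    all_gpu_traces = {
        date(2017, 10, 3): pd.DataFrame({"time": pd.to_datetime(["2017-10-03 12:00:00"]),
                                         "machineId": ["m1"], "gpu_util": [80.0]}),
        date(2017, 10, 4): pd.DataFrame({"time": pd.to_datetime(["2017-10-04 00:30:00"]),
                                         "machineId": ["m1"], "gpu_util": [40.0]}),
    }

    job_start = datetime(2017, 10, 3, 11, 0)
    job_end = datetime(2017, 10, 4, 1, 0)

    dfs, current = [], job_start.date()
    while current <= job_end.date():  # walk only the days this job spans
        if current in all_gpu_traces:
            dfs.append(all_gpu_traces[current])
        current += timedelta(days=1)

    window = pd.concat(dfs, ignore_index=True)
    window = window[(window["time"] >= job_start) & (window["time"] <= job_end)]
    print(len(window))  # 2: both samples land inside the job's window
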
- #job_start = max(start, start_dt) if start else start_dt - #job_end = min(end, end_dt) if end else end_dt job_start = start job_end = end - # CPU utilization traces - cpu_trace = load_traces_by_day(cpu_trace_dir, job_start, job_end, "cpu_util") + if not job_start or not job_end: + continue - mask = ( - (cpu_trace["machineId"].isin(machine_ids)) & - (cpu_trace["time"] >= start) & - (cpu_trace["time"] <= end) - ) - job_cpu = cpu_trace.loc[mask].copy() + # --- CPU utilization traces --- + cpu_dfs = [] + current_date = job_start.date() + while current_date <= job_end.date(): + if current_date in all_cpu_traces: + cpu_dfs.append(all_cpu_traces[current_date]) + current_date += timedelta(days=1) - # Aggregate across machines if >1 machine - if len(machine_ids) > 1: - job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() + if not cpu_dfs: + job_cpu_trace = [] + else: + job_cpu_df = pd.concat(cpu_dfs, ignore_index=True) + mask = ( + (job_cpu_df["machineId"].isin(machine_ids)) & + (job_cpu_df["time"] >= start) & + (job_cpu_df["time"] <= end) + ) + job_cpu = job_cpu_df.loc[mask].copy() - # Convert from percentage to fraction - job_cpu_trace = (job_cpu["cpu_util"].to_numpy() * 0.01).tolist() + if len(machine_ids) > 1: + job_cpu = job_cpu.groupby("time")["cpu_util"].mean().reset_index() - # Extract GPU utilization traces - gpu_trace = load_traces_by_day(gpu_trace_dir, job_start, job_end, "gpu_util") + job_cpu_trace = (job_cpu["cpu_util"].to_numpy() * 0.01).tolist() - mask = ( - (gpu_trace["machineId"].isin(machine_ids)) & - (gpu_trace["time"] >= start) & - (gpu_trace["time"] <= end) - ) - # Convert traces from percent to fraction of gpus_per_node, e.g., 8 gpus at 100% is 8, at 50% is 4, etc. - job_gpu = gpu_trace.loc[mask].copy() + # --- GPU utilization traces --- + gpu_dfs = [] + current_date = job_start.date() + while current_date <= job_end.date(): + if current_date in all_gpu_traces: + gpu_dfs.append(all_gpu_traces[current_date]) + current_date += timedelta(days=1) - # Aggregate across machines if >1 machine - if len(machine_ids) > 1: - job_gpu = job_gpu.groupby("time")["gpu_util"].mean().reset_index() + if not gpu_dfs: + job_gpu_trace = [] + else: + job_gpu_df = pd.concat(gpu_dfs, ignore_index=True) + mask = ( + (job_gpu_df["machineId"].isin(machine_ids)) & + (job_gpu_df["time"] >= start) & + (job_gpu_df["time"] <= end) + ) + job_gpu = job_gpu_df.loc[mask].copy() - job_gpu_trace = (job_gpu["gpu_util"].to_numpy() * 0.01 * gpus_per_node).tolist() + if len(machine_ids) > 1: + job_gpu = job_gpu.groupby("time")["gpu_util"].mean().reset_index() + + job_gpu_trace = (job_gpu["gpu_util"].to_numpy() * 0.01 * gpus_per_node).tolist() if machine_ids: @@ -301,10 +369,7 @@ def load_data(files, **kwargs): end_time = end.timestamp() - start_ts if end else None if not submit_time or not start_time or not end_time: - warnings.warn( - f"skipped {jobid} b/c missing submit_time, start_time, or end_time", - UserWarning - ) + tqdm.write(f"skipped {jobid} b/c missing submit_time, start_time, or end_time") continue scheduled_nodes = [node_mapping[mid] for mid in machine_ids if mid in node_mapping] @@ -350,7 +415,8 @@ def load_data(files, **kwargs): else: tqdm.write(f"skipping {job['id']} b/c either no cpu or gpu trace") - tqdm.write(f"abs start time: {start_ts} rel job start: {job['start_time']} rel job end: {job['end_time']}") + if debug: + tqdm.write(f"abs start time: {start_ts} rel job start: {job['start_time']} rel job end: {job['end_time']}") return WorkloadData( jobs=jobs_list, -- GitLab From 
f397377b4df255dbf04739297e80c95500144d89 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 1 Oct 2025 23:31:44 +0300 Subject: [PATCH 336/388] Refined and document the hardware TDP and performance specs that were used for philly --- config/philly/2-gpu.yaml | 10 +++++----- config/philly/8-gpu.yaml | 10 +++++----- raps/dataloaders/philly.py | 13 ++++++++++++- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/config/philly/2-gpu.yaml b/config/philly/2-gpu.yaml index 0622605..d7201b2 100644 --- a/config/philly/2-gpu.yaml +++ b/config/philly/2-gpu.yaml @@ -13,15 +13,15 @@ system: cpus_per_node: 2 cores_per_cpu: 20 gpus_per_node: 2 - cpu_peak_flops: 1248000000000.0 - gpu_peak_flops: 7800000000000.0 + cpu_peak_flops: 1248000000000.0 # assume Xeon E5-2690v4 CPU 64-bit + gpu_peak_flops: 9300000000000.0 # assume 12G P100 32-bit cpu_fp_ratio: 0.667 gpu_fp_ratio: 0.667 power: - power_gpu_idle: 75 - power_gpu_max: 300 + power_gpu_idle: 30 + power_gpu_max: 250 power_cpu_idle: 90 - power_cpu_max: 280 + power_cpu_max: 270 power_mem: 74.26 power_nvme: 30 power_nic: 20 diff --git a/config/philly/8-gpu.yaml b/config/philly/8-gpu.yaml index aae80ee..1e92282 100644 --- a/config/philly/8-gpu.yaml +++ b/config/philly/8-gpu.yaml @@ -13,15 +13,15 @@ system: cpus_per_node: 2 cores_per_cpu: 20 gpus_per_node: 8 - cpu_peak_flops: 1248000000000.0 - gpu_peak_flops: 7800000000000.0 + cpu_peak_flops: 1248000000000.0 # assume Xeon E5-2690v4 CPU 64-bit + gpu_peak_flops: 12000000000000.0 # assume 24G P40 32-bit cpu_fp_ratio: 0.667 gpu_fp_ratio: 0.667 power: - power_gpu_idle: 75 - power_gpu_max: 300 + power_gpu_idle: 50 + power_gpu_max: 250 power_cpu_idle: 90 - power_cpu_max: 280 + power_cpu_max: 270 power_mem: 74.26 power_nvme: 30 power_nic: 20 diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 990a72f..552c647 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -1,10 +1,21 @@ """ -Main reference to Philly traces: +This is the dataloader for the Philly traces which is documented in this paper: Jeon, Myeongjae, et al. "Analysis of Large-Scale Multi-Tenant GPU clusters for DNN training workloads." 2019 USENIX Annual Technical Conference (USENIX ATC 19). 2019. https://www.usenix.org/system/files/atc19-jeon.pdf +Note on hardware specs: + + Philly only provides GPU memory sizes (12G & 24G) without clarifying GPU models. + Hu et al. (2024) https://arxiv.org/html/2403.07648v1 + + For estimating system power and FLOPS performance, we assume that the 2-GPU + nodes used Tesla P100 (12 GB) GPUs and the 8-GPU nodes used Tesla P40 (24 GB) + GPUs, consistent with hardware Microsoft deployed around 2017. Training is + assumed to have been performed in 32-bit (FP32), and the CPUs are assumed + to be 64-bit Intel Xeon E5-2690 v4. + The repository is available here: https://github.com/msr-fiddle/philly-traces -- GitLab From 63ffbc7f5ed3f5c6ddf8d87a2504e038f13806ca Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Thu, 2 Oct 2025 00:41:53 +0300 Subject: [PATCH 337/388] Refactor to improve pylint score. Move to using absolute times for jobs. --- raps/dataloaders/philly.py | 139 ++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 64 deletions(-) diff --git a/raps/dataloaders/philly.py b/raps/dataloaders/philly.py index 552c647..ca8121b 100644 --- a/raps/dataloaders/philly.py +++ b/raps/dataloaders/philly.py @@ -1,9 +1,9 @@ """ This is the dataloader for the Philly traces which is documented in this paper: - Jeon, Myeongjae, et al. 
"Analysis of Large-Scale Multi-Tenant GPU clusters for DNN training workloads." - 2019 USENIX Annual Technical Conference (USENIX ATC 19). 2019. - https://www.usenix.org/system/files/atc19-jeon.pdf + Jeon, Myeongjae, et al. "Analysis of Large-Scale Multi-Tenant GPU clusters + for DNN training workloads." 2019 USENIX Annual Technical Conference + (USENIX ATC 19). 2019. https://www.usenix.org/system/files/atc19-jeon.pdf Note on hardware specs: @@ -11,9 +11,9 @@ Note on hardware specs: Hu et al. (2024) https://arxiv.org/html/2403.07648v1 For estimating system power and FLOPS performance, we assume that the 2-GPU - nodes used Tesla P100 (12 GB) GPUs and the 8-GPU nodes used Tesla P40 (24 GB) - GPUs, consistent with hardware Microsoft deployed around 2017. Training is - assumed to have been performed in 32-bit (FP32), and the CPUs are assumed + nodes used Tesla P100 (12 GB) GPUs and the 8-GPU nodes used Tesla P40 (24 GB) + GPUs, consistent with hardware Microsoft deployed around 2017. Training is + assumed to have been performed in 32-bit (FP32), and the CPUs are assumed to be 64-bit Intel Xeon E5-2690 v4. The repository is available here: @@ -28,9 +28,10 @@ The data portion of the repo can be downloaded using one of the following method wget https://github.com/msr-fiddle/philly-traces/raw/master/trace-data.tar.gz - curl -L -o trace-data.tar.gz https://github.com/msr-fiddle/philly-traces/raw/master/trace-data.tar.gz + curl -L -o trace-data.tar.gz \ + https://github.com/msr-fiddle/philly-traces/raw/master/trace-data.tar.gz -Once the file is downloaded, assuming its in /opt/data/philly/trace-data directory: +After the file is downloaded, assuming its in /opt/data/philly/trace-data directory: /opt/data/philly/trace-data/trace-data.tar.gz @@ -66,32 +67,38 @@ Once the file is downloaded, assuming its in /opt/data/philly/trace-data directo Running a replay simulation: - python main.py run-parts -x philly -f /opt/data/philly/trace-data --start 2017-10-03T00:00 --end 2017-10-04T00:00 + python main.py run-parts -x philly -f /opt/data/philly/trace-data \ + --start 2017-10-03T00:00 --end 2017-10-04T00:00 -One the dataloader has been run at least once, it will dump npz files into a directory, so -they can be replayed again without having to go through the expensive extractoin process, using e.g.: +Once the dataloader has been run at least once, it will dump npz files into a directory, +so they can be replayed again without having to go through the expensive extractoin process, +using e.g.: python main.py run-parts -x philly -f raps-output-5efefa3 -Note: it is possible to run simulations for an user-defined length of time between 10/3/2017 to 12/15/2017. +Note: it is possible to run simulations for an user-defined length of time between +10/3/2017 to 12/15/2017. 
""" -import os -import glob -import json + import csv -import pandas as pd +import json +import os +from datetime import datetime, timedelta, timezone -from datetime import datetime, timezone, timedelta +import pandas as pd from tqdm import tqdm -from raps.job import job_dict, Job + +from raps.job import Job, job_dict from raps.utils import WorkloadData DATE_FORMAT_STR = "%Y-%m-%d %H:%M:%S" DEFAULT_START = "2017-10-03T00:00" DEFAULT_END = "2017-10-04T00:00" + def to_epoch(ts_str): + """Convert a timestamp string or int/float into epoch seconds.""" if ts_str is None: return None if isinstance(ts_str, (int, float)): @@ -102,6 +109,7 @@ def to_epoch(ts_str): dt = datetime.strptime(ts_str, DATE_FORMAT_STR) return int(dt.timestamp()) + def parse_timestamp(val): """ Convert Philly job log timestamps to datetime. @@ -120,6 +128,7 @@ def parse_timestamp(val): return None return None + def load_traces_by_day(trace_dir, start_dt, end_dt, colname): """Load CPU or GPU traces between start_dt and end_dt.""" traces = {} @@ -131,12 +140,14 @@ def load_traces_by_day(trace_dir, start_dt, end_dt, colname): df = pd.read_csv( daily_file, names=["time", "machineId", colname], # no header in daily CSVs - dtype={"machineId": str, colname: str}, # avoid DtypeWarning + dtype={"machineId": str, colname: str}, # avoid DtypeWarning ) # Normalize time column (strip PST/PDT, parse datetime) df["time"] = df["time"].str.replace(" PST", "").str.replace(" PDT", "") - df["time"] = pd.to_datetime(df["time"], errors="coerce", format=DATE_FORMAT_STR) + df["time"] = pd.to_datetime( + df["time"], errors="coerce", format=DATE_FORMAT_STR + ) # Convert util column to numeric (NA/invalid → NaN) df[colname] = pd.to_numeric(df[colname], errors="coerce") @@ -151,13 +162,16 @@ def load_traces_by_day(trace_dir, start_dt, end_dt, colname): return traces + def parse_date(s): + """Parse a Philly trace date string into a datetime object.""" if not s or s == "None": return None # strip possible timezone labels like "PST"/"PDT" s = s.replace(" PST", "").replace(" PDT", "") return datetime.strptime(s, DATE_FORMAT_STR) + def load_data(files, **kwargs): """ Load Philly trace into ExaDigiT Job objects. @@ -178,7 +192,7 @@ def load_data(files, **kwargs): assert len(files) == 1, "Expecting a single directory path" trace_dir = files[0] gpu_trace_dir = os.path.join(files[0], "gpu_by_day") - config = kwargs.get('config') + config = kwargs.get("config") gpus_per_node = config.get("GPUS_PER_NODE") if gpus_per_node is None: raise ValueError("Must pass gpus_per_node (2 or 8)") @@ -186,34 +200,34 @@ def load_data(files, **kwargs): # --- 1. Machine list --- machine_file = os.path.join(trace_dir, "cluster_machine_list") machines = {} - with open(machine_file) as f: + with open(machine_file, encoding="utf-8") as f: reader = csv.DictReader(f) for row in reader: mid = row["machineId"] machines[mid] = { "num_gpus": int(row[" number of GPUs"]), - "gpu_mem": row[" single GPU mem"].strip() + "gpu_mem": row[" single GPU mem"].strip(), } partition_machines = { - mid: info for mid, info in machines.items() - if info["num_gpus"] == gpus_per_node + mid: info for mid, info in machines.items() if info["num_gpus"] == gpus_per_node } # Build node → index mapping for this partition - node_mapping = {mid: idx for idx, mid in enumerate(sorted(partition_machines.keys()))} - max_nodes = len(node_mapping) + node_mapping = { + mid: idx for idx, mid in enumerate(sorted(partition_machines.keys())) + } # Assign partition ID (e.g. 
0 for 2-GPU, 1 for 8-GPU) partition_id = 0 if gpus_per_node == 2 else 1 # --- 3. GPU util --- start_dt = datetime.fromtimestamp(start_ts) - end_dt = datetime.fromtimestamp(end_ts) + end_dt = datetime.fromtimestamp(end_ts) # --- 4. Job log --- job_file = os.path.join(trace_dir, "cluster_job_log") - with open(job_file) as f: + with open(job_file, encoding="utf-8") as f: job_log = json.load(f) # --- First pass: filter jobs by date range --- @@ -237,7 +251,9 @@ def load_data(files, **kwargs): attempts = raw.get("attempts", []) if attempts and "detail" in attempts[0]: # Count GPUs from the first detail - gpus = sum(len(detail.get("gpus", [])) for detail in attempts[0]["detail"]) + gpus = sum( + len(detail.get("gpus", [])) for detail in attempts[0]["detail"] + ) if gpus > 0 and (gpus % gpus_per_node == 0): filtered_log.append(raw) job_log = filtered_log @@ -309,7 +325,7 @@ def load_data(files, **kwargs): mid = detail["ip"] machine_ids.append(mid) gpus += len(detail.get("gpus", [])) - + num_nodes = len(machine_ids) if num_nodes == 0: continue @@ -337,9 +353,9 @@ def load_data(files, **kwargs): else: job_cpu_df = pd.concat(cpu_dfs, ignore_index=True) mask = ( - (job_cpu_df["machineId"].isin(machine_ids)) & - (job_cpu_df["time"] >= start) & - (job_cpu_df["time"] <= end) + (job_cpu_df["machineId"].isin(machine_ids)) + & (job_cpu_df["time"] >= start) + & (job_cpu_df["time"] <= end) ) job_cpu = job_cpu_df.loc[mask].copy() @@ -361,65 +377,59 @@ def load_data(files, **kwargs): else: job_gpu_df = pd.concat(gpu_dfs, ignore_index=True) mask = ( - (job_gpu_df["machineId"].isin(machine_ids)) & - (job_gpu_df["time"] >= start) & - (job_gpu_df["time"] <= end) + (job_gpu_df["machineId"].isin(machine_ids)) + & (job_gpu_df["time"] >= start) + & (job_gpu_df["time"] <= end) ) job_gpu = job_gpu_df.loc[mask].copy() if len(machine_ids) > 1: job_gpu = job_gpu.groupby("time")["gpu_util"].mean().reset_index() - job_gpu_trace = (job_gpu["gpu_util"].to_numpy() * 0.01 * gpus_per_node).tolist() - + job_gpu_trace = ( + job_gpu["gpu_util"].to_numpy() * 0.01 * gpus_per_node + ).tolist() if machine_ids: - # Shift times relative to start_ts - submit_time = submitted.timestamp() - start_ts if submitted else None - start_time = start.timestamp() - start_ts if start else None - end_time = end.timestamp() - start_ts if end else None + submit_time = submitted.timestamp() + start_time = start.timestamp() + end_time = end.timestamp() if not submit_time or not start_time or not end_time: - tqdm.write(f"skipped {jobid} b/c missing submit_time, start_time, or end_time") + tqdm.write( + f"skipped {jobid} b/c missing submit_time, start_time, or end_time" + ) continue - - scheduled_nodes = [node_mapping[mid] for mid in machine_ids if mid in node_mapping] - if submit_time and start_time and end_time: + scheduled_nodes = [ + node_mapping[mid] for mid in machine_ids if mid in node_mapping + ] + + if submit_time and start_time and end_time: job = job_dict( id=jobid, name=f"philly-{jobid}", account=user if user else "unknown", - nodes_required=len(machine_ids), partition=partition_id, - priority=0, - cpu_cores_required=1, gpu_units_required=gpus_per_node, - end_state=status, scheduled_nodes=scheduled_nodes, - cpu_trace=job_cpu_trace, gpu_trace=job_gpu_trace, - ntx_trace=None, - nrx_trace=None, - + ntx_trace=[], + nrx_trace=[], submit_time=submit_time, start_time=start_time, end_time=end_time, - #time_limit=end_time - start_time, time_limit=end_time, expected_run_time=wall_time if wall_time else 0, - current_run_time=0, - trace_time=None, - 
trace_start_time=0, #None, - trace_end_time=end_time, #None, + trace_start_time=start_time, # None, + trace_end_time=end_time, # None, trace_quanta=60, - trace_missing_values=False, - downscale=1 + trace_missing_values=False ) if job_cpu_trace and job_gpu_trace: jobs_list.append(Job(job)) @@ -427,10 +437,11 @@ def load_data(files, **kwargs): tqdm.write(f"skipping {job['id']} b/c either no cpu or gpu trace") if debug: - tqdm.write(f"abs start time: {start_ts} rel job start: {job['start_time']} rel job end: {job['end_time']}") + tqdm.write(f"{job['id']} start: {job['start_time']} end: {job['end_time']}") return WorkloadData( jobs=jobs_list, - telemetry_start=0, telemetry_end=int(end_ts - start_ts), + telemetry_start=start_ts, + telemetry_end=end_ts, start_date=datetime.fromtimestamp(start_ts, timezone.utc), ) -- GitLab From 49d9afa99bfd3b40bfbb8a6f4e51b154b86a2e39 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 3 Oct 2025 00:42:09 +0300 Subject: [PATCH 338/388] Add in Srishti's `-w calculon` option and module for generating realistic LLM traces --- raps/sim_config.py | 4 +- raps/workloads/__init__.py | 4 +- raps/workloads/calculon.py | 173 +++++++++++++++++++++++++++++++++++++ 3 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 raps/workloads/calculon.py diff --git a/raps/sim_config.py b/raps/sim_config.py index da3541c..a69c288 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -134,8 +134,8 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Grab data from live system. """ # Workload arguments (TODO split into separate model) - workload: Literal['random', 'benchmark', 'peak', 'idle', - 'synthetic', 'multitenant', 'replay', 'randomAI'] = "random" + workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', + 'multitenant', 'replay', 'randomAI', 'calculon'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] diff --git a/raps/workloads/__init__.py b/raps/workloads/__init__.py index a34261a..d61befc 100644 --- a/raps/workloads/__init__.py +++ b/raps/workloads/__init__.py @@ -10,6 +10,7 @@ from raps.sim_config import SingleSimConfig from raps.telemetry import Telemetry from .basic import BasicWorkload +from .calculon import Calculon from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY from .distribution import DistributionWorkload from .live import continuous_job_generation @@ -51,7 +52,8 @@ class Workload( BaseWorkload, DistributionWorkload, BasicWorkload, - MultitenantWorkload + MultitenantWorkload, + Calculon ): """Final workload class with all workload types.""" pass diff --git a/raps/workloads/calculon.py b/raps/workloads/calculon.py new file mode 100644 index 0000000..8e8c1bc --- /dev/null +++ b/raps/workloads/calculon.py @@ -0,0 +1,173 @@ +""" +Calculon is a analytical model for estimating LLM training times for given architectures +on particular hardware. It is described in the paper: + + Isaev, Mikhail, et al. "Calculon: a methodology and tool for high-level co-design of + systems and large language models." SC23 Proceedings + https://dl.acm.org/doi/pdf/10.1145/3581784.3607102 + +The code is available at https://github.com/calculon-ai/calculon +which this module assumes is already cloned into the third_party directory. 
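
Since every run shells out into that checkout, a guard along these lines would fail
fast when the clone is missing; this is illustrative only and not part of the module:

    # Illustrative guard only; not part of this module.
    from pathlib import Path

    CALCULON_ROOT = Path("third_party/calculon")
    if not (CALCULON_ROOT / "bin" / "calculon").exists():
        raise FileNotFoundError(
            "Calculon not found: clone https://github.com/calculon-ai/calculon "
            "into third_party/ before running with -w calculon"
        )
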
+ +Calculon requires installing `psutil`, which can be pip installed via: + + pip install psutil + +Since Calculon by default supports A100 GPUs, we are able to use the default files that +are already setup in Calculon, and therefore have added two systems which have A100 GPUs: +Selene and Perlmutter. Example run commands: + + python main.py run --system selene -w calculon + python main.py run --system perlmutter -w calculon + +This code is currently setup to generate synthetic traces for four different LLM models: +megatron-22B, gpt3-175B, turing-530B, and megatron-1T. Adjust these by modifying +llm_model_tests below. + +""" +import json +import os +import random +import subprocess +from pathlib import Path + +import numpy as np + +from raps.job import job_dict + +from .constants import ACCT_NAMES + + +class Calculon: + """Calculon workload mixin for Workload class.""" + + def __init__(self, *args, **kwargs): + # NOTE: mixins usually accept (sim_config_args, system_config_dict) through Workload + super().__init__(*args, **kwargs) + + def calculon(self, **kwargs): + """Generate workload using Calculon backend + job trace synthesis.""" + jobs = [] + + llm_models_test = [ + ["megatron-22B", 8, 4], + ["gpt3-175B", 64, 64], + ["turing-530B", 280, 280], + ["megatron-1T", 512, 512], + ] + + for llm_model, num_nodes, max_batch_size in llm_models_test: + for partition in self.partitions: + config = self.config_map[partition] + gpu_system = "a100_80g" + data_type = "float16" + output = f"{llm_model}_{gpu_system}_{max_batch_size}_{data_type}_{num_nodes}.json" + + # call Calculon binary/subprocess to get MFU + batch time + mfu, total_batch_time = self._run_calculon( + llm_model, gpu_system, max_batch_size, num_nodes, data_type, output + ) + + # derive job stats + num_iters = 3000 + trace_quanta = config["TRACE_QUANTA"] + job_time = total_batch_time * num_iters + num_samples = int(job_time // trace_quanta) + + system_util = np.full(num_samples, mfu) + cpu_util = random.random() * config["CPUS_PER_NODE"] + cpu_trace = cpu_util * np.ones(num_iters) + + net_tx, net_rx = [], [] + num_nodes = num_nodes // config["GPUS_PER_NODE"] + + epochs = 1 + wall_time = job_time + for i in range(epochs): + job_info = job_dict( + nodes_required=num_nodes, + scheduled_nodes=[], + name=f"{llm_model} training for {num_iters} iterations", + account=ACCT_NAMES[0], + cpu_trace=cpu_trace, + gpu_trace=system_util, + ntx_trace=net_tx, + nrx_trace=net_rx, + end_state="COMPLETED", + id=None, + priority=100, + partition=partition, + time_limit=job_time + 1, + start_time=0, + end_time=job_time, + trace_time=job_time, + trace_start_time=0, + trace_end_time=job_time, + ) + jobs.append(job_info) + wall_time += job_time + + return jobs + + def _run_calculon(self, model, system, max_batch_size, num_nodes, data_type, output): + """Internal: run Calculon subprocess and parse result.""" + base_path = Path("third_party/calculon") + + # paths + model_path = base_path / "models" / f"{model}.json" + system_path = base_path / "systems" / f"{system}.json" + raw_path = base_path / "optimal_executions" / output.replace(".json", "_raw.json") + exec_path = base_path / "optimal_executions" / output.replace(".json", "_exec.json") + stats_path = base_path / "optimal_executions" / output.replace(".json", "_stats.json") + + # Run llm-optimal-execution to generate candidate executions + opt_cmd = [ + "./bin/calculon", "llm-optimal-execution", + f"models/{model}.json", + str(num_nodes), + str(max_batch_size), + data_type, + f"systems/{system}.json", + 
f"optimal_executions/{output.replace('.json', '_raw.json')}", + ] + subprocess.run(opt_cmd, check=True, cwd=base_path, env={**os.environ, "PYTHONPATH": "."}) + + # Read raw output, pick first/best execution and dump it as exec.json + with open(raw_path) as f: + raw_data = json.load(f) + + # get first (or best) key + first_key = sorted(raw_data.keys(), key=lambda k: float(k))[0] + best_exec = raw_data[first_key]["execution"] + + with open(exec_path, "w") as f: + json.dump(best_exec, f, indent=2) + + # Run llm with chosen execution, system, and model → stats.json + llm_cmd = [ + "./bin/calculon", "llm", + f"models/{model}.json", + f"optimal_executions/{output.replace('.json', '_exec.json')}", + f"systems/{system}.json", + f"optimal_executions/{output.replace('.json', '_stats.json')}", + ] + subprocess.run(llm_cmd, check=True, cwd=base_path, env={**os.environ, "PYTHONPATH": "."}) + + # Parse stats.json to extract metrics + with open(stats_path) as f: + stats_data = json.load(f) + + stats = stats_data.get("stats", {}) + + # These keys may vary depending on Calculon version + mfu = stats.get("model_flops_utilization") \ + or stats.get("sample_rate") \ + or stats.get("best_sample_rate") \ + or 0.0 + + total_batch_time = stats.get("block_fw_time") \ + or stats.get("batch_time") \ + or stats.get("total_time") \ + or 0.0 + + return mfu, total_batch_time -- GitLab From badf9df6ca9b0853aa4786e12202b868e7442b3e Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 3 Oct 2025 02:05:59 +0300 Subject: [PATCH 339/388] Fix some issues to get calculon workloads running. Add ability to use previously cached results --- config/perlmutter.yaml | 2 +- config/selene.yaml | 2 +- raps/workloads/calculon.py | 109 ++++++++++++++++++++----------------- 3 files changed, 60 insertions(+), 53 deletions(-) diff --git a/config/perlmutter.yaml b/config/perlmutter.yaml index 8863a36..e8de04c 100644 --- a/config/perlmutter.yaml +++ b/config/perlmutter.yaml @@ -38,7 +38,7 @@ scheduler: seed: 42 job_arrival_time: 900 mtbf: 11 - trace_quanta: 15 + trace_quanta: 10 min_wall_time: 3600 max_wall_time: 43200 ui_update_freq: 900 diff --git a/config/selene.yaml b/config/selene.yaml index 8f42bf6..0520da1 100644 --- a/config/selene.yaml +++ b/config/selene.yaml @@ -38,7 +38,7 @@ scheduler: seed: 42 job_arrival_time: 900 mtbf: 11 - trace_quanta: 15 + trace_quanta: 10 min_wall_time: 3600 max_wall_time: 43200 ui_update_freq: 900 diff --git a/raps/workloads/calculon.py b/raps/workloads/calculon.py index 8e8c1bc..f843084 100644 --- a/raps/workloads/calculon.py +++ b/raps/workloads/calculon.py @@ -21,10 +21,19 @@ Selene and Perlmutter. Example run commands: python main.py run --system perlmutter -w calculon This code is currently setup to generate synthetic traces for four different LLM models: -megatron-22B, gpt3-175B, turing-530B, and megatron-1T. Adjust these by modifying -llm_model_tests below. +megatron-22B, gpt3-175B, turing-530B, and megatron-1T. These four tests can take a couple +**hours** to run. On first run, consider commenting out the last three models to only test +the smallest case, megatron-22B. The parameter `llm_models_tests` below defines which tests +are run. + +Finally, the code below is setup to uses previously cached results, so once the json +files are generated by Calculon, they can be rerun very quickly again and again. 
+The caveat to this is if you want to change some Calculon configurations, +you will need to delete the cached json files in the calculon/optimal_executions folder, +to force it to regenerate new files. """ +import math import json import os import random @@ -33,7 +42,7 @@ from pathlib import Path import numpy as np -from raps.job import job_dict +from raps.job import Job, job_dict from .constants import ACCT_NAMES @@ -69,14 +78,17 @@ class Calculon: ) # derive job stats - num_iters = 3000 + num_iters = 1000000 # realistic number is probably in the millions trace_quanta = config["TRACE_QUANTA"] + job_time = total_batch_time * num_iters - num_samples = int(job_time // trace_quanta) + num_samples = math.ceil(job_time / trace_quanta) + 1 + end_time = num_samples * trace_quanta # align job to tick grid - system_util = np.full(num_samples, mfu) + # use random CPU utilizations for now cpu_util = random.random() * config["CPUS_PER_NODE"] - cpu_trace = cpu_util * np.ones(num_iters) + cpu_trace = np.full(num_samples, cpu_util) # same length + gpu_trace = np.full(num_samples, mfu) # length matches simulation steps net_tx, net_rx = [], [] num_nodes = num_nodes // config["GPUS_PER_NODE"] @@ -90,7 +102,7 @@ class Calculon: name=f"{llm_model} training for {num_iters} iterations", account=ACCT_NAMES[0], cpu_trace=cpu_trace, - gpu_trace=system_util, + gpu_trace=gpu_trace, ntx_trace=net_tx, nrx_trace=net_rx, end_state="COMPLETED", @@ -99,12 +111,15 @@ class Calculon: partition=partition, time_limit=job_time + 1, start_time=0, - end_time=job_time, + end_time=end_time, + expected_run_time=end_time, + trace_quanta=trace_quanta, trace_time=job_time, trace_start_time=0, trace_end_time=job_time, ) - jobs.append(job_info) + job = Job(job_info) + jobs.append(job) wall_time += job_time return jobs @@ -112,15 +127,26 @@ class Calculon: def _run_calculon(self, model, system, max_batch_size, num_nodes, data_type, output): """Internal: run Calculon subprocess and parse result.""" base_path = Path("third_party/calculon") - - # paths - model_path = base_path / "models" / f"{model}.json" - system_path = base_path / "systems" / f"{system}.json" - raw_path = base_path / "optimal_executions" / output.replace(".json", "_raw.json") - exec_path = base_path / "optimal_executions" / output.replace(".json", "_exec.json") - stats_path = base_path / "optimal_executions" / output.replace(".json", "_stats.json") - - # Run llm-optimal-execution to generate candidate executions + output_dir = base_path / "optimal_executions" + output_dir.mkdir(exist_ok=True) + + # expected files + raw_file = output_dir / f"{output.replace('.json', '_raw.json')}" + exec_file = output_dir / f"{output.replace('.json', '_exec.json')}" + stats_file = output_dir / f"{output.replace('.json', '_stats.json')}" + + # if all three exist, skip running + if raw_file.exists() and exec_file.exists() and stats_file.exists(): + print(f"[INFO] Using cached Calculon results for {output}") + with open(raw_file) as f: + data = json.load(f) + first_key = list(data.keys())[0] + stats = data[first_key]["stats"] + mfu = stats.get("sample_rate", 0) # or compute MFU if you want + batch_time = stats.get("block_fw_time", 0) # example placeholder + return mfu, batch_time + + # otherwise, run Calculon opt_cmd = [ "./bin/calculon", "llm-optimal-execution", f"models/{model}.json", @@ -128,46 +154,27 @@ class Calculon: str(max_batch_size), data_type, f"systems/{system}.json", - f"optimal_executions/{output.replace('.json', '_raw.json')}", + str(raw_file), ] - subprocess.run(opt_cmd, 
check=True, cwd=base_path, env={**os.environ, "PYTHONPATH": "."}) - - # Read raw output, pick first/best execution and dump it as exec.json - with open(raw_path) as f: - raw_data = json.load(f) - # get first (or best) key - first_key = sorted(raw_data.keys(), key=lambda k: float(k))[0] - best_exec = raw_data[first_key]["execution"] - - with open(exec_path, "w") as f: - json.dump(best_exec, f, indent=2) - - # Run llm with chosen execution, system, and model → stats.json llm_cmd = [ "./bin/calculon", "llm", f"models/{model}.json", - f"optimal_executions/{output.replace('.json', '_exec.json')}", + str(exec_file), f"systems/{system}.json", - f"optimal_executions/{output.replace('.json', '_stats.json')}", + str(stats_file), ] - subprocess.run(llm_cmd, check=True, cwd=base_path, env={**os.environ, "PYTHONPATH": "."}) - # Parse stats.json to extract metrics - with open(stats_path) as f: - stats_data = json.load(f) - - stats = stats_data.get("stats", {}) + subprocess.run(opt_cmd, check=True, cwd=base_path, env={**os.environ, "PYTHONPATH": "."}) + subprocess.run(llm_cmd, check=True, cwd=base_path, env={**os.environ, "PYTHONPATH": "."}) - # These keys may vary depending on Calculon version - mfu = stats.get("model_flops_utilization") \ - or stats.get("sample_rate") \ - or stats.get("best_sample_rate") \ - or 0.0 + # parse output + with open(raw_file) as f: + data = json.load(f) + first_key = list(data.keys())[0] + stats = data[first_key]["stats"] - total_batch_time = stats.get("block_fw_time") \ - or stats.get("batch_time") \ - or stats.get("total_time") \ - or 0.0 + mfu = stats.get("sample_rate", 0) + batch_time = stats.get("block_fw_time", 0) - return mfu, total_batch_time + return mfu, batch_time -- GitLab From b701d357ef91163febbc2036c621702b5d2aecdf Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 3 Oct 2025 02:10:12 +0300 Subject: [PATCH 340/388] Refactor get_current_utilization b/c it didn't handle case where job.trace_quanta not set --- raps/utils.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/raps/utils.py b/raps/utils.py index de565d4..e232bce 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -634,22 +634,23 @@ def convert_numpy_to_builtin(obj): def get_current_utilization(trace, job: Job): - # Return utilizaiton for a trace at the jobs current running time. - # Note: this should move to a trace.py and a Trace class! - util = 0.0 + """Return utilization for a trace at the job's current running time. + Note: this should move to a trace.py and a Trace class! 
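+
+    As a worked example: with trace_quanta=60, trace_start_time=0, and
+    running_time=125, the sampled index is 125 // 60 == 2.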
+    """
+    if not job.trace_quanta:
+        raise ValueError("job.trace_quanta is not set; cannot compute utilization.")
 
-    if job.trace_quanta:
-        time_quanta_index = int((job.running_time - job.trace_start_time) // job.trace_quanta)
-        if time_quanta_index < 0:
-            time_quanta_index = 0
+    time_quanta_index = int((job.running_time - job.trace_start_time) // job.trace_quanta)
+    if time_quanta_index < 0:
+        time_quanta_index = 0
 
-        if (isinstance(trace, list) and trace != []) or \
+    if (isinstance(trace, list) and trace) or \
            (isinstance(trace, np.ndarray) and trace.size != 0):
         if time_quanta_index < len(trace):
             util = get_utilization(trace, time_quanta_index)
         else:
             util = get_utilization(trace, max(0, len(trace) - 1))
-    elif isinstance(trace, float) or isinstance(trace, int):
+    elif isinstance(trace, (float, int)):
         util = trace
     else:
         util = 0.0
--
GitLab


From a7ed8225f02c4b9eab3f2ec1398b7672e76d7a2c Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 21 Oct 2025 12:36:49 -0400
Subject: [PATCH 341/388] Update parse_philly_traces.py with docstring and cleanup

---
 scripts/parse_philly_traces.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/scripts/parse_philly_traces.py b/scripts/parse_philly_traces.py
index 5693e4e..57ce8c2 100644
--- a/scripts/parse_philly_traces.py
+++ b/scripts/parse_philly_traces.py
@@ -1,3 +1,14 @@
+"""
+See raps/dataloaders/philly.py for how to download philly traces.
+
+Run the following to parse philly traces into separate files for each day:
+
+    python /path/to/raps/scripts/parse_philly_traces.py cluster_cpu_util
+    python /path/to/raps/scripts/parse_philly_traces.py cluster_gpu_util
+
+This will parse these two files into two directories, cpu_by_day and gpu_by_day,
+creating one file per day and appending that day's lines to it.
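+
+For example, all lines stamped 2017-10-03 land together in a single file under
+cpu_by_day (illustrative; the exact file naming follows the date parsing in the
+script below).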
+""" import os import sys from datetime import datetime @@ -23,7 +34,6 @@ with open(input_file) as f: output_dir = "cpu_by_day" if is_cpu else "gpu_by_day" os.makedirs(output_dir, exist_ok=True) - #for i, line in enumerate(f, 1): for line in tqdm(f, total=total_lines, desc="Processing lines"): parts = line.strip().split(",") -- GitLab From 4fa6fd65bc2051ed78d8a721214e61484aa4a689 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 21 Oct 2025 14:31:00 -0400 Subject: [PATCH 342/388] Update philly run command to use --start 2017-10-03T00:14:56Z --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3684594..160549f 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Microsoft Azure - 2017 Philly Traces python main.py run-parts -x philly -w multitenant # Telemetry replay - python main.py run-parts -x philly -f /opt/data/philly/trace-data --start 2017-10-03T00:00 --end 2017-10-04T00:00 + python main.py run-parts -x philly -f /opt/data/philly/trace-data --start 2017-10-03T00:14:56Z --end 2017-10-04T00:00 For Lumi -- GitLab From b5a530ffcc504c03a00272129d8cc3fd449c2a72 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 21 Oct 2025 14:32:30 -0400 Subject: [PATCH 343/388] Update .gitignore to ignore raps-output-* and ppo_raps_logs --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index ed10fab..c5f7241 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ venv simulation_results/ models/fmu-models .shell-completion-cache +raps-output-* +ppo_raps_logs -- GitLab From 5843ad39292a340168235618619284c37f623950 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 5 Oct 2025 04:19:30 +0300 Subject: [PATCH 344/388] Increase fattree_k from 16 to 32 for lassen. Add checks and better messaging when k value too small --- config/lassen.yaml | 2 +- raps/network/__init__.py | 3 ++- raps/network/fat_tree.py | 8 +++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/config/lassen.yaml b/config/lassen.yaml index 594479d..08bc346 100644 --- a/config/lassen.yaml +++ b/config/lassen.yaml @@ -121,7 +121,7 @@ cooling: network: topology: fat-tree network_max_bw: 1000000000.0 - fattree_k: 16 + fattree_k: 32 dragonfly_d: 11 dragonfly_a: 9 dragonfly_p: 8 diff --git a/raps/network/__init__.py b/raps/network/__init__.py index eb49ee6..2740466 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -39,8 +39,9 @@ class NetworkModel: self.real_to_fat_idx = kwargs.get("real_to_fat_idx", {}) if self.topology == "fat-tree": + total_nodes = config['TOTAL_NODES'] - len(config['DOWN_NODES']) self.fattree_k = config.get("FATTREE_K") - self.net_graph = build_fattree(self.fattree_k) + self.net_graph = build_fattree(self.fattree_k, total_nodes) elif self.topology == "torus3d": dims = ( diff --git a/raps/network/fat_tree.py b/raps/network/fat_tree.py index 2d27b39..1c21476 100644 --- a/raps/network/fat_tree.py +++ b/raps/network/fat_tree.py @@ -13,7 +13,7 @@ def node_id_to_host_name(node_id: int, k: int) -> str: return f"h_{pod}_{edge}_{host}" -def build_fattree(k): +def build_fattree(k, total_nodes): """ Build a k-ary fat-tree: - k pods @@ -26,6 +26,12 @@ def build_fattree(k): - agg switches "a_{pod}_{agg}" - core switches "c_{i}_{j}" """ + num_hosts = (k**3) // 4 + if num_hosts < total_nodes: + raise ValueError( + f"Fat-tree network with k={k} has {num_hosts} hosts, but the system has {total_nodes} nodes. " + f"Please increase the value of 'fattree_k' in the system configuration file." 
+ ) G = nx.Graph() # core # num_core = (k//2)**2 # Unused! -- GitLab From 830f45ab1323a22a2bcd508b750a46523fddacec Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 5 Oct 2025 16:41:16 +0300 Subject: [PATCH 345/388] Add unit tests for the different network topologies --- tests/unit/test_net_dragonfly.py | 38 +++++++++++++++++++++++++++++ tests/unit/test_net_fat_tree.py | 42 ++++++++++++++++++++++++++++++++ tests/unit/test_net_torus3d.py | 41 +++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 tests/unit/test_net_dragonfly.py create mode 100644 tests/unit/test_net_fat_tree.py create mode 100644 tests/unit/test_net_torus3d.py diff --git a/tests/unit/test_net_dragonfly.py b/tests/unit/test_net_dragonfly.py new file mode 100644 index 0000000..a36afdc --- /dev/null +++ b/tests/unit/test_net_dragonfly.py @@ -0,0 +1,38 @@ +import pytest +from raps.network.dragonfly import build_dragonfly, dragonfly_node_id_to_host_name + +def test_build_dragonfly(): + """Test building a small dragonfly network.""" + D, A, P = 2, 2, 2 + G = build_dragonfly(D, A, P) + + # Check number of nodes + num_routers = D * A + num_hosts = D * A * P + total_nodes = num_routers + num_hosts + assert len(G.nodes) == total_nodes + + # Check number of edges + # Intra-group edges (clique) + intra_group_edges = D * (A * (A - 1) // 2) + # Inter-group edges + inter_group_edges = A * (D * (D - 1) // 2) + # Host to router edges + host_router_edges = num_hosts + total_edges = intra_group_edges + inter_group_edges + host_router_edges + assert len(G.edges) == total_edges + + # Check node types + node_types = [data["type"] for _, data in G.nodes(data=True)] + assert node_types.count("router") == num_routers + assert node_types.count("host") == num_hosts + +def test_dragonfly_node_id_to_host_name(): + """Test the dragonfly_node_id_to_host_name function.""" + D, A, P = 2, 2, 2 + # Test a few node IDs + assert dragonfly_node_id_to_host_name(0, D, A, P) == "h_0_0_0" + assert dragonfly_node_id_to_host_name(1, D, A, P) == "h_0_0_1" + assert dragonfly_node_id_to_host_name(2, D, A, P) == "h_0_1_0" + assert dragonfly_node_id_to_host_name(3, D, A, P) == "h_0_1_1" + assert dragonfly_node_id_to_host_name(4, D, A, P) == "h_1_0_0" diff --git a/tests/unit/test_net_fat_tree.py b/tests/unit/test_net_fat_tree.py new file mode 100644 index 0000000..93750e0 --- /dev/null +++ b/tests/unit/test_net_fat_tree.py @@ -0,0 +1,42 @@ +import pytest +from raps.network.fat_tree import build_fattree, node_id_to_host_name + +def test_build_fattree_k4(): + """Test building a k=4 fat-tree.""" + k = 4 + G = build_fattree(k, 16) + + # Check number of nodes + num_hosts = k * (k // 2) * (k // 2) + num_edge_switches = k * (k // 2) + num_agg_switches = k * (k // 2) + num_core_switches = (k // 2) ** 2 + total_nodes = num_hosts + num_edge_switches + num_agg_switches + num_core_switches + assert len(G.nodes) == total_nodes + + # Check number of edges + # Host to edge switch edges + host_edges = num_hosts + # Edge to agg switch edges + edge_agg_edges = k * (k // 2) * (k // 2) + # Agg to core switch edges + agg_core_edges = k * (k // 2) * (k // 2) + total_edges = host_edges + edge_agg_edges + agg_core_edges + assert len(G.edges) == total_edges + + # Check node types + node_types = [data["type"] for _, data in G.nodes(data=True)] + assert node_types.count("host") == num_hosts + assert node_types.count("edge") == num_edge_switches + assert node_types.count("agg") == num_agg_switches + assert node_types.count("core") == num_core_switches + +def 
test_node_id_to_host_name(): + """Test the node_id_to_host_name function.""" + k = 4 + # Test a few node IDs + assert node_id_to_host_name(0, k) == "h_0_0_0" + assert node_id_to_host_name(1, k) == "h_0_0_1" + assert node_id_to_host_name(2, k) == "h_0_1_0" + assert node_id_to_host_name(3, k) == "h_0_1_1" + assert node_id_to_host_name(4, k) == "h_1_0_0" diff --git a/tests/unit/test_net_torus3d.py b/tests/unit/test_net_torus3d.py new file mode 100644 index 0000000..b18cbfa --- /dev/null +++ b/tests/unit/test_net_torus3d.py @@ -0,0 +1,41 @@ +import pytest +from raps.network.torus3d import build_torus3d, torus_route_xyz + +def test_build_torus3d(): + """Test building a small 3D torus network.""" + dims = (2, 2, 2) + G, meta = build_torus3d(dims) + + # Check number of nodes + num_routers = dims[0] * dims[1] * dims[2] + num_hosts = num_routers # hosts_per_router=1 + total_nodes = num_routers + num_hosts + assert len(G.nodes) == total_nodes + + # Check number of edges + # Router to router edges + router_edges = (num_routers * 3) // 2 # Each router has 3 neighbors in a 3D torus + # Host to router edges + host_router_edges = num_hosts + total_edges = router_edges + host_router_edges + assert len(G.edges) == total_edges + + # Check node types + node_types = [data["kind"] for _, data in G.nodes(data=True)] + assert node_types.count("router") == num_routers + assert node_types.count("host") == num_hosts + +def test_torus_route_xyz(): + """Test the torus_route_xyz function.""" + dims = (4, 4, 4) + # Test a simple route + path = torus_route_xyz("r_0_0_0", "r_1_1_1", dims) + assert path == ["r_0_0_0", "r_1_0_0", "r_1_1_0", "r_1_1_1"] + + # Test a route with wrap-around + path = torus_route_xyz("r_3_3_3", "r_0_0_0", dims, wrap=True) + assert path == ["r_3_3_3", "r_0_3_3", "r_0_0_3", "r_0_0_0"] + + # Test a route without wrap-around + path = torus_route_xyz("r_0_0_0", "r_1_1_1", dims, wrap=False) + assert path == ["r_0_0_0", "r_1_0_0", "r_1_1_0", "r_1_1_1"] -- GitLab From 8d19087eba5129f037c1d1ba92d048bc3ce9e5aa Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 5 Oct 2025 17:21:36 +0300 Subject: [PATCH 346/388] Add support for network plots --- README.md | 2 +- raps/engine.py | 2 ++ raps/network/dragonfly.py | 6 ++++++ raps/network/fat_tree.py | 10 +++++++--- raps/network/torus3d.py | 6 ++++++ raps/plotting.py | 40 ++++++++++++++++++++++++++++++++++++++- raps/run_sim.py | 13 +++++++++++-- raps/sim_config.py | 2 +- 8 files changed, 73 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9c708af..a2fb171 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ For Lumi Lassen is one of the few datasets that has networking data. See `raps/dataloaders/lassen.py` for how to get the datasets. 
To run a network simulation, use the following command: - raps run -f ~/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --start '2019-08-22T00:00:00+00:00' -t 12h --arrival poisson --net + raps run -f /opt/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --start '2019-08-22T00:00:00+00:00' -t 12h --arrival poisson --net ## Snapshot of extracted workload data diff --git a/raps/engine.py b/raps/engine.py index 67cd999..5fe8cf4 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -329,6 +329,8 @@ class Engine: self.network_model = NetworkModel( available_nodes=available_nodes, config=self.config, + plot=self.sim_config.plot, + output_dir=self.sim_config.get_output(), ) else: self.network_model = None diff --git a/raps/network/dragonfly.py b/raps/network/dragonfly.py index a13d1dc..d0b9a23 100644 --- a/raps/network/dragonfly.py +++ b/raps/network/dragonfly.py @@ -17,6 +17,12 @@ def build_dragonfly(D: int, A: int, P: int) -> nx.Graph: 1. All routers within a group form a full clique. 2. Each router r in group g has exactly one “global link” to router r in each other group. 3. Each router r in group g attaches to P hosts ("h_{g}_{r}_{0..P−1}"). + + Examples + -------- + >>> from raps.plotting import plot_network_graph + >>> G = build_dragonfly(D=2, A=2, P=2) + >>> plot_network_graph(G, 'dragonfly.png') """ G = nx.Graph() diff --git a/raps/network/fat_tree.py b/raps/network/fat_tree.py index 1c21476..c8d0e26 100644 --- a/raps/network/fat_tree.py +++ b/raps/network/fat_tree.py @@ -1,6 +1,5 @@ import networkx as nx - def node_id_to_host_name(node_id: int, k: int) -> str: """ Convert an integer node id to the host name string in the fat-tree. @@ -12,7 +11,6 @@ def node_id_to_host_name(node_id: int, k: int) -> str: host = node_id % (k // 2) return f"h_{pod}_{edge}_{host}" - def build_fattree(k, total_nodes): """ Build a k-ary fat-tree: @@ -25,6 +23,12 @@ def build_fattree(k, total_nodes): - edge switches "e_{pod}_{edge}" - agg switches "a_{pod}_{agg}" - core switches "c_{i}_{j}" + + Examples + -------- + >>> from raps.plotting import plot_network_graph + >>> G = build_fattree(k=4, total_nodes=16) + >>> plot_network_graph(G, 'fat_tree.png') """ num_hosts = (k**3) // 4 if num_hosts < total_nodes: @@ -62,4 +66,4 @@ def build_fattree(k, total_nodes): host = f"h_{pod}_{edge}_{h}" G.add_node(host, type="host") G.add_edge(e, host) - return G + return G \ No newline at end of file diff --git a/raps/network/torus3d.py b/raps/network/torus3d.py index 50c988f..4d8054d 100644 --- a/raps/network/torus3d.py +++ b/raps/network/torus3d.py @@ -8,6 +8,12 @@ def build_torus3d(dims, wrap=True, link_bw=1e9, hosts_per_router=1, routing="DOR Build a 3D torus at router granularity, then attach host nodes to routers. Node ids in the returned graph are host names ("h_x_y_z_i") and router names ("r_x_y_z"). Edges have attribute 'capacity' (bytes/s) and 'latency' (per hop). 
+ + Examples + -------- + >>> from raps.plotting import plot_network_graph + >>> G, meta = build_torus3d(dims=(2, 2, 2)) + >>> plot_network_graph(G, 'torus3d.png') """ X, Y, Z = map(int, dims) G = nx.Graph() diff --git a/raps/plotting.py b/raps/plotting.py index 606ac7a..192a171 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -20,6 +20,7 @@ import matplotlib.ticker as ticker from matplotlib.ticker import MaxNLocator import time import numpy as np +import networkx as nx from uncertainties import unumpy from rich.progress import track @@ -404,6 +405,43 @@ def plot_nodes_gantt(*, ax=None, jobs): return ax +def plot_network_graph(G, filename, layout='spring'): + """ + Plot the network graph with edge labels and save it to a file. + + Parameters + ---------- + G : networkx.Graph + The graph to plot. + filename : str + The path to save the plot. + layout : str, optional + The layout to use for the plot. Can be 'spring', 'circular', 'kamada_kawai', 'random', 'shell', 'spectral'. + Default is 'spring'. + """ + plt.figure(figsize=(20, 20)) + if layout == 'spring': + pos = nx.spring_layout(G) + elif layout == 'circular': + pos = nx.circular_layout(G) + elif layout == 'kamada_kawai': + pos = nx.kamada_kawai_layout(G) + elif layout == 'random': + pos = nx.random_layout(G) + elif layout == 'shell': + pos = nx.shell_layout(G) + elif layout == 'spectral': + pos = nx.spectral_layout(G) + else: + raise ValueError(f"Unsupported layout: {layout}") + + nx.draw(G, pos, with_labels=True, node_size=500, node_color="skyblue") + edge_labels = nx.get_edge_attributes(G, "capacity") + nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels) + plt.savefig(filename) + plt.close() + + if __name__ == "__main__": plotter = Plotter() - # plotter.plot_history([1, 2, 3, 4]) + # plotter.plot_history([1, 2, 3, 4]) \ No newline at end of file diff --git a/raps/run_sim.py b/raps/run_sim.py index aa2d9d9..285b70d 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -8,7 +8,7 @@ import pandas as pd import sys import warnings from raps.ui import LayoutManager -from raps.plotting import Plotter +from raps.plotting import Plotter, plot_network_graph from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml_parsed @@ -53,7 +53,7 @@ def run_sim(sim_config: SingleSimConfig): out = sim_config.get_output() if out: - out.mkdir(parents=True) + out.mkdir(parents=True, exist_ok=True) engine.telemetry.save_snapshot( dest=str(out / 'snapshot.npz'), result=engine.get_workload_data(), @@ -148,6 +148,15 @@ def run_sim(sim_config: SingleSimConfig): else: print('Cooling model not enabled... skipping output of plot') + if 'net' in sim_config.plot: + if engine.network_model: + plot_network_graph( + engine.network_model.net_graph, + out / f'{engine.system_config.system_name}_network.png', + ) + else: + print('Network model not enabled... 
skipping output of plot') + if out: if sim_config.uncertainties: # Parquet cannot handle annotated ufloat format AFAIK diff --git a/raps/sim_config.py b/raps/sim_config.py index da3541c..2f8a75f 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -109,7 +109,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Enable verbose output """ layout: Literal["layout1", "layout2"] = "layout1" """ UI layout """ - plot: list[Literal["power", "loss", "pue", "temp", "util"]] | None = None + plot: list[Literal["power", "loss", "pue", "temp", "util", "net"]] | None = None """ Plots to generate """ imtype: Literal["png", "svg", "jpg", "pdf", "eps"] = "png" -- GitLab From bf08dbea6c976a625f94ea3e726279939a1f8559 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 6 Oct 2025 01:53:40 +0300 Subject: [PATCH 347/388] Implement good plot representation for fat-tree network --- raps/network/__init__.py | 5 ++- raps/network/fat_tree.py | 15 ++++++++- raps/plotting.py | 66 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 3 deletions(-) diff --git a/raps/network/__init__.py b/raps/network/__init__.py index 2740466..18596c3 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -9,9 +9,10 @@ from .base import ( worst_link_util, ) -from .fat_tree import build_fattree, node_id_to_host_name +from .fat_tree import build_fattree, node_id_to_host_name, subsample_hosts from .torus3d import build_torus3d, link_loads_for_job_torus from .dragonfly import build_dragonfly, dragonfly_node_id_to_host_name +from raps.plotting import plot_fattree_hierarchy from raps.utils import get_current_utilization __all__ = [ @@ -42,6 +43,8 @@ class NetworkModel: total_nodes = config['TOTAL_NODES'] - len(config['DOWN_NODES']) self.fattree_k = config.get("FATTREE_K") self.net_graph = build_fattree(self.fattree_k, total_nodes) + #self.net_graph = subsample_hosts(self.net_graph, num_hosts=4626) + plot_fattree_hierarchy(self.net_graph, k=self.fattree_k) elif self.topology == "torus3d": dims = ( diff --git a/raps/network/fat_tree.py b/raps/network/fat_tree.py index c8d0e26..c514b83 100644 --- a/raps/network/fat_tree.py +++ b/raps/network/fat_tree.py @@ -1,5 +1,7 @@ +import random import networkx as nx + def node_id_to_host_name(node_id: int, k: int) -> str: """ Convert an integer node id to the host name string in the fat-tree. 
@@ -11,6 +13,7 @@ def node_id_to_host_name(node_id: int, k: int) -> str: host = node_id % (k // 2) return f"h_{pod}_{edge}_{host}" + def build_fattree(k, total_nodes): """ Build a k-ary fat-tree: @@ -66,4 +69,14 @@ def build_fattree(k, total_nodes): host = f"h_{pod}_{edge}_{h}" G.add_node(host, type="host") G.add_edge(e, host) - return G \ No newline at end of file + return G + + +def subsample_hosts(G, num_hosts): + """Reduce the number of host nodes in the FatTree graph to match system size.""" + hosts = [n for n in G if n.startswith("h")] + if num_hosts < len(hosts): + keep = set(random.sample(hosts, num_hosts)) + remove = [n for n in hosts if n not in keep] + G.remove_nodes_from(remove) + return G diff --git a/raps/plotting.py b/raps/plotting.py index 192a171..20e2d92 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -442,6 +442,70 @@ def plot_network_graph(G, filename, layout='spring'): plt.close() +def plot_fattree_hierarchy(G, k=32, save_path='network.png'): + """Draw a hierarchical Fat-Tree layout with automatic scaling.""" + pos = {} + + # --- Layer order and matching prefixes --- + layers = ["core", "agg", "edge", "h"] + layer_prefixes = { + "core": ["core", "c_"], + "agg": ["agg", "a_"], + "edge": ["edge", "e_"], + "h": ["h", "host"] + } + + # --- Compute how many nodes per layer --- + layer_counts = {} + for layer in layers: + prefixes = layer_prefixes[layer] + layer_nodes = [n for n in G.nodes if any(n.startswith(p) for p in prefixes)] + layer_counts[layer] = len(layer_nodes) + + max_nodes = max(layer_counts.values()) or 1 + y_gap = 1.0 / (len(layers) - 1) + + # --- Assign positions, normalized to [0,1] range --- + for j, layer in enumerate(layers): + prefixes = layer_prefixes[layer] + layer_nodes = [n for n in G.nodes if any(n.startswith(p) for p in prefixes)] + n_layer = len(layer_nodes) + if n_layer == 0: + continue + x_spacing = 1.0 / n_layer + y = 1.0 - j * y_gap + for i, node in enumerate(layer_nodes): + x = (i + 0.5) * x_spacing # center each node + pos[node] = (x, y) + + # --- Draw figure --- + plt.figure(figsize=(10, 8)) + color_map = {"core": "red", "agg": "orange", "edge": "green", "h": "blue"} + size_map = {"core": 30, "agg": 20, "edge": 10, "h": 5} + + for layer in layers: + nodes = [n for n in G.nodes if any(n.startswith(p) for p in layer_prefixes[layer])] + if nodes: + nx.draw_networkx_nodes( + G, pos, nodelist=nodes, node_color=color_map[layer], + node_size=size_map[layer], label=layer.capitalize(), alpha=0.7 + ) + + # --- Only draw inter-layer edges for clarity --- + edgelist = [ + (u, v) for (u, v) in G.edges + if not any(u.startswith(p) and v.startswith(p) + for p in ["c_", "a_", "e_", "h", "core", "agg", "edge", "host"]) + ] + nx.draw_networkx_edges(G, pos, edgelist=edgelist, alpha=0.05, width=0.4) + + plt.legend() + plt.axis("off") + plt.tight_layout() + if save_path: + plt.savefig(save_path, dpi=300) + + if __name__ == "__main__": plotter = Plotter() - # plotter.plot_history([1, 2, 3, 4]) \ No newline at end of file + # plotter.plot_history([1, 2, 3, 4]) -- GitLab From d74e55558778960c3d7a7c18c382995cc7137608 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 7 Oct 2025 19:21:29 +0300 Subject: [PATCH 348/388] Add plotting function for dragonfly, fix some issues in dragonfly network, test on lassen --- raps/network/__init__.py | 39 +++++++++++----- raps/network/dragonfly.py | 97 ++++++++++++++++++++++++++++++++------- raps/plotting.py | 67 ++++++++++++++++++++++++++- 3 files changed, 173 insertions(+), 30 deletions(-) diff --git 
a/raps/network/__init__.py b/raps/network/__init__.py index 18596c3..f218f5a 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -11,8 +11,8 @@ from .base import ( from .fat_tree import build_fattree, node_id_to_host_name, subsample_hosts from .torus3d import build_torus3d, link_loads_for_job_torus -from .dragonfly import build_dragonfly, dragonfly_node_id_to_host_name -from raps.plotting import plot_fattree_hierarchy +from .dragonfly import build_dragonfly, dragonfly_node_id_to_host_name, build_dragonfly_idx_map +from raps.plotting import plot_fattree_hierarchy, plot_dragonfly from raps.utils import get_current_utilization __all__ = [ @@ -71,11 +71,24 @@ class NetworkModel: nid += 1 elif self.topology == "dragonfly": - self.net_graph = build_dragonfly( - int(config["DRAGONFLY_D"]), - int(config["DRAGONFLY_A"]), - int(config.get("DRAGONFLY_P", 1)) - ) + D = self.config["DRAGONFLY_D"] + A = self.config["DRAGONFLY_A"] + P = self.config["DRAGONFLY_P"] + self.net_graph = build_dragonfly(D, A, P) + + # total nodes seen by scheduler or job trace + total_real_nodes = getattr(self, "available_nodes", None) + if total_real_nodes is None: + total_real_nodes = 4626 # fallback for Lassen + + # if available_nodes is a list, take its length + if not isinstance(total_real_nodes, int): + total_real_nodes = len(total_real_nodes) + + self.real_to_fat_idx = build_dragonfly_idx_map(D, A, P, total_real_nodes) + print(f"[DEBUG] Dragonfly mapping: {len(self.real_to_fat_idx)} entries") + + plot_dragonfly(self.net_graph) elif self.topology == "capacity": # Capacity-only model: no explicit graph @@ -104,13 +117,15 @@ class NetworkModel: print(" fat-tree hosts:", host_list) elif self.topology == "dragonfly": - D, A, P = self.config["DRAGONFLY_D"], self.config["DRAGONFLY_A"], self.config["DRAGONFLY_P"] - host_list = [ - dragonfly_node_id_to_host_name(self.real_to_fat_idx[real_n], D, A, P) - for real_n in job.scheduled_nodes - ] + D = self.config["DRAGONFLY_D"] + A = self.config["DRAGONFLY_A"] + P = self.config["DRAGONFLY_P"] + # Directly use mapped host names + host_list = [self.real_to_fat_idx[real_n] for real_n in job.scheduled_nodes] if debug: print(" dragonfly hosts:", host_list) + print("Example nodes in graph:", list(self.net_graph.nodes)[:10]) + print("Contains h_0_9_0?", "h_0_9_0" in self.net_graph) loads = link_loads_for_job(self.net_graph, host_list, net_tx) net_cong = worst_link_util(loads, max_throughput) diff --git a/raps/network/dragonfly.py b/raps/network/dragonfly.py index d0b9a23..0f29b68 100644 --- a/raps/network/dragonfly.py +++ b/raps/network/dragonfly.py @@ -2,7 +2,50 @@ import networkx as nx from itertools import combinations -def build_dragonfly(D: int, A: int, P: int) -> nx.Graph: +import networkx as nx + +def build_dragonfly(d, a, p): + """ + Build a Dragonfly network graph. 
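+
+    For example, build_dragonfly(d=2, a=2, p=2) yields a + 1 = 3 groups,
+    3 * 2 = 6 routers, and 6 * 2 = 12 hosts.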
+ d = routers per group + a = global connections per router + p = compute nodes per router + """ + G = nx.Graph() + num_groups = a + 1 # standard Dragonfly rule + + # --- Routers and hosts --- + for g in range(num_groups): + for r in range(d): + router = f"r_{g}_{r}" + G.add_node(router, layer="router", group=g) + + # attach p hosts to each router + for h in range(p): + host = f"h_{g}_{r}_{h}" + G.add_node(host, layer="host", group=g) + G.add_edge(router, host) + + # --- Intra-group full mesh --- + for g in range(num_groups): + routers = [f"r_{g}_{r}" for r in range(d)] + for i in range(d): + for j in range(i + 1, d): + G.add_edge(routers[i], routers[j]) + + # --- Inter-group (global) links --- + for g in range(num_groups): + for r in range(d): + src = f"r_{g}_{r}" + for offset in range(1, a + 1): + dst_group = (g + offset) % num_groups + dst = f"r_{dst_group}_{r % d}" + G.add_edge(src, dst) + + return G + + +def build_dragonfly2(D: int, A: int, P: int) -> nx.Graph: """ Build a “simple” k-ary Dragonfly with: D = # of groups @@ -61,21 +104,41 @@ def build_dragonfly(D: int, A: int, P: int) -> nx.Graph: def dragonfly_node_id_to_host_name(fat_idx: int, D: int, A: int, P: int) -> str: """ - Given a contiguous fat‐index ∈ [0..(D*A*P − 1)], return "h_{g}_{r}_{p}". - Hosts are laid out in order: - 0..(P−1) → group=0, router=0, p=0..P−1 - P..2P−1 → group=0, router=1, p=0..P−1 - … - (A*P)..(2A*P−1) → group=1, router=0, … - In general: - host_offset = fat_idx % P - router_offset = (fat_idx // P) % A - group = fat_idx // (A*P) + Convert a contiguous Dragonfly host index to its hierarchical name. + + For a Dragonfly with: + D routers per group, + A global links per router ⇒ num_groups = A + 1, + P compute nodes per router. + + Hosts are laid out in contiguous order: + group g = floor(fat_idx / (D * P)) + router r = (fat_idx // P) % D + host h = fat_idx % P + """ + num_groups = A + 1 + total_hosts = num_groups * D * P + assert 0 <= fat_idx < total_hosts, f"fat_idx {fat_idx} out of range (max {total_hosts-1})" + + group = fat_idx // (D * P) + router = (fat_idx // P) % D + host = fat_idx % P + return f"h_{group}_{router}_{host}" + + +def build_dragonfly_idx_map(d: int, a: int, p: int, total_real_nodes: int) -> dict[int, str]: + """ + Build a mapping {real_node_index: host_name} for Dragonfly. + Wrap around if total_real_nodes > total_hosts. 
""" - total_hosts = D * A * P - assert 0 <= fat_idx < total_hosts, "fat_idx out of range" + num_groups = a + 1 + total_hosts = num_groups * d * p - host_offset = fat_idx % P - router_group = (fat_idx // P) % A - pod = fat_idx // (A * P) - return f"h_{pod}_{router_group}_{host_offset}" + mapping = {} + for i in range(total_real_nodes): + fat_idx = i % total_hosts # <- wrap safely + group = fat_idx // (d * p) + router = (fat_idx // p) % d + host = fat_idx % p + mapping[i] = f"h_{group}_{router}_{host}" + return mapping diff --git a/raps/plotting.py b/raps/plotting.py index 20e2d92..7135595 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -442,7 +442,7 @@ def plot_network_graph(G, filename, layout='spring'): plt.close() -def plot_fattree_hierarchy(G, k=32, save_path='network.png'): +def plot_fattree_hierarchy(G, k=32, save_path='net_fattree.png'): """Draw a hierarchical Fat-Tree layout with automatic scaling.""" pos = {} @@ -505,6 +505,71 @@ def plot_fattree_hierarchy(G, k=32, save_path='network.png'): if save_path: plt.savefig(save_path, dpi=300) +def plot_dragonfly(G, save_path='net_dragonfly.png'): + """ + Draw a circular Dragonfly layout: groups in a large ring, + routers in small inner rings, hosts hanging around each router. + """ + import math + import matplotlib.pyplot as plt + import networkx as nx + + # Identify groups + groups = sorted({G.nodes[n]["group"] for n in G if "group" in G.nodes[n]}) + num_groups = len(groups) + + pos = {} + R_outer = 1.0 # radius of the outer ring (groups) + R_inner = 0.15 # radius of each group's internal ring + + # --- compute positions --- + for i, g in enumerate(groups): + # center of this group + theta_g = 2 * math.pi * i / num_groups + cx = R_outer * math.cos(theta_g) + cy = R_outer * math.sin(theta_g) + + routers = [n for n in G if n.startswith("r_") and G.nodes[n]["group"] == g] + hosts = [n for n in G if n.startswith("h_") and G.nodes[n]["group"] == g] + + # routers in small ring + for j, r in enumerate(routers): + theta_r = 2 * math.pi * j / len(routers) + x = cx + R_inner * math.cos(theta_r) + y = cy + R_inner * math.sin(theta_r) + pos[r] = (x, y) + + # hosts slightly further out around each router + for j, h in enumerate(hosts): + router = f"r_{g}_{j // 8}" if len(routers) > 0 else None + # angle toward router’s position if available + angle = 2 * math.pi * (j / len(hosts)) + r_off = R_inner + 0.05 + x = cx + r_off * math.cos(angle) + y = cy + r_off * math.sin(angle) + pos[h] = (x, y) + + # --- Draw figure --- + plt.figure(figsize=(10, 10)) + nx.draw_networkx_nodes(G, pos, + nodelist=[n for n in G if n.startswith("r_")], + node_color="orange", node_size=20, label="Routers", alpha=0.9) + nx.draw_networkx_nodes(G, pos, + nodelist=[n for n in G if n.startswith("h_")], + node_color="blue", node_size=8, label="Hosts", alpha=0.7) + + # intra-group edges light gray, inter-group black + intra = [(u, v) for (u, v) in G.edges if G.nodes[u]["group"] == G.nodes[v]["group"]] + inter = [(u, v) for (u, v) in G.edges if G.nodes[u]["group"] != G.nodes[v]["group"]] + nx.draw_networkx_edges(G, pos, edgelist=intra, alpha=0.1, width=0.3, edge_color="gray") + nx.draw_networkx_edges(G, pos, edgelist=inter, alpha=0.4, width=0.4, edge_color="black") + + plt.axis("off") + plt.legend() + plt.tight_layout() + if save_path: + plt.savefig(save_path, dpi=300) + if __name__ == "__main__": plotter = Plotter() -- GitLab From 807b881f7039d1f21af8baadc07873a406197ac9 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 8 Oct 2025 17:15:19 +0300 Subject: [PATCH 349/388] 
Add support for plotting torus3d - test all three network topologies with Lassen --- config/lassen.yaml | 6 ++ raps/network/__init__.py | 18 ++++- raps/network/torus3d.py | 170 ++++++++++++++++++++++++--------------- raps/plotting.py | 91 +++++++++++++++++++++ 4 files changed, 215 insertions(+), 70 deletions(-) diff --git a/config/lassen.yaml b/config/lassen.yaml index 08bc346..2555aa2 100644 --- a/config/lassen.yaml +++ b/config/lassen.yaml @@ -126,3 +126,9 @@ network: dragonfly_a: 9 dragonfly_p: 8 latency: 1 + torus_x: 17 + torus_y: 17 + torus_z: 8 + torus_wrap: true + hosts_per_router: 2 + torus_routing: DOR_XYZ diff --git a/raps/network/__init__.py b/raps/network/__init__.py index f218f5a..56dc4cb 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -10,9 +10,10 @@ from .base import ( ) from .fat_tree import build_fattree, node_id_to_host_name, subsample_hosts -from .torus3d import build_torus3d, link_loads_for_job_torus +from .torus3d import build_torus3d, link_loads_for_job_torus, torus_host_from_real_index from .dragonfly import build_dragonfly, dragonfly_node_id_to_host_name, build_dragonfly_idx_map -from raps.plotting import plot_fattree_hierarchy, plot_dragonfly +from raps.plotting import plot_fattree_hierarchy, plot_dragonfly, plot_torus2d, plot_torus3d + from raps.utils import get_current_utilization __all__ = [ @@ -58,6 +59,9 @@ class NetworkModel: # Build the graph and metadata self.net_graph, self.meta = build_torus3d(dims, wrap, hosts_per_router=hosts_per_router) + plot_torus2d(self.net_graph) + plot_torus3d(self.net_graph) + # Deterministic numeric → host mapping X, Y, Z = self.meta["dims"] self.id_to_host = {} @@ -130,7 +134,15 @@ class NetworkModel: net_cong = worst_link_util(loads, max_throughput) elif self.topology == "torus3d": - host_list = [self.id_to_host[n] for n in job.scheduled_nodes] + X = self.config["TORUS_X"] + Y = self.config["TORUS_Y"] + Z = self.config["TORUS_Z"] + hosts_per_router = self.config["HOSTS_PER_ROUTER"] + #host_list = [self.id_to_host[n] for n in job.scheduled_nodes] + host_list = [ + torus_host_from_real_index(n, X, Y, Z, hosts_per_router) + for n in job.scheduled_nodes + ] loads = link_loads_for_job_torus(self.net_graph, self.meta, host_list, net_tx) net_cong = worst_link_util(loads, max_throughput) if debug: diff --git a/raps/network/torus3d.py b/raps/network/torus3d.py index 4d8054d..b88e1d2 100644 --- a/raps/network/torus3d.py +++ b/raps/network/torus3d.py @@ -3,96 +3,120 @@ import networkx as nx from pathlib import Path -def build_torus3d(dims, wrap=True, link_bw=1e9, hosts_per_router=1, routing="DOR_XYZ", coords_csv=None): +def build_torus3d( + dims, + wrap=True, + hosts_per_router: int = 1, + torus_link_bw: float = None, + latency_per_hop: float = None, + network_max_bw: float = None, +): """ - Build a 3D torus at router granularity, then attach host nodes to routers. - Node ids in the returned graph are host names ("h_x_y_z_i") and router names ("r_x_y_z"). - Edges have attribute 'capacity' (bytes/s) and 'latency' (per hop). - - Examples - -------- - >>> from raps.plotting import plot_network_graph - >>> G, meta = build_torus3d(dims=(2, 2, 2)) - >>> plot_network_graph(G, 'torus3d.png') + Build a 3D torus network (routers + hosts). + Each router r_x_y_z connects to 6 neighbors (±X, ±Y, ±Z) + and attaches hosts h_x_y_z_p for p ∈ [0..hosts_per_router-1]. 
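+
+    For example, dims=(2, 2, 2) with hosts_per_router=1 yields 8 routers and
+    8 hosts; with wrap=True that gives 12 router links plus 8 host links.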
+ + Returns: + (G, meta) where: + - G: networkx.Graph + - meta: dict with topology info for plotting/simulation """ - X, Y, Z = map(int, dims) + X, Y, Z = dims G = nx.Graph() - # Routers - def rname(x, y, z): - return f"r_{x}_{y}_{z}" - + # --- Add routers with normalized coordinates --- for x in range(X): for y in range(Y): for z in range(Z): - G.add_node(rname(x, y, z), kind="router", coord=(x, y, z)) - - # Toroidal links between routers (±x, ±y, ±z) - def wrapi(i, n): - return (i + n) % n if wrap else (None if i < 0 or i >= n else i) - + name = f"r_{x}_{y}_{z}" + G.add_node( + name, + type="router", + x=x / (X - 1 if X > 1 else 1), + y=y / (Y - 1 if Y > 1 else 1), + z=z / (Z - 1 if Z > 1 else 1), + ) + + # --- Add wrap-around router-to-router edges --- for x in range(X): for y in range(Y): for z in range(Z): - u = rname(x, y, z) - # x+ - nxp = wrapi(x + 1, X) - v = rname(nxp, y, z) if nxp is not None else None - if v and not G.has_edge(u, v): - G.add_edge(u, v, capacity=link_bw) - # y+ - nyp = wrapi(y + 1, Y) - v = rname(x, nyp, z) if nyp is not None else None - if v and not G.has_edge(u, v): - G.add_edge(u, v, capacity=link_bw) - # z+ - nzp = wrapi(z + 1, Z) - v = rname(x, y, nzp) if nzp is not None else None - if v and not G.has_edge(u, v): - G.add_edge(u, v, capacity=link_bw) - - # Attach hosts to routers + src = f"r_{x}_{y}_{z}" + + nx_ = (x + 1) % X if wrap else x + 1 + if nx_ < X: + G.add_edge( + src, f"r_{nx_}_{y}_{z}", + bandwidth=torus_link_bw, + latency=latency_per_hop, + type="router_link" + ) + + ny_ = (y + 1) % Y if wrap else y + 1 + if ny_ < Y: + G.add_edge( + src, f"r_{x}_{ny_}_{z}", + bandwidth=torus_link_bw, + latency=latency_per_hop, + type="router_link" + ) + + nz_ = (z + 1) % Z if wrap else z + 1 + if nz_ < Z: + G.add_edge( + src, f"r_{x}_{y}_{nz_}", + bandwidth=torus_link_bw, + latency=latency_per_hop, + type="router_link" + ) + + # --- Add hosts and host-router edges --- + for x in range(X): + for y in range(Y): + for z in range(Z): + router = f"r_{x}_{y}_{z}" + for p in range(hosts_per_router): + host = f"h_{x}_{y}_{z}_{p}" + G.add_node( + host, + type="host", + x=(x + 0.1) / (X - 1 if X > 1 else 1), + y=(y + 0.1) / (Y - 1 if Y > 1 else 1), + z=(z + 0.1 * (p + 1)) / (Z - 1 if Z > 1 else 1), + ) + G.add_edge( + host, router, + bandwidth=network_max_bw, + latency=latency_per_hop, + type="host_link" + ) + + # --- Build host <-> router mappings for simulator use --- host_to_router = {} router_to_hosts = {} - def hname(x, y, z, i): - return f"h_{x}_{y}_{z}_{i}" - - # If a nid→(x,y,z) CSV is supplied, place accordingly; else dense round-robin - # CSV format: nid,x,y,z[,i] - nid_placement = {} - if coords_csv: - p = Path(coords_csv) - with p.open("rt") as fh: - rd = csv.reader(fh) - for row in rd: - if not row: - continue - nid = int(row[0]) - x, y, z = map(int, row[1:4]) - i = int(row[4]) if len(row) > 4 else 0 - nid_placement[nid] = (x, y, z, i) - - # Build hosts for x in range(X): for y in range(Y): for z in range(Z): - r = rname(x, y, z) - router_to_hosts[r] = [] - for i in range(hosts_per_router): - h = hname(x, y, z, i) - G.add_node(h, kind="host", coord=(x, y, z), local_index=i) - G.add_edge(h, r, capacity=link_bw) # host↔router edge; you can cap with NETWORK_MAX_BW instead - host_to_router[h] = r - router_to_hosts[r].append(h) + router = f"r_{x}_{y}_{z}" + router_to_hosts[router] = [] + for p in range(hosts_per_router): + host = f"h_{x}_{y}_{z}_{p}" + host_to_router[host] = router + router_to_hosts[router].append(host) meta = { + "topology": "torus3d", "dims": 
(X, Y, Z), + "hosts_per_router": hosts_per_router, "wrap": wrap, - "routing": routing, + "num_routers": X * Y * Z, + "num_hosts": X * Y * Z * hosts_per_router, "host_to_router": host_to_router, "router_to_hosts": router_to_hosts, } + + print(f"Built 3D torus with {meta['num_routers']} routers and {meta['num_hosts']} hosts.") return G, meta @@ -156,3 +180,15 @@ def link_loads_for_job_torus(G, meta, host_list, traffic_bytes): e = tuple(sorted((u, v))) loads[e] = loads.get(e, 0) + traffic_bytes return loads + + +def torus_host_from_real_index(real_n, X, Y, Z, hosts_per_router): + total_hosts = X * Y * Z * hosts_per_router + idx = real_n % total_hosts + r = idx // hosts_per_router + h = idx % hosts_per_router + z = r % Z + y = (r // Z) % Y + x = (r // (Y * Z)) % X + return f"h_{x}_{y}_{z}_{h}" + diff --git a/raps/plotting.py b/raps/plotting.py index 7135595..06c3da3 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -17,10 +17,13 @@ import itertools from pathlib import Path import matplotlib.pyplot as plt import matplotlib.ticker as ticker +from mpl_toolkits.mplot3d import Axes3D from matplotlib.ticker import MaxNLocator + import time import numpy as np import networkx as nx +import random from uncertainties import unumpy from rich.progress import track @@ -505,6 +508,7 @@ def plot_fattree_hierarchy(G, k=32, save_path='net_fattree.png'): if save_path: plt.savefig(save_path, dpi=300) + def plot_dragonfly(G, save_path='net_dragonfly.png'): """ Draw a circular Dragonfly layout: groups in a large ring, @@ -571,6 +575,93 @@ def plot_dragonfly(G, save_path='net_dragonfly.png'): plt.savefig(save_path, dpi=300) +def plot_torus2d(G, save_path="net_torus2d.png"): + import matplotlib.pyplot as plt + + routers = [n for n, d in G.nodes(data=True) if d["type"] == "router"] + hosts = [n for n, d in G.nodes(data=True) if d["type"] == "host"] + + fig, ax = plt.subplots(figsize=(8,8)) + + for u, v, d in G.edges(data=True): + if d.get("type") == "router_link": + x1, y1 = G.nodes[u]["x"], G.nodes[u]["y"] + x2, y2 = G.nodes[v]["x"], G.nodes[v]["y"] + ax.plot([x1, x2], [y1, y2], color="gray", alpha=0.1, linewidth=0.5) + + # flatten z by adding it to y or x offset + xs = [G.nodes[n]["x"] for n in routers] + ys = [G.nodes[n]["y"] + 0.05*G.nodes[n]["z"] for n in routers] + ax.scatter(xs, ys, c="orange", s=10, label="Routers", alpha=0.8) + + hx = [G.nodes[n]["x"] for n in hosts] + hy = [G.nodes[n]["y"] + 0.05*G.nodes[n]["z"] for n in hosts] + ax.scatter(hx, hy, c="blue", s=4, label="Hosts", alpha=0.5) + + ax.set_xlabel("X") + ax.set_ylabel("Y + (scaled Z)") + ax.legend() + if save_path: + plt.savefig(save_path, dpi=300) + + +def plot_torus3d(G, active_edges=None, max_edges=4000, save_path="net_torus3d.png"): + """ + Plot a 3D torus with routers, hosts, and optional job link highlights. 
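+
+    A typical call (assuming a graph built by build_torus3d) might be:
+
+        plot_torus3d(G, active_edges=[("r_0_0_0", "r_1_0_0")])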
+ Args: + G : networkx.Graph + active_edges : list of (u,v) tuples for job links to highlight + max_edges : subsample edges to avoid clutter + """ + fig = plt.figure(figsize=(8, 8)) + ax = fig.add_subplot(111, projection="3d") + + # --- Separate routers and hosts --- + routers = [n for n, d in G.nodes(data=True) if d["type"] == "router"] + hosts = [n for n, d in G.nodes(data=True) if d["type"] == "host"] + + # --- Plot routers --- + xs, ys, zs = [G.nodes[n]["x"] for n in routers], [G.nodes[n]["y"] for n in routers], [G.nodes[n]["z"] for n in routers] + ax.scatter(xs, ys, zs, c="orange", s=6, label="Routers", alpha=0.8) + + # --- Plot hosts --- + hx, hy, hz = [G.nodes[n]["x"] for n in hosts], [G.nodes[n]["y"] for n in hosts], [G.nodes[n]["z"] for n in hosts] + ax.scatter(hx, hy, hz, c="dodgerblue", s=3, label="Hosts", alpha=0.6) + + # --- Draw router-to-router edges (subsampled) --- + all_router_edges = [(u, v) for u, v, d in G.edges(data=True) if d.get("type") == "router_link"] + if len(all_router_edges) > max_edges: + all_router_edges = random.sample(all_router_edges, max_edges) + for u, v in all_router_edges: + x1, y1, z1 = G.nodes[u]["x"], G.nodes[u]["y"], G.nodes[u]["z"] + x2, y2, z2 = G.nodes[v]["x"], G.nodes[v]["y"], G.nodes[v]["z"] + ax.plot([x1, x2], [y1, y2], [z1, z2], color="gray", alpha=0.05, linewidth=0.5) + + # --- Draw host links lightly --- + for u, v, d in G.edges(data=True): + if d.get("type") == "host_link": + x1, y1, z1 = G.nodes[u]["x"], G.nodes[u]["y"], G.nodes[u]["z"] + x2, y2, z2 = G.nodes[v]["x"], G.nodes[v]["y"], G.nodes[v]["z"] + ax.plot([x1, x2], [y1, y2], [z1, z2], color="lightblue", alpha=0.05, linewidth=0.3) + + # --- Overlay active job edges --- + if active_edges: + for u, v in active_edges: + if u in G.nodes and v in G.nodes: + x1, y1, z1 = G.nodes[u]["x"], G.nodes[u]["y"], G.nodes[u]["z"] + x2, y2, z2 = G.nodes[v]["x"], G.nodes[v]["y"], G.nodes[v]["z"] + ax.plot([x1, x2], [y1, y2], [z1, z2], color="red", linewidth=1.8, alpha=0.8) + + ax.set_xlabel("X") + ax.set_ylabel("Y") + ax.set_zlabel("Z") + ax.legend() + plt.tight_layout() + if save_path: + plt.savefig(save_path, dpi=300) + + + if __name__ == "__main__": plotter = Plotter() # plotter.plot_history([1, 2, 3, 4]) -- GitLab From 32a0b2c9ec4354f75de599cb75f9e1f267dca19f Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 10 Oct 2025 18:06:49 +0300 Subject: [PATCH 350/388] Dump output network png files into the output_dir --- raps/network/__init__.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/raps/network/__init__.py b/raps/network/__init__.py index 56dc4cb..852c6c6 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -1,3 +1,5 @@ +import os + from .base import ( all_to_all_paths, apply_job_slowdown, @@ -36,6 +38,8 @@ __all__ = [ class NetworkModel: def __init__(self, *, available_nodes, config, **kwargs): self.config = config + self.output_dir = kwargs.get('output_dir') + self.output_dir.mkdir(parents=True, exist_ok=True) self.topology = config.get("TOPOLOGY") self.max_link_bw = config.get("NETWORK_MAX_BW", 1e9) # default safeguard self.real_to_fat_idx = kwargs.get("real_to_fat_idx", {}) @@ -45,7 +49,8 @@ class NetworkModel: self.fattree_k = config.get("FATTREE_K") self.net_graph = build_fattree(self.fattree_k, total_nodes) #self.net_graph = subsample_hosts(self.net_graph, num_hosts=4626) - plot_fattree_hierarchy(self.net_graph, k=self.fattree_k) + save_path = os.path.join(self.output_dir, "net-fat-tree.png") + 
plot_fattree_hierarchy(self.net_graph, k=self.fattree_k, save_path=save_path) elif self.topology == "torus3d": dims = ( @@ -59,8 +64,10 @@ class NetworkModel: # Build the graph and metadata self.net_graph, self.meta = build_torus3d(dims, wrap, hosts_per_router=hosts_per_router) - plot_torus2d(self.net_graph) - plot_torus3d(self.net_graph) + save_path = os.path.join(self.output_dir, "net-torus2d.png") + plot_torus2d(self.net_graph, save_path=save_path) + save_path = os.path.join(self.output_dir, "net-torus3d.png") + plot_torus3d(self.net_graph, save_path=save_path) # Deterministic numeric → host mapping X, Y, Z = self.meta["dims"] @@ -92,7 +99,8 @@ class NetworkModel: self.real_to_fat_idx = build_dragonfly_idx_map(D, A, P, total_real_nodes) print(f"[DEBUG] Dragonfly mapping: {len(self.real_to_fat_idx)} entries") - plot_dragonfly(self.net_graph) + save_path = os.path.join(self.output_dir, "net-dragonfly.png") + plot_dragonfly(self.net_graph, save_path=save_path) elif self.topology == "capacity": # Capacity-only model: no explicit graph -- GitLab From 63fefdeb8a68cbecde25ef67c5d7e362f134ae13 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sat, 11 Oct 2025 17:16:53 +0300 Subject: [PATCH 351/388] Clean up: Remove plot_network_graph() and references --- raps/plotting.py | 37 ------------------------------------- raps/run_sim.py | 11 +---------- 2 files changed, 1 insertion(+), 47 deletions(-) diff --git a/raps/plotting.py b/raps/plotting.py index 06c3da3..44a66af 100644 --- a/raps/plotting.py +++ b/raps/plotting.py @@ -408,43 +408,6 @@ def plot_nodes_gantt(*, ax=None, jobs): return ax -def plot_network_graph(G, filename, layout='spring'): - """ - Plot the network graph with edge labels and save it to a file. - - Parameters - ---------- - G : networkx.Graph - The graph to plot. - filename : str - The path to save the plot. - layout : str, optional - The layout to use for the plot. Can be 'spring', 'circular', 'kamada_kawai', 'random', 'shell', 'spectral'. - Default is 'spring'. - """ - plt.figure(figsize=(20, 20)) - if layout == 'spring': - pos = nx.spring_layout(G) - elif layout == 'circular': - pos = nx.circular_layout(G) - elif layout == 'kamada_kawai': - pos = nx.kamada_kawai_layout(G) - elif layout == 'random': - pos = nx.random_layout(G) - elif layout == 'shell': - pos = nx.shell_layout(G) - elif layout == 'spectral': - pos = nx.spectral_layout(G) - else: - raise ValueError(f"Unsupported layout: {layout}") - - nx.draw(G, pos, with_labels=True, node_size=500, node_color="skyblue") - edge_labels = nx.get_edge_attributes(G, "capacity") - nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels) - plt.savefig(filename) - plt.close() - - def plot_fattree_hierarchy(G, k=32, save_path='net_fattree.png'): """Draw a hierarchical Fat-Tree layout with automatic scaling.""" pos = {} diff --git a/raps/run_sim.py b/raps/run_sim.py index 285b70d..9caa96a 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -8,7 +8,7 @@ import pandas as pd import sys import warnings from raps.ui import LayoutManager -from raps.plotting import Plotter, plot_network_graph +from raps.plotting import Plotter from raps.engine import Engine from raps.multi_part_engine import MultiPartEngine from raps.utils import write_dict_to_file, pydantic_add_args, SubParsers, read_yaml_parsed @@ -148,15 +148,6 @@ def run_sim(sim_config: SingleSimConfig): else: print('Cooling model not enabled... 
skipping output of plot') - if 'net' in sim_config.plot: - if engine.network_model: - plot_network_graph( - engine.network_model.net_graph, - out / f'{engine.system_config.system_name}_network.png', - ) - else: - print('Network model not enabled... skipping output of plot') - if out: if sim_config.uncertainties: # Parquet cannot handle annotated ufloat format AFAIK -- GitLab From c6b94cf5fba617bf403c03b27d64346e04619525 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Sun, 12 Oct 2025 13:52:09 +0300 Subject: [PATCH 352/388] Add in synthetic workload test for network -w network_test (see README.md for example) --- README.md | 4 ++++ raps/sim_config.py | 4 ++-- raps/workloads/__init__.py | 4 +++- raps/workloads/network.py | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 raps/workloads/network.py diff --git a/README.md b/README.md index a2fb171..682e38a 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,10 @@ get the datasets. To run a network simulation, use the following command: raps run -f /opt/data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit --start '2019-08-22T00:00:00+00:00' -t 12h --arrival poisson --net +To simulate synthetic network tests: + + raps run --system lassen -w network_test --net -t 15m + ## Snapshot of extracted workload data To reduce the expense of extracting the needed data from the telemetry parquet files, diff --git a/raps/sim_config.py b/raps/sim_config.py index 2f8a75f..72ce1b6 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -134,8 +134,8 @@ class SimConfig(RAPSBaseModel, abc.ABC): """ Grab data from live system. """ # Workload arguments (TODO split into separate model) - workload: Literal['random', 'benchmark', 'peak', 'idle', - 'synthetic', 'multitenant', 'replay', 'randomAI'] = "random" + workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', + 'multitenant', 'replay', 'randomAI', 'network_test'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] diff --git a/raps/workloads/__init__.py b/raps/workloads/__init__.py index a34261a..a88873f 100644 --- a/raps/workloads/__init__.py +++ b/raps/workloads/__init__.py @@ -14,6 +14,7 @@ from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY from .distribution import DistributionWorkload from .live import continuous_job_generation from .multitenant import MultitenantWorkload +from .network import NetworkTestWorkload from .utils import plot_job_hist @@ -51,7 +52,8 @@ class Workload( BaseWorkload, DistributionWorkload, BasicWorkload, - MultitenantWorkload + MultitenantWorkload, + NetworkTestWorkload ): """Final workload class with all workload types.""" pass diff --git a/raps/workloads/network.py b/raps/workloads/network.py new file mode 100644 index 0000000..6cc3bbb --- /dev/null +++ b/raps/workloads/network.py @@ -0,0 +1,36 @@ + +from raps.job import Job, job_dict + +class NetworkTestWorkload: + def network_test(self, **kwargs): + """ + A synthetic workload to test network congestion. 
+ """ + config = kwargs.get('config', {}) + # High network traffic to trigger congestion + # These values are per-node, and the network simulation sums them up + # so we need to make them high enough to exceed the total network bandwidth + net_tx = 1e12 # bytes + net_rx = 1e12 # bytes + + job_info = job_dict( + nodes_required=2, + name="network-test-job", + account="test", + cpu_trace=[1], + gpu_trace=[1], + ntx_trace=[net_tx], + nrx_trace=[net_rx], + end_state='COMPLETED', + id=1, + priority=100, + partition='partition', + submit_time=0, + time_limit=3600, + start_time=0, + end_time=3600, + expected_run_time=3600, + trace_quanta=20, + ) + job = Job(job_info) + return [job] -- GitLab From f9eb2f800cb33801ea2f486286d56d4cc1015feb Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Mon, 13 Oct 2025 17:08:22 +0300 Subject: [PATCH 353/388] Make the synthetic network test a bit more sophisticated - 5 jobs now --- raps/workloads/network.py | 76 ++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/raps/workloads/network.py b/raps/workloads/network.py index 6cc3bbb..00d945d 100644 --- a/raps/workloads/network.py +++ b/raps/workloads/network.py @@ -1,36 +1,56 @@ from raps.job import Job, job_dict + class NetworkTestWorkload: def network_test(self, **kwargs): """ - A synthetic workload to test network congestion. + Synthetic workload to test network congestion. + Generates several jobs with varying sizes and bandwidths, + including overlapping node assignments to induce interference. """ - config = kwargs.get('config', {}) - # High network traffic to trigger congestion - # These values are per-node, and the network simulation sums them up - # so we need to make them high enough to exceed the total network bandwidth - net_tx = 1e12 # bytes - net_rx = 1e12 # bytes + jobs = [] + trace_len = 180 # 15 minutes with 20s quanta + + # -------------------------------------------------------- + # Hard-coded configuration + # -------------------------------------------------------- + # Define per-job properties + job_configs = [ + # (job_id, node_list, bandwidth_bytes_per_tick) + (1, [0, 1], 1e11), # 2-node job + (2, [1, 2], 8e11), # overlaps node 1 (causes congestion) + (3, [256], 1e12), # isolated single-node job + (4, [512, 513, 514], 5e11), # multi-node but separate + (5, [1020], 1e12), # distant single-node job + ] + + runtime = 900 # seconds + time_limit = 1800 # seconds + trace_quanta = 20 # seconds + + # -------------------------------------------------------- + # Job creation loop + # -------------------------------------------------------- + for job_id, node_list, bw in job_configs: + job_info = job_dict( + id=job_id, + name=f"net_job_{job_id}", + account="test", + nodes_required=len(node_list), + scheduled_nodes=node_list, + cpu_trace=[1] * trace_len, + gpu_trace=[1] * trace_len, + ntx_trace=[bw] * trace_len, + nrx_trace=[bw] * trace_len, + submit_time=0, + start_time=0, + expected_run_time=runtime, + time_limit=time_limit, + end_state="COMPLETED", + trace_quanta=trace_quanta, + ) + jobs.append(Job(job_info)) + print(f"[DEBUG] Created net_job_{job_id} nodes={node_list} bw={bw:.2e}") - job_info = job_dict( - nodes_required=2, - name="network-test-job", - account="test", - cpu_trace=[1], - gpu_trace=[1], - ntx_trace=[net_tx], - nrx_trace=[net_rx], - end_state='COMPLETED', - id=1, - priority=100, - partition='partition', - submit_time=0, - time_limit=3600, - start_time=0, - end_time=3600, - expected_run_time=3600, - trace_quanta=20, - ) - job = Job(job_info) 
-        return [job]
+        return jobs
-- 
GitLab


From 054fd6f3eb1b40020d7a3ffacef278cc58714fce Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 14 Oct 2025 14:58:50 +0300
Subject: [PATCH 354/388] More tests and debugging of 5 synthetic network tests

---
 config/lassen.yaml        |  2 +-
 raps/workloads/network.py | 14 ++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/config/lassen.yaml b/config/lassen.yaml
index 2555aa2..ad636f0 100644
--- a/config/lassen.yaml
+++ b/config/lassen.yaml
@@ -120,7 +120,7 @@ cooling:
   w_cts_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW"
 network:
   topology: fat-tree
-  network_max_bw: 1000000000.0
+  network_max_bw: 12.5e9
   fattree_k: 32
   dragonfly_d: 11
   dragonfly_a: 9
diff --git a/raps/workloads/network.py b/raps/workloads/network.py
index 00d945d..e5302c8 100644
--- a/raps/workloads/network.py
+++ b/raps/workloads/network.py
@@ -16,13 +16,15 @@ class NetworkTestWorkload:
         # Hard-coded configuration
         # --------------------------------------------------------
         # Define per-job properties
+        bw = 1e10
         job_configs = [
             # (job_id, node_list, bandwidth_bytes_per_tick)
-            (1, [0, 1], 1e11),  # 2-node job
-            (2, [1, 2], 8e11),  # overlaps node 1 (causes congestion)
-            (3, [256], 1e12),  # isolated single-node job
+            (1, [0, 1], bw),  # 2-node job
+#            (2, [1, 2], bw),  # Job 2 overlaps node 1 (causes congestion)
+            (2, [128, 129], bw),  # Job 2 on a distant rack (no shared link)
+            (3, [256], bw),  # isolated single-node job
             (4, [512, 513, 514], 5e11),  # multi-node but separate
-            (5, [1020], 1e12),  # distant single-node job
+            (5, [1020], bw),  # distant single-node job
         ]

         runtime = 900  # seconds
@@ -53,4 +55,8 @@ class NetworkTestWorkload:
         jobs.append(Job(job_info))
         print(f"[DEBUG] Created net_job_{job_id} nodes={node_list} bw={bw:.2e}")

+        print("\n[DEBUG] Requested node assignments:")
+        for job in jobs:
+            print(f"  Job {job.id}: nodes_required={job.nodes_required}, scheduled_nodes={job.scheduled_nodes}")
+
         return jobs
-- 
GitLab


From 73bbe28e894390835a2e3f0edd23118a91bd9240 Mon Sep 17 00:00:00 2001
From: Wes Brewer
Date: Tue, 21 Oct 2025 15:51:54 -0400
Subject: [PATCH 355/388] Add support for inter-job congestion synthetic simulations both outside (scripts/run_inter_job_congestion.py) and within RAPS using -w inter_job_congestion

---
 README.md                              |   4 +
 raps/engine.py                         |  12 ++-
 raps/network/__init__.py               |   6 ++
 raps/network/base.py                   |  84 ++++++++++++++-
 raps/sim_config.py                     |   2 +-
 raps/stats.py                          |  13 +++
 raps/workloads/__init__.py             |   4 +-
 raps/workloads/inter_job_congestion.py | 141 +++++++++++++++++++++++++
 scripts/run_inter_job_congestion.py    |  97 +++++++++++++++++
 9 files changed, 359 insertions(+), 4 deletions(-)
 create mode 100644 raps/workloads/inter_job_congestion.py
 create mode 100644 scripts/run_inter_job_congestion.py

diff --git a/README.md b/README.md
index 682e38a..62bd2cd 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,10 @@ To simulate synthetic network tests:

     raps run --system lassen -w network_test --net -t 15m

+Run network congestion tests outside of RAPS:
+
+    python scripts/run_inter_job_congestion.py --config config/lassen.yaml -v
+
 ## Snapshot of extracted workload data

 To reduce the expense of extracting the needed data from the telemetry parquet files,
diff --git a/raps/engine.py b/raps/engine.py
index 5fe8cf4..8b89543 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -30,7 +30,8 @@ from raps.power import (
 from raps.network import (
     NetworkModel,
     apply_job_slowdown,
-    compute_system_network_stats
+    compute_system_network_stats,
+ 
simulate_inter_job_congestion ) from raps.telemetry import Telemetry from raps.cooling import ThermoFluidsModel @@ -292,6 +293,7 @@ class Engine: self.avg_net_tx = [] self.avg_net_rx = [] self.net_util_history = [] + self.net_congestion_history = [] self.avg_slowdown_history = [] self.max_slowdown_history = [] self.node_occupancy_history = [] @@ -619,6 +621,14 @@ class Engine: system_util = self.num_active_nodes / self.config['AVAILABLE_NODES'] * 100 self.record_util_stats(system_util=system_util) + # --- Inter-Job Network Congestion --- + if self.simulate_network and self.network_model and self.running: + total_congestion = simulate_inter_job_congestion( + self.network_model, self.running, self.config, self.debug + ) + self.net_congestion_history.append((self.current_timestep, total_congestion)) + # --- + # System Power if self.power_manager: # Power is always simulated power_df, rack_power, total_power_kw, total_loss_kw, jobs_power = \ diff --git a/raps/network/__init__.py b/raps/network/__init__.py index 852c6c6..bec27ce 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -9,6 +9,9 @@ from .base import ( network_slowdown, network_utilization, worst_link_util, + get_link_util_stats, + simulate_inter_job_congestion, + max_throughput_per_tick, ) from .fat_tree import build_fattree, node_id_to_host_name, subsample_hosts @@ -32,6 +35,9 @@ __all__ = [ "build_torus3d", "build_dragonfly", "dragonfly_node_id_to_host_name", + "simulate_inter_job_congestion", + "max_throughput_per_tick", + "get_link_util_stats", ] diff --git a/raps/network/base.py b/raps/network/base.py index f14c523..bab2ec8 100644 --- a/raps/network/base.py +++ b/raps/network/base.py @@ -1,5 +1,8 @@ import networkx as nx - +import numpy as np +from raps.utils import get_current_utilization +from raps.network.fat_tree import node_id_to_host_name +from raps.network.torus3d import link_loads_for_job_torus, torus_host_from_real_index def debug_print_trace(job, label: str = ""): """Print either the length (if iterable) or the value of job.gpu_trace.""" @@ -134,3 +137,82 @@ def worst_link_util(loads, throughput): if util > max_util: max_util = util return max_util + +def get_link_util_stats(loads, throughput, top_n=10): + """ + Calculates a distribution of link utilization stats. + Returns a dictionary with min, mean, max, std_dev, and top N congested links. + """ + if not loads: + return {'max': 0, 'mean': 0, 'min': 0, 'std_dev': 0, 'top_links': []} + + # Calculate utilization for every link + utilizations = {(edge): (byte_load * 8) / throughput for edge, byte_load in loads.items()} + + util_values = list(utilizations.values()) + + stats = { + 'max': np.max(util_values), + 'mean': np.mean(util_values), + 'min': np.min(util_values), + 'std_dev': np.std(util_values) + } + + # Get top N congested links + sorted_links = sorted(utilizations.items(), key=lambda item: item[1], reverse=True) + stats['top_links'] = sorted_links[:top_n] + + return stats + +def max_throughput_per_tick(legacy_cfg: dict, trace_quanta: int) -> float: + """Return bytes-per-tick throughput of a single link.""" + bw = legacy_cfg.get("NETWORK_MAX_BW") or 12.5e9 + return float(bw) * trace_quanta + +def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False): + """ + Simulates network congestion from a list of concurrently running jobs. + """ + if not network_model.net_graph: + print("[WARN] Network graph is not defined. 
Skipping congestion simulation.") + return 0.0 + + total_loads = {tuple(sorted(edge)): 0.0 for edge in network_model.net_graph.edges()} + trace_quanta = jobs[0].trace_quanta if jobs else 0 + + for job in jobs: + # Assuming job.running_time is 0 for this static simulation + job.running_time = 0 + job.trace_start_time = 0 + net_tx = get_current_utilization(job.ntx_trace, job) + + job_loads = {} + if network_model.topology in ("fat-tree", "dragonfly"): + if network_model.topology == "fat-tree": + k = int(legacy_cfg.get("FATTREE_K", 32)) + host_list = [node_id_to_host_name(n, k) for n in job.scheduled_nodes] + else: # dragonfly + host_list = [network_model.real_to_fat_idx[real_n] for real_n in job.scheduled_nodes] + + job_loads = link_loads_for_job(network_model.net_graph, host_list, net_tx) + + elif network_model.topology == "torus3d": + X = int(legacy_cfg.get("TORUS_X", 12)) + Y = int(legacy_cfg.get("TORUS_Y", 12)) + Z = int(legacy_cfg.get("TORUS_Z", 12)) + hosts_per_router = int(legacy_cfg.get("HOSTS_PER_ROUTER", 1)) + host_list = [ + torus_host_from_real_index(n, X, Y, Z, hosts_per_router) + for n in job.scheduled_nodes + ] + job_loads = link_loads_for_job_torus(network_model.net_graph, network_model.meta, host_list, net_tx) + + for edge, load in job_loads.items(): + edge_key = tuple(sorted(edge)) + if edge_key in total_loads: + total_loads[edge_key] += load + + max_throughput = max_throughput_per_tick(legacy_cfg, trace_quanta) + net_stats = get_link_util_stats(total_loads, max_throughput) + + return net_stats diff --git a/raps/sim_config.py b/raps/sim_config.py index 72ce1b6..9a50406 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -135,7 +135,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): # Workload arguments (TODO split into separate model) workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', - 'multitenant', 'replay', 'randomAI', 'network_test'] = "random" + 'multitenant', 'replay', 'randomAI', 'network_test', 'inter_job_congestion'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] diff --git a/raps/stats.py b/raps/stats.py index e5824a9..7df6208 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -140,6 +140,16 @@ def get_network_stats(engine: Engine): max_job_slow = 1.0 stats["max_per_job_slowdown"] = max_job_slow + if engine.net_congestion_history: + congestion_values = [c for t, c in engine.net_congestion_history] + stats['avg_inter_job_congestion'] = sum(congestion_values) / len(congestion_values) + stats['max_inter_job_congestion'] = max(congestion_values) + stats['min_inter_job_congestion'] = min(congestion_values) + else: + stats['avg_inter_job_congestion'] = 0.0 + stats['max_inter_job_congestion'] = 0.0 + stats['min_inter_job_congestion'] = 0.0 + return stats @@ -414,6 +424,9 @@ def print_formatted_report(engine_stats=None, "avg_network_util": "{:.2f}%", "avg_per_job_slowdown": "{:.2f}x", "max_per_job_slowdown": "{:.2f}x", + "avg_inter_job_congestion": "{:.2f}", + "max_inter_job_congestion": "{:.2f}", + "min_inter_job_congestion": "{:.2f}", }) diff --git a/raps/workloads/__init__.py b/raps/workloads/__init__.py index a88873f..2bfaf9b 100644 --- a/raps/workloads/__init__.py +++ b/raps/workloads/__init__.py @@ -15,6 +15,7 @@ from .distribution import DistributionWorkload from .live import continuous_job_generation from .multitenant import MultitenantWorkload from .network import NetworkTestWorkload +from .inter_job_congestion import InterJobCongestionWorkload from .utils import plot_job_hist @@ -53,7 +54,8 @@ class Workload( 
DistributionWorkload, BasicWorkload, MultitenantWorkload, - NetworkTestWorkload + NetworkTestWorkload, + InterJobCongestionWorkload ): """Final workload class with all workload types.""" pass diff --git a/raps/workloads/inter_job_congestion.py b/raps/workloads/inter_job_congestion.py new file mode 100644 index 0000000..3fd569d --- /dev/null +++ b/raps/workloads/inter_job_congestion.py @@ -0,0 +1,141 @@ +import math +import random +from typing import List, Tuple + +from raps.job import Job, job_dict +from raps.network import max_throughput_per_tick + +class InterJobCongestionWorkload: + """ Workload generator for inter-job congestion test """ + def inter_job_congestion(self, args) -> List[Job]: + legacy_cfg = self.config_map[self.partitions[0]] + topology = legacy_cfg.get("TOPOLOGY", "").lower() + return generate_jobs( + legacy_cfg=legacy_cfg, + topology=topology, + J=args.numjobs, + trace_quanta=legacy_cfg.get("TRACE_QUANTA", 20), + tx_fraction_per_job=getattr(args, 'txfrac', 0.35), # Assuming txfrac might be an arg + seed=args.seed + ) + + +def infer_group_params(legacy_cfg: dict, topology: str) -> Tuple[int, int, str]: + """ + Infer (hosts_per_group, total_groups, group_label) + depending on network topology. + """ + total_nodes = int(legacy_cfg["TOTAL_NODES"]) + + if topology == "fat-tree": + k = int(legacy_cfg.get("FATTREE_K", 32)) + H = k // 2 # hosts per ToR + R = math.ceil(total_nodes / H) + return H, R, "rack" + + elif topology == "dragonfly": + routers_per_group = int(legacy_cfg.get("ROUTERS_PER_GROUP", 8)) + nodes_per_router = int(legacy_cfg.get("NODES_PER_ROUTER", 4)) + H = routers_per_group * nodes_per_router + R = max(1, total_nodes // H) + return H, R, "group" + + elif topology == "torus3d": + dims = ( + int(legacy_cfg.get("TORUS_X", 12)), + int(legacy_cfg.get("TORUS_Y", 12)), + int(legacy_cfg.get("TORUS_Z", 12)), + ) + R = math.prod(dims) + return 1, R, "torus" + + else: + return 1, 1, "flat" + + +def pick_two_distinct_groups(R: int) -> Tuple[int, int]: + """Pick two distinct group indices (far apart if possible).""" + if R <= 2: + return (0, 1 if R > 1 else 0) + a = random.randrange(0, R // 2) + b = random.randrange(R // 2, R) + if a == b: + b = (b + 1) % R + return a, b + + +def nodes_in_group(group_idx: int, H: int, total_nodes: int, n: int) -> List[int]: + """Pick n contiguous nodes from a group.""" + start = group_idx * H + end = min(start + H, total_nodes) + n = min(n, end - start) + base = random.randrange(start, end - n + 1) if (end - start - n) > 0 else start + return list(range(base, base + n)) + + +def generate_jobs( + legacy_cfg: dict, + topology: str, + J: int = 60, + trace_quanta: int = 20, + tx_fraction_per_job: float = 0.35, + seed: int = 42 +) -> List[Job]: + """Generate synthetic jobs spanning and overlapping local groups.""" + random.seed(seed) + total_nodes = int(legacy_cfg["TOTAL_NODES"]) + H, R, label = infer_group_params(legacy_cfg, topology) + per_tick_bw = max_throughput_per_tick(legacy_cfg, trace_quanta) + per_dir = tx_fraction_per_job * per_tick_bw + + print(f"[INFO] topology={topology}, {label}s={R}, hosts_per_{label}={H}") + print(f"[INFO] total_nodes={total_nodes}, per-dir={per_dir:.2e} B/tick") + + jobs: List[Job] = [] + jid = 1 + + # Roughly 60% cross-group, 25% intra-group, 15% multi-group + n_cross = int(J * 0.6) + n_intra = int(J * 0.25) + n_multi = J - n_cross - n_intra + + for _ in range(n_cross): + a, b = pick_two_distinct_groups(R) + nodes = nodes_in_group(a, H, total_nodes, 1) + nodes_in_group(b, H, total_nodes, 1) + 
jobs.append(make_job(jid, nodes, per_dir, trace_quanta)) + jid += 1 + + for _ in range(n_intra): + g = random.randrange(0, R) + nodes = nodes_in_group(g, H, total_nodes, 2) + jobs.append(make_job(jid, nodes, per_dir, trace_quanta)) + jid += 1 + + for _ in range(n_multi): + a, b = pick_two_distinct_groups(R) + nodes = nodes_in_group(a, H, total_nodes, 2) + nodes_in_group(b, H, total_nodes, 2) + jobs.append(make_job(jid, nodes, per_dir, trace_quanta)) + jid += 1 + + print(f"[INFO] jobs={len(jobs)} (cross={n_cross}, intra={n_intra}, multi={n_multi})") + return jobs + + +def make_job(jid: int, nodes: List[int], per_dir: float, trace_quanta: int) -> Job: + """Helper: create one synthetic Job object.""" + trace_len = 900 // trace_quanta + return Job(job_dict( + id=jid, + name=f"job_{jid}", + account="test", + nodes_required=len(nodes), + scheduled_nodes=nodes, + cpu_trace=[0] * trace_len, + gpu_trace=[0] * trace_len, + ntx_trace=[per_dir] * trace_len, + nrx_trace=[per_dir] * trace_len, + trace_quanta=trace_quanta, + expected_run_time=900, + time_limit=1800, + end_state="COMPLETED" + )) diff --git a/scripts/run_inter_job_congestion.py b/scripts/run_inter_job_congestion.py new file mode 100644 index 0000000..9312fd4 --- /dev/null +++ b/scripts/run_inter_job_congestion.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +""" +RAPS Network Congestion Test (Inter-Job Interference) +====================================================== + +This script is a wrapper that uses the integrated `inter_job_congestion` +workload from the RAPS library to run a standalone network simulation. + +It evaluates inter-job network congestion by simulating multiple jobs +running concurrently on the same network and finding the total congestion +on the most loaded link. + +Usage: + python scripts/run_inter_job_congestion.py --config config/lassen.yaml + +Example: + python scripts/run_inter_job_congestion.py --config config/lassen.yaml --jobs 80 --txfrac 0.35 -v +""" + +from __future__ import annotations +import argparse +from pathlib import Path + +from raps.system_config import get_system_config +from raps.network import ( + NetworkModel, + simulate_inter_job_congestion, +) +from raps.workloads import Workload + + +def print_verbose_stats(stats): + print("\n--- Detailed Network Congestion Stats ---") + print(f" Max Congestion (Worst Link): {stats['max']:.2f}") + print(f" Mean Link Congestion: {stats['mean']:.2f}") + print(f" Min Link Congestion: {stats['min']:.2f}") + print(f" Std Dev of Congestion: {stats['std_dev']:.2f}") + print("\n Top 10 Most Congested Links:") + for (link, congestion) in stats['top_links']: + print(f" - Link {link}: {congestion:.2f}") + print("---------------------------------------") + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser(description="Standalone inter-job network congestion test for RAPS.") + parser.add_argument("--config", required=True, help="Path to system YAML (e.g., config/lassen.yaml)") + parser.add_argument("--jobs", type=int, default=60, help="Number of synthetic jobs") + parser.add_argument("--txfrac", type=float, default=0.35, help="Fraction of per-link bandwidth per job") + parser.add_argument("--debug", action="store_true", help="Enable network debug output") + parser.add_argument("--verbose", "-v", action="store_true", help="Print detailed statistics") + args = parser.parse_args() + + # --- Load config and detect 
topology --- + sys_cfg = get_system_config(args.config) + legacy = sys_cfg.get_legacy() + + topology = legacy.get("TOPOLOGY", "").lower() + if not topology: + raise ValueError(f"Could not infer topology from {args.config}. Found: {topology!r}") + + # --- Generate Jobs via Workload module --- + # The workload class expects specific attribute names, so we add them to the args object. + args.workload = 'inter_job_congestion' + args.numjobs = args.jobs + args.seed = 42 # Keep seed consistent for this test script + args.start = None + + workload_generator = Workload(args, legacy) + workload_data = workload_generator.generate_jobs() + jobs = workload_data.jobs + + print(f"[INFO] Detected topology: {topology}") + print(f"[INFO] Generated {len(jobs)} jobs for congestion test.") + + # --- Initialize network model --- + net = NetworkModel( + config=legacy, + available_nodes=list(range(legacy["TOTAL_NODES"])), + output_dir=Path(f"test-{Path(args.config).stem}"), + debug=args.debug, + ) + + # --- Simulate all jobs running concurrently --- + congestion_stats = simulate_inter_job_congestion(net, jobs, legacy, debug=args.debug) + + print(f"[RESULT] config={args.config}, topology={topology}, jobs={len(jobs)}, " + f"total_congestion={congestion_stats['max']:.2f}") + + if args.verbose: + print_verbose_stats(congestion_stats) + + +if __name__ == "__main__": + main() \ No newline at end of file -- GitLab From dba096f067d65e89aa07524d772972a7bdb188ab Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 21 Oct 2025 15:40:36 -0400 Subject: [PATCH 356/388] Add sample run command for -w inter_job_congestion --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 62bd2cd..fb2594f 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,8 @@ To simulate synthetic network tests: raps run --system lassen -w network_test --net -t 15m + raps run --system lassen -w inter_job_congestion --net -t 15m + Run network congestion tests outside of RAPS: python scripts/run_inter_job_congestion.py --config config/lassen.yaml -v -- GitLab From b2181f80eceedcc7fb41e86128d6c186a7da1723 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 22 Oct 2025 16:15:49 -0400 Subject: [PATCH 357/388] Remove dead code that was not properly deleted when resolving merge conflict --- raps/engine.py | 69 -------------------------------------------------- 1 file changed, 69 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 6e3ad19..67cd999 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -340,75 +340,6 @@ class Engine: start_date=self.start, ) - if sim_config.live and not sim_config.replay: - td = Telemetry(**sim_config_dict) - workload_data = td.load_from_live_system() - elif sim_config.replay: - # TODO: this will have issues if running separate systems or custom systems - partition_short = partition.split("/")[-1] if partition else None - td = Telemetry( - **sim_config_dict, - partition=partition, - ) - if partition: - snap_map = {p.stem: p for p in sim_config.replay[0].glob("*.npz")} - if len(snap_map) > 0: - if partition_short not in snap_map: - raise RuntimeError(f"Snapshot '{partition_short}.npz' not in {sim_config.replay[0]}") - replay_files = [snap_map[partition_short]] - else: - replay_files = sim_config.replay - else: - replay_files = sim_config.replay - - workload_data = td.load_from_files(replay_files) - else: # Synthetic jobs - wl = Workload(sim_config_args, system_config_dict) - workload_data = wl.generate_jobs() - td = Telemetry(**sim_config_dict) - - jobs = workload_data.jobs - - # TODO 
refactor how stat/end/fastforward/time work - if sim_config.fastforward is not None: - workload_data.telemetry_start = workload_data.telemetry_start + sim_config.fastforward - - if sim_config.time is not None: - workload_data.telemetry_end = workload_data.telemetry_start + sim_config.time - - if sim_config.time_delta is not None: - time_delta = sim_config.time_delta - else: - time_delta = 1 - - if sim_config.continuous_job_generation: - continuous_workload = wl - else: - continuous_workload = None - - accounts = None - if sim_config.accounts: - job_accounts = Accounts(jobs) - if sim_config.accounts_json: - loaded_accounts = Accounts.from_json_filename(sim_config.accounts_json) - accounts = Accounts.merge(loaded_accounts, job_accounts) - else: - accounts = job_accounts - - engine = Engine( - power_manager=power_manager, - flops_manager=flops_manager, - cooling_model=cooling_model, - continuous_workload=continuous_workload, - jobs=jobs, - accounts=accounts, - telemetry=td, - sim_config=sim_config, - system_config=system_config, - ) - - return engine, workload_data, time_delta - def add_running_jobs_to_queue(self, jobs_to_submit: List): """ Modifies jobs_to_submit and self.queue -- GitLab From e9f762667e2a0fbcea0463375bc7be1923fecdb1 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 22 Oct 2025 17:05:45 -0400 Subject: [PATCH 358/388] Fix to ensure `--out none` doesn't output network plot files --- raps/network/__init__.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/raps/network/__init__.py b/raps/network/__init__.py index bec27ce..2298928 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -45,7 +45,8 @@ class NetworkModel: def __init__(self, *, available_nodes, config, **kwargs): self.config = config self.output_dir = kwargs.get('output_dir') - self.output_dir.mkdir(parents=True, exist_ok=True) + if self.output_dir: + self.output_dir.mkdir(parents=True, exist_ok=True) self.topology = config.get("TOPOLOGY") self.max_link_bw = config.get("NETWORK_MAX_BW", 1e9) # default safeguard self.real_to_fat_idx = kwargs.get("real_to_fat_idx", {}) @@ -54,9 +55,11 @@ class NetworkModel: total_nodes = config['TOTAL_NODES'] - len(config['DOWN_NODES']) self.fattree_k = config.get("FATTREE_K") self.net_graph = build_fattree(self.fattree_k, total_nodes) + # TODO: future testing of subsampling feature #self.net_graph = subsample_hosts(self.net_graph, num_hosts=4626) - save_path = os.path.join(self.output_dir, "net-fat-tree.png") - plot_fattree_hierarchy(self.net_graph, k=self.fattree_k, save_path=save_path) + if self.output_dir: + save_path = os.path.join(self.output_dir, "net-fat-tree.png") + plot_fattree_hierarchy(self.net_graph, k=self.fattree_k, save_path=save_path) elif self.topology == "torus3d": dims = ( @@ -70,10 +73,11 @@ class NetworkModel: # Build the graph and metadata self.net_graph, self.meta = build_torus3d(dims, wrap, hosts_per_router=hosts_per_router) - save_path = os.path.join(self.output_dir, "net-torus2d.png") - plot_torus2d(self.net_graph, save_path=save_path) - save_path = os.path.join(self.output_dir, "net-torus3d.png") - plot_torus3d(self.net_graph, save_path=save_path) + if self.output_dir: + save_path = os.path.join(self.output_dir, "net-torus2d.png") + plot_torus2d(self.net_graph, save_path=save_path) + save_path = os.path.join(self.output_dir, "net-torus3d.png") + plot_torus3d(self.net_graph, save_path=save_path) # Deterministic numeric → host mapping X, Y, Z = self.meta["dims"] @@ -105,8 +109,9 @@ class 
NetworkModel: self.real_to_fat_idx = build_dragonfly_idx_map(D, A, P, total_real_nodes) print(f"[DEBUG] Dragonfly mapping: {len(self.real_to_fat_idx)} entries") - save_path = os.path.join(self.output_dir, "net-dragonfly.png") - plot_dragonfly(self.net_graph, save_path=save_path) + if self.output_dir: + save_path = os.path.join(self.output_dir, "net-dragonfly.png") + plot_dragonfly(self.net_graph, save_path=save_path) elif self.topology == "capacity": # Capacity-only model: no explicit graph -- GitLab From 3194fa6a65093a480b2a856fbc258bfdeb9bd885 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 22 Oct 2025 17:06:56 -0400 Subject: [PATCH 359/388] Fix issue with total_congestion calculation - was getting TypeError (int + dict) --- raps/engine.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/raps/engine.py b/raps/engine.py index 8b89543..d86ccd3 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -623,9 +623,13 @@ class Engine: # --- Inter-Job Network Congestion --- if self.simulate_network and self.network_model and self.running: - total_congestion = simulate_inter_job_congestion( + congestion_stats = simulate_inter_job_congestion( self.network_model, self.running, self.config, self.debug ) + if isinstance(congestion_stats, dict): + total_congestion = congestion_stats['mean'] + else: + total_congestion = congestion_stats self.net_congestion_history.append((self.current_timestep, total_congestion)) # --- -- GitLab From ad2b93e7e9735eb03c30a57344ec79c7f1f0b5fe Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 22 Oct 2025 17:41:28 -0400 Subject: [PATCH 360/388] Refactor network plotting to use NetworkModel.plot_topology method --- raps/engine.py | 4 +--- raps/network/__init__.py | 37 +++++++++++++++++++++---------------- raps/run_sim.py | 3 +++ raps/sim_config.py | 2 +- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index d86ccd3..68e540a 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -330,9 +330,7 @@ class Engine: available_nodes = self.resource_manager.available_nodes self.network_model = NetworkModel( available_nodes=available_nodes, - config=self.config, - plot=self.sim_config.plot, - output_dir=self.sim_config.get_output(), + config=self.config ) else: self.network_model = None diff --git a/raps/network/__init__.py b/raps/network/__init__.py index 2298928..3522b39 100644 --- a/raps/network/__init__.py +++ b/raps/network/__init__.py @@ -1,4 +1,5 @@ import os +import warnings from .base import ( all_to_all_paths, @@ -44,9 +45,6 @@ __all__ = [ class NetworkModel: def __init__(self, *, available_nodes, config, **kwargs): self.config = config - self.output_dir = kwargs.get('output_dir') - if self.output_dir: - self.output_dir.mkdir(parents=True, exist_ok=True) self.topology = config.get("TOPOLOGY") self.max_link_bw = config.get("NETWORK_MAX_BW", 1e9) # default safeguard self.real_to_fat_idx = kwargs.get("real_to_fat_idx", {}) @@ -57,9 +55,6 @@ class NetworkModel: self.net_graph = build_fattree(self.fattree_k, total_nodes) # TODO: future testing of subsampling feature #self.net_graph = subsample_hosts(self.net_graph, num_hosts=4626) - if self.output_dir: - save_path = os.path.join(self.output_dir, "net-fat-tree.png") - plot_fattree_hierarchy(self.net_graph, k=self.fattree_k, save_path=save_path) elif self.topology == "torus3d": dims = ( @@ -73,12 +68,6 @@ class NetworkModel: # Build the graph and metadata self.net_graph, self.meta = build_torus3d(dims, wrap, hosts_per_router=hosts_per_router) - if 
self.output_dir: - save_path = os.path.join(self.output_dir, "net-torus2d.png") - plot_torus2d(self.net_graph, save_path=save_path) - save_path = os.path.join(self.output_dir, "net-torus3d.png") - plot_torus3d(self.net_graph, save_path=save_path) - # Deterministic numeric → host mapping X, Y, Z = self.meta["dims"] self.id_to_host = {} @@ -109,10 +98,6 @@ class NetworkModel: self.real_to_fat_idx = build_dragonfly_idx_map(D, A, P, total_real_nodes) print(f"[DEBUG] Dragonfly mapping: {len(self.real_to_fat_idx)} entries") - if self.output_dir: - save_path = os.path.join(self.output_dir, "net-dragonfly.png") - plot_dragonfly(self.net_graph, save_path=save_path) - elif self.topology == "capacity": # Capacity-only model: no explicit graph self.net_graph = None @@ -174,3 +159,23 @@ class NetworkModel: raise ValueError(f"Unsupported topology: {self.topology}") return net_util, net_cong, net_tx, net_rx, max_throughput + + def plot_topology(self, output_dir): + """Plot network topology - save as png file in output_dir.""" + if output_dir: + if self.topology == "fat-tree": + save_path = output_dir / "net-fat-tree.png" + plot_fattree_hierarchy(self.net_graph, k=self.fattree_k, save_path=save_path) + elif self.topology == "dragonfly": + save_path = output_dir / "net-dragonfly.png" + plot_dragonfly(self.net_graph, save_path=save_path) + elif self.topology == "torus3d": + save_path = output_dir / "net-torus2d.png" + plot_torus2d(self.net_graph, save_path=save_path) + save_path = output_dir / "net-torus3d.png" + plot_torus3d(self.net_graph, save_path=save_path) + else: + warnings.warn( + f"plotting not supported for {self.topology} topology", + UserWarning + ) diff --git a/raps/run_sim.py b/raps/run_sim.py index 9caa96a..db50465 100644 --- a/raps/run_sim.py +++ b/raps/run_sim.py @@ -137,6 +137,9 @@ def run_sim(sim_config: SingleSimConfig): else: print('Cooling model not enabled... skipping output of plot') + if 'net' in sim_config.plot: + engine.network_model.plot_topology(out) + if 'temp' in sim_config.plot: if engine.cooling_model: ylabel = 'Tr_pri_Out[1]' diff --git a/raps/sim_config.py b/raps/sim_config.py index 9a50406..966a88c 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -327,7 +327,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): if self.jobsize_is_power_of is not None and self.jobsize_is_of_degree is not None: raise ValueError("jobsize_is_power_of and jobsize_is_of_degree are mutually exclusive") - if self.plot and not self.output: + if self.plot and self.output == "none": raise ValueError("plot requires an output directory to be set") if self.live and not self.replay and self.time is None: -- GitLab From 0c1235b09f6ac1dfcfe467b5e6cce83ffd131a16 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Wed, 22 Oct 2025 17:59:50 -0400 Subject: [PATCH 361/388] Consistently use `/opt/data` as the default data path in README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fb2594f..5cff439 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Note: Requires python3.12 or greater. # Frontier DATEDIR="date=2024-01-18" - DPATH=~/data/frontier-sample-2024-01-18 + DPATH=/opt/data/frontier raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR ## Open Telemetry dataset @@ -37,7 +37,7 @@ Note: Requires python3.12 or greater. 
For Marconi supercomputer, download `job_table.parquet` from https://zenodo.org/records/10127767 # Marconi100 - raps run --system marconi100 -f ~/data/marconi100/job_table.parquet + raps run --system marconi100 -f /opt/data/marconi100/job_table.parquet For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from https://zenodo.org/records/14007065 @@ -46,10 +46,10 @@ For Adastra MI250 supercomputer, download 'AdastaJobsMI250_15days.parquet' from For Google cluster trace v2 - raps run --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample --start '2011-05-02T00:10:00Z' + raps run --system gcloudv2 -f /opt/data/gcloud/v2/google_cluster_data_2011_sample --start '2011-05-02T00:10:00Z' # analyze dataset - raps telemetry --system gcloudv2 -f ~/data/gcloud/v2/google_cluster_data_2011_sample -v + raps telemetry --system gcloudv2 -f /opt/data/gcloud/v2/google_cluster_data_2011_sample -v For MIT Supercloud -- GitLab From 545c69ef2caaeffaaaff0b7e1a0e7295ec586be2 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Wed, 22 Oct 2025 18:03:45 -0400 Subject: [PATCH 362/388] Fixed jobs being killed prematurely in replay where this should not be done. --- raps/engine.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index 67cd999..ebf99c5 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -411,8 +411,12 @@ class Engine: # Free the nodes via the resource manager. self.resource_manager.free_nodes_from_job(job) - killed_jobs = [job for job in self.running if - job.end_time is not None and job.start_time + job.time_limit <= self.current_timestep] + if not replay: + killed_jobs = [job for job in self.running if + job.end_time is not None and + job.start_time + job.time_limit <= self.current_timestep] + else: + killed_jobs = [] need_reschedule = need_reschedule or (killed_jobs != []) @@ -555,7 +559,7 @@ class Engine: ) else: # if job.state == JobState.RUNNING: # Error checks - if job.running_time > job.time_limit and job.end_time is not None: + if not replay and job.running_time > job.time_limit and job.end_time is not None: raise Exception(f"Job exceded time limit! 
" f"{job.running_time} > {job.time_limit}" f"\n{job}" -- GitLab From 1365689d905381a22bab2630683febb782be559b Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 24 Oct 2025 20:39:29 -0400 Subject: [PATCH 363/388] Initial implementation of Hao's HPL analytical model --- raps/sim_config.py | 2 +- raps/workloads/__init__.py | 4 +- raps/workloads/hpl.py | 140 +++++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 raps/workloads/hpl.py diff --git a/raps/sim_config.py b/raps/sim_config.py index 254859a..a12512f 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -136,7 +136,7 @@ class SimConfig(RAPSBaseModel, abc.ABC): # Workload arguments (TODO split into separate model) workload: Literal['random', 'benchmark', 'peak', 'idle', 'synthetic', 'multitenant', 'replay', 'randomAI', 'network_test', - 'inter_job_congestion', 'calculon'] = "random" + 'inter_job_congestion', 'calculon', 'hpl'] = "random" """ Type of synthetic workload """ multimodal: list[float] = [1.0] diff --git a/raps/workloads/__init__.py b/raps/workloads/__init__.py index 9bcb41a..d789196 100644 --- a/raps/workloads/__init__.py +++ b/raps/workloads/__init__.py @@ -13,6 +13,7 @@ from .basic import BasicWorkload from .calculon import Calculon from .constants import JOB_NAMES, ACCT_NAMES, MAX_PRIORITY from .distribution import DistributionWorkload +from .hpl import HPL from .live import continuous_job_generation from .multitenant import MultitenantWorkload from .network import NetworkTestWorkload @@ -57,7 +58,8 @@ class Workload( MultitenantWorkload, NetworkTestWorkload, InterJobCongestionWorkload, - Calculon + Calculon, + HPL ): """Final workload class with all workload types.""" pass diff --git a/raps/workloads/hpl.py b/raps/workloads/hpl.py new file mode 100644 index 0000000..bb6f18c --- /dev/null +++ b/raps/workloads/hpl.py @@ -0,0 +1,140 @@ +""" +Test using: + + python main.py run -w hpl -d + python raps/workloads/hpl.py +""" +from raps.job import Job, job_dict +import numpy as np +import math, random, json + + +class HPL: + """Analytical HPL workload generator for ExaDigiT""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def hpl(self, **kwargs): + jobs = [] + # Example: parameter sweep across node counts or block sizes + hpl_tests = [ + #{"M": 131072, "b": 576, "P": 192, "Q": 384, "Rtype": "1-ring"}, + #{"M": 131072, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"}, + {"M": 741455, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"}, + ] + + #GCDS_PER_GPU = 2 + + for test in hpl_tests: + for partition in self.partitions: + cfg = self.config_map[partition] + trace_quanta = cfg["TRACE_QUANTA"] + + # --- Analytical model evaluation --- + results = self._run_hpl_model(**test) + + total_time = results["T_total"] + gpu_util = results["gpu_util"] + cpu_util = results["cpu_util"] + + num_samples = math.ceil(total_time / trace_quanta) + 1 + gpu_trace = np.full(num_samples, gpu_util) + cpu_trace = np.full(num_samples, cpu_util) + + job_info = job_dict( + #nodes_required=test["P"] * test["Q"] // (cfg["GPUS_PER_NODE"] * GCDS_PER_GPU), + nodes_required=test["P"] * test["Q"] // cfg["GPUS_PER_NODE"], + scheduled_nodes=[], + name=f"HPL_{test['M']}x{test['M']}", + account="benchmark", + cpu_trace=cpu_trace, + gpu_trace=gpu_trace, + ntx_trace=[], nrx_trace=[], + id=None, + end_state="COMPLETED", + priority=100, + partition=partition, + time_limit=total_time, + start_time=0, + end_time=total_time, + expected_run_time=total_time, + trace_quanta=trace_quanta, + 
trace_time=total_time, + trace_start_time=0, + trace_end_time=total_time, + ) + jobs.append(Job(job_info)) + return jobs + + def _run_hpl_model(self, M, b, P, Q, Rtype="1-ring", f=0.6): + # constants (Table II + Fig 2b) + CAllgather = 6.3e9 + C1ring = 7e9 + Creduce = 46e6 + Fcpublas = 240e9 + Fgemm = 24e12 + + Ml = M / P + Nl = M / Q + nb = int(M / b) + total_T = 0.0 + + print("*** nb:", nb) + for i in range(nb): + Ml_i = Ml - (i * b / P) + Nl1_i = max((1 - f) * Nl - i * b / Q, 0) + Nl2_i = f * Nl if i * b < f * Nl else Nl - i * b / Q + + TPDFACT = b ** 2 / Creduce + (2 / 3) * b ** 2 * Ml_i / Fcpublas + TLBCAST = 16 * b * Ml_i / C1ring + TUPD1 = 2 * b * Ml_i * Nl1_i / Fgemm + TUPD2 = 2 * b * Ml_i * Nl2_i / Fgemm + TRS1 = 16 * b * Nl1_i / CAllgather + TRS2 = 16 * b * Nl2_i / CAllgather + + total_T += max(TPDFACT + TLBCAST + TRS1, TUPD2) + max(TRS2, TUPD1) + + # derive synthetic utilization + gpu_util = min(1.0, (Fgemm / 25e12)) # normalized ratio + cpu_util = min(1.0, (Fcpublas / 250e9)) + + return {"T_total": total_T, "gpu_util": gpu_util, "cpu_util": cpu_util} + +if __name__ == "__main__": + import json + import numpy as np + + # Mock minimal configuration values to mimic ExaDigiT runtime + class DummyHPL(HPL): + def __init__(self): + # Provide fake partitions and system config + self.partitions = ["gpu"] + self.config_map = { + "gpu": { + "TRACE_QUANTA": 15.0, # seconds per trace tick + "GPUS_PER_NODE": 4, + "CPUS_PER_NODE": 64, + } + } + + # Instantiate dummy workload + workload = DummyHPL() + + # Run synthetic job generation + jobs = workload.hpl() + + print(f"Generated {len(jobs)} HPL jobs:\n") + for i, job in enumerate(jobs): + print(i, job) + print(f"--- Job {i} ---") + print(f"Name: {job.name}") + print(f"Nodes required: {job.nodes_required}") + print(f"Wall time: {job.trace_time:.2f} s") + print(f"CPU trace length: {len(job.cpu_trace)}") + print(f"GPU trace length: {len(job.gpu_trace)}") + print(f"Avg CPU util: {np.mean(job.cpu_trace):.3f}") + print(f"Avg GPU util: {np.mean(job.gpu_trace):.3f}") + print(f"Expected run time: {job.expected_run_time:.2f}") + print() + -- GitLab From fea99646a9836a3b2ce5a969b21f579f3998d0c0 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Fri, 24 Oct 2025 20:49:52 -0400 Subject: [PATCH 364/388] Add ref to Hao's SC25 paper --- raps/workloads/hpl.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/raps/workloads/hpl.py b/raps/workloads/hpl.py index bb6f18c..31469ae 100644 --- a/raps/workloads/hpl.py +++ b/raps/workloads/hpl.py @@ -1,8 +1,16 @@ """ +Hao Lu's analytical HPL model. Ref: + + Lu et al., "Insights from Optimizing HPL Performance on Exascale Systems: + A Comparative Analysis of Panel Factorization", in SC'25 Proceedings. + Test using: python main.py run -w hpl -d + +or: python raps/workloads/hpl.py + """ from raps.job import Job, job_dict import numpy as np @@ -101,6 +109,7 @@ class HPL: return {"T_total": total_T, "gpu_util": gpu_util, "cpu_util": cpu_util} + if __name__ == "__main__": import json import numpy as np @@ -137,4 +146,3 @@ if __name__ == "__main__": print(f"Avg GPU util: {np.mean(job.gpu_trace):.3f}") print(f"Expected run time: {job.expected_run_time:.2f}") print() - -- GitLab From 4c539b51fa95ca5b32d676d0e321dea33b95c053 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 28 Oct 2025 11:08:37 -0400 Subject: [PATCH 365/388] Fixed old remnants of running_time. 
--> jobs have current_run_time --- raps/engine.py | 12 ++++++------ raps/job.py | 6 +++--- raps/network/base.py | 18 +++++++++++------- raps/power.py | 3 +-- raps/ui.py | 6 +++--- raps/utils.py | 5 +++-- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/raps/engine.py b/raps/engine.py index ac7e8c7..d502e45 100644 --- a/raps/engine.py +++ b/raps/engine.py @@ -499,7 +499,7 @@ class Engine: # update Running time for job in self.running: if job.current_state == JobState.RUNNING: - job.running_time = self.current_timestep - job.start_time + job.current_run_time = self.current_timestep - job.start_time # Stop the simulation if no more jobs are running or in the queue or in the job list. if autoshutdown and \ @@ -552,7 +552,7 @@ class Engine: for job in self.running: - job.running_time = self.current_timestep - job.start_time + job.current_run_time = self.current_timestep - job.start_time if job.current_state != JobState.RUNNING: raise ValueError( @@ -561,15 +561,15 @@ class Engine: ) else: # if job.state == JobState.RUNNING: # Error checks - if not replay and job.running_time > job.time_limit and job.end_time is not None: + if not replay and job.current_run_time > job.time_limit and job.end_time is not None: raise Exception(f"Job exceded time limit! " - f"{job.running_time} > {job.time_limit}" + f"{job.current_run_time} > {job.time_limit}" f"\n{job}" f"\nCurrent timestep:{self.current_timestep - self.timestep_start} (rel)" ) - if replay and job.running_time > job.expected_run_time: + if replay and job.current_run_time > job.expected_run_time: raise Exception(f"Job should have ended in replay! " - f" {job.running_time} > {job.expected_run_time}" + f" {job.current_run_time} > {job.expected_run_time}" f"\n{job}" f"\nCurrent timestep:{self.current_timestep - self.timestep_start} (rel)" ) diff --git a/raps/job.py b/raps/job.py index 05c455e..4d1dda0 100644 --- a/raps/job.py +++ b/raps/job.py @@ -180,7 +180,7 @@ class Job: self.trace_start_time = None # Relative start time of the trace (to running time) self.trace_end_time = None # Relative end time of the trace self.trace_quanta = None # Trace quanta associated with the job # None means single value! 
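A minimal sketch (not part of the patch) of what the rename means downstream: `current_run_time` drives trace indexing exactly as in the `get_current_utilization` hunk of raps/utils.py below. The job object here is a hypothetical stand-in:

    from types import SimpleNamespace

    job = SimpleNamespace(current_run_time=130, trace_start_time=0, trace_quanta=20,
                          gpu_trace=[0.2, 0.4, 0.6, 0.8, 1.0, 1.0, 0.5, 0.1])

    # 130 s into the job with 20 s quanta -> index 6 of the trace
    idx = max(int((job.current_run_time - job.trace_start_time) // job.trace_quanta), 0)
    print(job.gpu_trace[idx])  # 0.5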
- self.running_time = 0 # Current running time updated when simulating + self.current_run_time = 0 # Current running time updated when simulating # If a job dict was given, override the values from the job_dict: for key, value in job_dict.items(): @@ -232,7 +232,7 @@ class Job: f"trace_start_time={self.trace_start_time}, " f"trace_end_time={self.trace_end_time}, " f"trace_quanta={self.trace_quanta}, " - f"running_time={self.running_time}, " + f"current_run_time={self.current_run_time}, " f"power={self.power}, " f"power_history={self.power_history})") @@ -296,7 +296,7 @@ class JobStatistics: self.account = job.account self.num_nodes = len(job.scheduled_nodes) self.scheduled_nodes = job.scheduled_nodes - self.run_time = job.running_time + self.run_time = job.current_run_time self.submit_time = job.submit_time self.start_time = job.start_time self.end_time = job.end_time diff --git a/raps/network/base.py b/raps/network/base.py index bab2ec8..3f3daeb 100644 --- a/raps/network/base.py +++ b/raps/network/base.py @@ -4,6 +4,7 @@ from raps.utils import get_current_utilization from raps.network.fat_tree import node_id_to_host_name from raps.network.torus3d import link_loads_for_job_torus, torus_host_from_real_index + def debug_print_trace(job, label: str = ""): """Print either the length (if iterable) or the value of job.gpu_trace.""" if hasattr(job.gpu_trace, "__len__"): @@ -138,6 +139,7 @@ def worst_link_util(loads, throughput): max_util = util return max_util + def get_link_util_stats(loads, throughput, top_n=10): """ Calculates a distribution of link utilization stats. @@ -148,9 +150,9 @@ def get_link_util_stats(loads, throughput, top_n=10): # Calculate utilization for every link utilizations = {(edge): (byte_load * 8) / throughput for edge, byte_load in loads.items()} - + util_values = list(utilizations.values()) - + stats = { 'max': np.max(util_values), 'mean': np.mean(util_values), @@ -161,14 +163,16 @@ def get_link_util_stats(loads, throughput, top_n=10): # Get top N congested links sorted_links = sorted(utilizations.items(), key=lambda item: item[1], reverse=True) stats['top_links'] = sorted_links[:top_n] - + return stats + def max_throughput_per_tick(legacy_cfg: dict, trace_quanta: int) -> float: """Return bytes-per-tick throughput of a single link.""" bw = legacy_cfg.get("NETWORK_MAX_BW") or 12.5e9 return float(bw) * trace_quanta + def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False): """ Simulates network congestion from a list of concurrently running jobs. 
@@ -181,8 +185,8 @@ def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False): trace_quanta = jobs[0].trace_quanta if jobs else 0 for job in jobs: - # Assuming job.running_time is 0 for this static simulation - job.running_time = 0 + # Assuming job.current_run_time is 0 for this static simulation + job.current_run_time = 0 job.trace_start_time = 0 net_tx = get_current_utilization(job.ntx_trace, job) @@ -193,7 +197,7 @@ def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False): host_list = [node_id_to_host_name(n, k) for n in job.scheduled_nodes] else: # dragonfly host_list = [network_model.real_to_fat_idx[real_n] for real_n in job.scheduled_nodes] - + job_loads = link_loads_for_job(network_model.net_graph, host_list, net_tx) elif network_model.topology == "torus3d": @@ -214,5 +218,5 @@ def simulate_inter_job_congestion(network_model, jobs, legacy_cfg, debug=False): max_throughput = max_throughput_per_tick(legacy_cfg, trace_quanta) net_stats = get_link_util_stats(total_loads, max_throughput) - + return net_stats diff --git a/raps/power.py b/raps/power.py index dd0745b..b1e6c9d 100644 --- a/raps/power.py +++ b/raps/power.py @@ -55,7 +55,7 @@ def compute_node_power(cpu_util, gpu_util, net_util, config): power_gpu = gpu_util * config['POWER_GPU_MAX'] + \ (config['GPUS_PER_NODE'] - gpu_util) * config['POWER_GPU_IDLE'] - if config.get("POWER_NIC_IDLE") != None and config.get("POWER_NIC_MAX") != None: + if config.get("POWER_NIC_IDLE") is not None and config.get("POWER_NIC_MAX") is not None: power_nic = config['POWER_NIC_IDLE'] + \ (config['POWER_NIC_MAX'] - config['POWER_NIC_IDLE']) * net_util else: @@ -432,7 +432,6 @@ class PowerManager: jobs_power = self.update_power_state(scheduled_nodes, cpu_utils, gpu_utils, net_utils) for i, job in enumerate(running_jobs): - # if job.running_time % self.config['TRACE_QUANTA'] == 0: job.power_history.append(jobs_power[i] * len(job.scheduled_nodes)) # Update the power array UI component diff --git a/raps/ui.py b/raps/ui.py index 6330bc9..03ca136 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -191,10 +191,10 @@ class LayoutManager: nodes_display = col_nodelist if self.engine.downscale != 1: - running_time_str = convert_seconds_to_hhmmss(job.running_time // self.engine.downscale) + \ - f" +{job.running_time % self.engine.downscale}/{self.engine.downscale}s" + running_time_str = convert_seconds_to_hhmmss(job.current_run_time // self.engine.downscale) + \ + f" +{job.current_run_time % self.engine.downscale}/{self.engine.downscale}s" else: - running_time_str = convert_seconds_to_hhmm(job.running_time) + running_time_str = convert_seconds_to_hhmm(job.current_run_time) row = [ str(job.id).zfill(5), diff --git a/raps/utils.py b/raps/utils.py index e232bce..d98be2a 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -640,7 +640,7 @@ def get_current_utilization(trace, job: Job): if not job.trace_quanta: raise ValueError("job.trace_quanta is not set; cannot compute utilization.") - time_quanta_index = int((job.running_time - job.trace_start_time) // job.trace_quanta) + time_quanta_index = int((job.current_run_time - job.trace_start_time) // job.trace_quanta) if time_quanta_index < 0: time_quanta_index = 0 @@ -700,6 +700,7 @@ def validate_resolved_path(path: str | Path, info: ValidationInfo): raise ValueError(f"{path} is not under {base_path}") return path + ResolvedPath = A[Path, AfterValidator(validate_resolved_path)] """ Resolve a path, and expand ~ in the path string. 
@@ -829,7 +830,7 @@ def read_yaml(config_file: str | None) -> dict: return result -def read_yaml_parsed(cls: type[T], config_file = None) -> dict: +def read_yaml_parsed(cls: type[T], config_file=None) -> dict: """ Like read_yaml, but parses the input to resolve paths etc. Exits on error after printing message (for use in the CLI) -- GitLab From 81070941ea676b9d77cf61621e47ff2a7c678828 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 28 Oct 2025 11:10:36 -0400 Subject: [PATCH 366/388] Scheduler stats displaying seconds again. --- raps/ui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/raps/ui.py b/raps/ui.py index 03ca136..cee033a 100644 --- a/raps/ui.py +++ b/raps/ui.py @@ -269,13 +269,13 @@ class LayoutManager: # Add data row with white values time_in_s = time // self.engine.downscale if (time_in_s < 946684800): # Introducing Y2K into our codebase! Kek - time_str = convert_seconds_to_hhmm(time_in_s) + time_str = convert_seconds_to_hhmmss(time_in_s) else: # For the curious: If the simulation time in seconds is large than # unix timestamp for Jan 2000 this is a unix timestamp, time_str = f"{datetime.fromtimestamp(time_in_s).strftime('%Y-%m-%d %H:%M')}" if timestep_start != 0: # append time simulated - time_str += f"\nSim: {convert_seconds_to_hhmm(time_in_s - timestep_start)}" + time_str += f"\nSim: {convert_seconds_to_hhmmss(time_in_s - timestep_start)}" row.append(time_str) row.append(str(nrun)) -- GitLab From b633cf89869c8ef3341c1b887ad91befde850bd3 Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 28 Oct 2025 11:29:06 -0400 Subject: [PATCH 367/388] Add more HPL test cases --- raps/workloads/hpl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/raps/workloads/hpl.py b/raps/workloads/hpl.py index 31469ae..e65b4d5 100644 --- a/raps/workloads/hpl.py +++ b/raps/workloads/hpl.py @@ -27,9 +27,9 @@ class HPL: jobs = [] # Example: parameter sweep across node counts or block sizes hpl_tests = [ - #{"M": 131072, "b": 576, "P": 192, "Q": 384, "Rtype": "1-ring"}, - #{"M": 131072, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"}, - {"M": 741455, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"}, + {"M": 1482910, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"}, + {"M": 2965820, "b": 576, "P": 32, "Q": 32, "Rtype": "1-ring"}, + {"M": 16777216, "b": 576, "P": 192, "Q": 192, "Rtype": "1-ring"}, ] #GCDS_PER_GPU = 2 -- GitLab From efe3dff3c7e2aaff0b033c8e0a0ff1af2d31e246 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 28 Oct 2025 13:01:36 -0400 Subject: [PATCH 368/388] Adjusted gpu_util to consider node utilization + linter fixes --- raps/workloads/hpl.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/raps/workloads/hpl.py b/raps/workloads/hpl.py index e65b4d5..7378a44 100644 --- a/raps/workloads/hpl.py +++ b/raps/workloads/hpl.py @@ -1,7 +1,7 @@ """ Hao Lu's analytical HPL model. Ref: - Lu et al., "Insights from Optimizing HPL Performance on Exascale Systems: + Lu et al., "Insights from Optimizing HPL Performance on Exascale Systems: A Comparative Analysis of Panel Factorization", in SC'25 Proceedings. 
Test using: @@ -9,12 +9,12 @@ Test using: python main.py run -w hpl -d or: - python raps/workloads/hpl.py + python raps/workloads/hpl.py """ from raps.job import Job, job_dict import numpy as np -import math, random, json +import math class HPL: @@ -32,7 +32,7 @@ class HPL: {"M": 16777216, "b": 576, "P": 192, "Q": 192, "Rtype": "1-ring"}, ] - #GCDS_PER_GPU = 2 + # GCDS_PER_GPU = 2 for test in hpl_tests: for partition in self.partitions: @@ -43,7 +43,7 @@ class HPL: results = self._run_hpl_model(**test) total_time = results["T_total"] - gpu_util = results["gpu_util"] + gpu_util = self.config_map[self.args.system]['GPUS_PER_NODE'] * results["gpu_util"] cpu_util = results["cpu_util"] num_samples = math.ceil(total_time / trace_quanta) + 1 @@ -51,7 +51,7 @@ class HPL: cpu_trace = np.full(num_samples, cpu_util) job_info = job_dict( - #nodes_required=test["P"] * test["Q"] // (cfg["GPUS_PER_NODE"] * GCDS_PER_GPU), + # nodes_required=test["P"] * test["Q"] // (cfg["GPUS_PER_NODE"] * GCDS_PER_GPU), nodes_required=test["P"] * test["Q"] // cfg["GPUS_PER_NODE"], scheduled_nodes=[], name=f"HPL_{test['M']}x{test['M']}", @@ -111,8 +111,6 @@ class HPL: if __name__ == "__main__": - import json - import numpy as np # Mock minimal configuration values to mimic ExaDigiT runtime class DummyHPL(HPL): -- GitLab From 7162992814a8e784d554bc023cbe1e6ecaf19bda Mon Sep 17 00:00:00 2001 From: Wes Brewer Date: Tue, 28 Oct 2025 17:10:25 -0400 Subject: [PATCH 369/388] Update HPL to call Hao's model for each iteration --- raps/workloads/hpl.py | 225 +++++++++++++++++++++++++++++------------- 1 file changed, 154 insertions(+), 71 deletions(-) diff --git a/raps/workloads/hpl.py b/raps/workloads/hpl.py index 7378a44..e338061 100644 --- a/raps/workloads/hpl.py +++ b/raps/workloads/hpl.py @@ -1,64 +1,66 @@ """ -Hao Lu's analytical HPL model. Ref: - - Lu et al., "Insights from Optimizing HPL Performance on Exascale Systems: - A Comparative Analysis of Panel Factorization", in SC'25 Proceedings. - -Test using: +Hao Lu’s analytical HPL model adapter for ExaDigiT. +Usage: python main.py run -w hpl -d - or: python raps/workloads/hpl.py - """ + from raps.job import Job, job_dict import numpy as np import math class HPL: - """Analytical HPL workload generator for ExaDigiT""" + """Analytical HPL workload generator for ExaDigiT.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + # ------------------------------------------------------------------------- + # Public entry + # ------------------------------------------------------------------------- def hpl(self, **kwargs): jobs = [] - # Example: parameter sweep across node counts or block sizes + + # You can add more scenarios; comment out big ones while testing. 
hpl_tests = [ - {"M": 1482910, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring"}, - {"M": 2965820, "b": 576, "P": 32, "Q": 32, "Rtype": "1-ring"}, - {"M": 16777216, "b": 576, "P": 192, "Q": 192, "Rtype": "1-ring"}, + # Smaller grid (quick sanity check) + {"M": 2_097_152, "b": 576, "P": 16, "Q": 32, "Rtype": "1-ring", "f": 0.6}, + # Frontier-scale shape (comment in when ready) + {"M": 8_900_000, "b": 576, "P": 192, "Q": 384, "Rtype": "1-ring", "f": 0.6}, ] - # GCDS_PER_GPU = 2 - for test in hpl_tests: for partition in self.partitions: cfg = self.config_map[partition] trace_quanta = cfg["TRACE_QUANTA"] - # --- Analytical model evaluation --- - results = self._run_hpl_model(**test) + # Per-iteration timings (already concurrency-aware) + iterations = self._run_hpl_model(**test) - total_time = results["T_total"] - gpu_util = self.config_map[self.args.system]['GPUS_PER_NODE'] * results["gpu_util"] - cpu_util = results["cpu_util"] + # Convert iteration timings to sampled traces on TRACE_QUANTA grid + gpu_trace, cpu_trace = self._emit_traces_from_iters( + iterations, trace_quanta, cfg + ) + total_time = len(gpu_trace) * trace_quanta - num_samples = math.ceil(total_time / trace_quanta) + 1 - gpu_trace = np.full(num_samples, gpu_util) - cpu_trace = np.full(num_samples, cpu_util) + # Node count: ranks / (GPUs_per_node * GCDs_per_GPU) + gpus = cfg["GPUS_PER_NODE"] + gcds = cfg.get("GCDS_PER_GPU", 2) # Frontier MI250X default: 2 + ranks = test["P"] * test["Q"] + nodes_required = max(1, ranks // (gpus * gcds)) job_info = job_dict( - # nodes_required=test["P"] * test["Q"] // (cfg["GPUS_PER_NODE"] * GCDS_PER_GPU), - nodes_required=test["P"] * test["Q"] // cfg["GPUS_PER_NODE"], + nodes_required=nodes_required, scheduled_nodes=[], - name=f"HPL_{test['M']}x{test['M']}", + name=f"HPL_{test['M']}x{test['M']}_P{test['P']}Q{test['Q']}", account="benchmark", cpu_trace=cpu_trace, gpu_trace=gpu_trace, - ntx_trace=[], nrx_trace=[], + ntx_trace=[], + nrx_trace=[], id=None, end_state="COMPLETED", priority=100, @@ -73,74 +75,155 @@ class HPL: trace_end_time=total_time, ) jobs.append(Job(job_info)) + return jobs + # ------------------------------------------------------------------------- + # Analytical per-iteration model (concurrency-aware) + # ------------------------------------------------------------------------- def _run_hpl_model(self, M, b, P, Q, Rtype="1-ring", f=0.6): - # constants (Table II + Fig 2b) - CAllgather = 6.3e9 - C1ring = 7e9 - Creduce = 46e6 - Fcpublas = 240e9 - Fgemm = 24e12 + """ + Returns a list of dicts, one per iteration: + { + "T_iter": , + "gpu_active": , + "cpu_active": , + "net_active": , + } + + Concurrency-aware scaling: + - UPDATE (DGEMM) work is distributed over the full P*Q ranks → divide by (P*Q) + - PDFACT/LBCAST/RS* progress along process columns (Q) → divide by Q + This makes the per-iteration times reflect global wall-time. 
+ """ + # Effective per-rank throughputs/bandwidths (empirical constants) + CAllgather = 6.3e9 # bytes/s + C1ring = 7.0e9 # bytes/s + Creduce = 46e6 # bytes/s + Fcpublas = 240e9 # FLOP/s + Fgemm = 24e12 # FLOP/s Ml = M / P Nl = M / Q nb = int(M / b) - total_T = 0.0 + iterations = [] - print("*** nb:", nb) for i in range(nb): Ml_i = Ml - (i * b / P) - Nl1_i = max((1 - f) * Nl - i * b / Q, 0) - Nl2_i = f * Nl if i * b < f * Nl else Nl - i * b / Q - - TPDFACT = b ** 2 / Creduce + (2 / 3) * b ** 2 * Ml_i / Fcpublas - TLBCAST = 16 * b * Ml_i / C1ring - TUPD1 = 2 * b * Ml_i * Nl1_i / Fgemm - TUPD2 = 2 * b * Ml_i * Nl2_i / Fgemm - TRS1 = 16 * b * Nl1_i / CAllgather - TRS2 = 16 * b * Nl2_i / CAllgather - - total_T += max(TPDFACT + TLBCAST + TRS1, TUPD2) + max(TRS2, TUPD1) - - # derive synthetic utilization - gpu_util = min(1.0, (Fgemm / 25e12)) # normalized ratio - cpu_util = min(1.0, (Fcpublas / 250e9)) - - return {"T_total": total_T, "gpu_util": gpu_util, "cpu_util": cpu_util} - - + if Ml_i <= 0: + break + + # Local column partition sizes (A = [A1 | A2]), f is the split ratio + Nl1_i = max((1.0 - f) * Nl - (i * b / Q), 0.0) + Nl2_i = (f * Nl) if (i * b) < (f * Nl) else max(Nl - (i * b / Q), 0.0) + + # Component times (per-rank formulations) + # NOTE: units already account for bytes vs. elements (coeffs 16, 2/3, etc.) + TPDFACT_rank = (b**2) / Creduce + (2.0 / 3.0) * (b**2) * Ml_i / Fcpublas + TLBCAST_rank = 16.0 * b * Ml_i / C1ring + TUPD1_rank = 2.0 * b * Ml_i * Nl1_i / Fgemm + TUPD2_rank = 2.0 * b * Ml_i * Nl2_i / Fgemm + TRS1_rank = 16.0 * b * Nl1_i / CAllgather + TRS2_rank = 16.0 * b * Nl2_i / CAllgather + + # Concurrency: convert rank-local times to global wall-time contributions + # (coarse but effective partitioning of the communicators) + TPDFACT = TPDFACT_rank #/ Q + TLBCAST = TLBCAST_rank #/ Q + TRS1 = TRS1_rank #/ Q + TRS2 = TRS2_rank #/ Q + TUPD1 = TUPD1_rank #/ (P * Q) + TUPD2 = TUPD2_rank #/ (P * Q) + + # Two pipeline stages per iteration (HPL) + stage1 = max(TPDFACT + TLBCAST + TRS1, TUPD2) + stage2 = max(TRS2, TUPD1) + T_iter = stage1 + stage2 + + # Attribute activity (for utilization duty fractions) + gpu_active = max(TUPD1, TUPD2) + cpu_active = TPDFACT + net_active = TLBCAST + TRS1 + TRS2 + + iterations.append( + dict( + T_iter=T_iter, + gpu_active=gpu_active, + cpu_active=cpu_active, + net_active=net_active, + ) + ) + + return iterations + + def _emit_traces_from_iters(self, iterations, trace_quanta, cfg): + gpn = cfg["GPUS_PER_NODE"] + gpu_trace, cpu_trace = [], [] + acc_time = 0.0 + acc_gpu = 0.0 + acc_cpu = 0.0 + + for it in iterations: + T = it["T_iter"] + if T <= 0: + continue + + total_act = it["gpu_active"] + it["cpu_active"] + it["net_active"] + compute_ratio = it["gpu_active"] / total_act if total_act > 0 else 0.0 + cpu_ratio = it["cpu_active"] / total_act if total_act > 0 else 0.0 + fg = 0.8 + 0.2 * compute_ratio + fc = 0.6 + 0.3 * cpu_ratio + + acc_time += T + acc_gpu += gpn * fg * T + acc_cpu += fc * T + + # emit one sample each time we accumulate ≥ trace_quanta + while acc_time >= trace_quanta: + gpu_trace.append(acc_gpu / acc_time) + cpu_trace.append(acc_cpu / acc_time) + acc_time -= trace_quanta + acc_gpu = acc_cpu = 0.0 + + # flush remainder + if acc_time > 0: + gpu_trace.append(acc_gpu / acc_time) + cpu_trace.append(acc_cpu / acc_time) + + return np.array(gpu_trace), np.array(cpu_trace) + +# ----------------------------------------------------------------------------- +# Stand-alone test +# 
----------------------------------------------------------------------------- if __name__ == "__main__": - # Mock minimal configuration values to mimic ExaDigiT runtime class DummyHPL(HPL): def __init__(self): - # Provide fake partitions and system config self.partitions = ["gpu"] self.config_map = { "gpu": { - "TRACE_QUANTA": 15.0, # seconds per trace tick - "GPUS_PER_NODE": 4, + "TRACE_QUANTA": 15.0, # seconds/sample + "GPUS_PER_NODE": 4, # Frontier physical GPUs/node + "GCDS_PER_GPU": 2, # MI250X logical ranks/GPU "CPUS_PER_NODE": 64, } } - # Instantiate dummy workload - workload = DummyHPL() - - # Run synthetic job generation - jobs = workload.hpl() + hpl = DummyHPL() + jobs = hpl.hpl() - print(f"Generated {len(jobs)} HPL jobs:\n") + print(f"Generated {len(jobs)} HPL job(s)\n") for i, job in enumerate(jobs): - print(i, job) print(f"--- Job {i} ---") print(f"Name: {job.name}") print(f"Nodes required: {job.nodes_required}") - print(f"Wall time: {job.trace_time:.2f} s") - print(f"CPU trace length: {len(job.cpu_trace)}") - print(f"GPU trace length: {len(job.gpu_trace)}") - print(f"Avg CPU util: {np.mean(job.cpu_trace):.3f}") - print(f"Avg GPU util: {np.mean(job.gpu_trace):.3f}") - print(f"Expected run time: {job.expected_run_time:.2f}") + print(f"Wall time: {job.trace_time:.1f}s") + print(f"Trace samples: {len(job.gpu_trace)}") + print(f"Avg GPU util: {np.mean(job.gpu_trace):.2f} (0..{hpl.config_map['gpu']['GPUS_PER_NODE']})") + print(f"Avg CPU util: {np.mean(job.cpu_trace):.2f} (0..1)") + # Peek at starts/ends + print("GPU head:", np.round(job.gpu_trace[:8], 3)) + print("GPU tail:", np.round(job.gpu_trace[-8:], 3)) + print("CPU head:", np.round(job.cpu_trace[:8], 3)) + print("CPU tail:", np.round(job.cpu_trace[-8:], 3)) print() -- GitLab From 6dc3ffa21b895f326436c5df44bf138eb20a6107 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 15 Oct 2025 10:47:35 -0400 Subject: [PATCH 370/388] Add skeleton for dataset download --- main.py | 3 ++- raps/telemetry.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 18ecd9a..8ae0c06 100755 --- a/main.py +++ b/main.py @@ -69,7 +69,7 @@ def main(cli_args: list[str] | None = None): from raps.run_sim import run_sim_add_parser, run_parts_sim_add_parser, show_add_parser from raps.workloads import run_workload_add_parser - from raps.telemetry import run_telemetry_add_parser + from raps.telemetry import run_telemetry_add_parser, run_download_add_parser from raps.train_rl import train_rl_add_parser parser = argparse.ArgumentParser( @@ -85,6 +85,7 @@ def main(cli_args: list[str] | None = None): show_add_parser(subparsers) run_workload_add_parser(subparsers) run_telemetry_add_parser(subparsers) + run_download_add_parser(subparsers) train_rl_add_parser(subparsers) shell_completion_add_parser(subparsers) diff --git a/raps/telemetry.py b/raps/telemetry.py index b7f29b7..e01234a 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -9,7 +9,7 @@ helper functions for data encryption and conversion between node name and index from typing import Literal import random from pathlib import Path -# import json +from datetime import datetime from typing import Optional from types import ModuleType import importlib @@ -21,6 +21,7 @@ from pydantic import model_validator from raps.sim_config import SimConfig from raps.system_config import get_system_config from raps.job import Job, job_dict +from raps.utils import AutoAwareDatetime import matplotlib.pyplot as plt from raps.plotting import ( 
    plot_jobs_gantt,
@@ -183,6 +184,13 @@ class Telemetry:
         assert self.dataloader
         return self.dataloader.load_live_data(**self.kwargs)
 
+    def download_data(self, dest: Path, start: datetime | None, end: datetime | None):
+        """Download telemetry data using the system's dataloader."""
+        assert self.dataloader
+        if not hasattr(self.dataloader, "download"):
+            raise ValueError("Dataloader does not support download")
+        return self.dataloader.download(dest, start, end)
+
     def node_index_to_name(self, index: int):
         """ Convert node index into a name"""
         assert self.dataloader
@@ -359,3 +367,24 @@ def run_telemetry(args: TelemetryArgs):
             print(f"Saved to: {filename}")
     else:
         plt.show()
+
+
+class DownloadArgs(RAPSBaseModel):
+    system: str
+    dest: ResolvedPath
+    start: AutoAwareDatetime | None = None
+    end: AutoAwareDatetime | None = None
+
+
+def run_download_add_parser(subparsers: SubParsers):
+    parser = subparsers.add_parser("download", description="""
+        Download telemetry data
+    """)
+    model_validate = pydantic_add_args(parser, DownloadArgs)
+    parser.set_defaults(impl=lambda args: run_download(model_validate(args, {})))
+
+
+def run_download(args: DownloadArgs):
+    config = get_system_config(args.system).get_legacy()
+    td = Telemetry(system = args.system, config = config)
+    td.download_data(args.dest, args.start, args.end)
-- 
GitLab


From b951ad92e9909873c30dcbdfea60dcd6d2caac0c Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 15 Oct 2025 13:06:35 -0400
Subject: [PATCH 371/388] Add fugaku downloads

---
 raps/dataloaders/fugaku.py | 49 ++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py
index 5a531fa..fdd86c5 100644
--- a/raps/dataloaders/fugaku.py
+++ b/raps/dataloaders/fugaku.py
@@ -1,21 +1,26 @@
 """
-    Download parquet files from https://zenodo.org/records/11467483
+Use the fugaku dataset published at https://zenodo.org/records/11467483
 
-    Note that F-Data doesn't give a list of nodes used, so we set 'scheduled_nodes' to None
-    which triggers the scheduler to schedule the nodes itself.
+Note that F-Data doesn't give a list of nodes used, so we set 'scheduled_nodes' to None
+which triggers the scheduler to schedule the nodes itself.
 
-    Also, power in F-Data is only given at node-level. We can use node-level power by
-    adding the --validate option.
+Also, power in F-Data is only given at node-level. We can use node-level power by
+adding the --validate option.
 
-    The '--arrival poisson' will compute submit times from Poisson distribution, instead of using
-    the submit times given in F-Data.
+The '--arrival poisson' will compute submit times from a Poisson distribution, instead of using
+the submit times given in F-Data.
- raps run --system fugaku -f /path/to/21_04.parquet - raps run --system fugaku -f /path/to/21_04.parquet --validate - raps run --system fugaku -f /path/to/21_04.parquet --policy priority --backfill easy +raps run --system fugaku -f /path/to/21_04.parquet +raps run --system fugaku -f /path/to/21_04.parquet --validate +raps run --system fugaku -f /path/to/21_04.parquet --policy priority --backfill easy """ import pandas as pd from tqdm import tqdm +from datetime import datetime +from pathlib import Path +from zoneinfo import ZoneInfo +import urllib.request +import requests from ..job import job_dict, Job from ..utils import WorkloadData @@ -180,3 +185,27 @@ def cdu_index_to_name(index: int, config: dict): def cdu_pos(index: int, config: dict) -> tuple[int, int]: """ Return (row, col) tuple for a cdu index """ return (0, index) # TODO + + +def download(dest: Path, start: datetime | None, end: datetime | None): + tz = ZoneInfo("Asia/Tokyo") + + files = requests.get("https://zenodo.org/api/records/11467483").json()["files"] + files = [f for f in files if f['key'].endswith(".parquet")] + files = sorted(files, key = lambda f: f['key']) + + # TODO: I think fugaku data is indexed by submission time not start time, so filtering by + # filename will probably miss some jobs that ran over start -> end + if start: + start_file = start.astimezone(tz).strftime("%y_%m.parquet") + files = [f for f in files if f['key'] >= start_file] + if end: + end_file = end.astimezone(tz).strftime("%y_%m.parquet") + files = [f for f in files if f['key'] <= end_file] + + dest.mkdir(parents = True) + for file in files: + print(f"Downloading {file['key']}") + urllib.request.urlretrieve(file['links']['self'], dest / file['key']) + + print("Done!") -- GitLab From 77d4626ae057e9a1ad2e4d96976e923cca8a02c5 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 15 Oct 2025 13:25:05 -0400 Subject: [PATCH 372/388] Make download dest optional --- raps/telemetry.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/raps/telemetry.py b/raps/telemetry.py index e01234a..238f2bb 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -371,7 +371,7 @@ def run_telemetry(args: TelemetryArgs): class DownloadArgs(RAPSBaseModel): system: str - dest: ResolvedPath + dest: ResolvedPath | None = None start: AutoAwareDatetime | None = None end: AutoAwareDatetime | None = None @@ -387,4 +387,5 @@ def run_download_add_parser(subparsers: SubParsers): def run_download(args: DownloadArgs): config = get_system_config(args.system).get_legacy() td = Telemetry(system = args.system, config = config) - td.download_data(args.dest, args.start, args.end) + dest = args.dest if args.dest else Path("./data").resolve() / args.system + td.download_data(dest, args.start, args.end) -- GitLab From a10579454d5e7daec585bc6883af1fa76fdff8a9 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 15 Oct 2025 14:53:17 -0400 Subject: [PATCH 373/388] Add frontier download --- raps/dataloaders/frontier.py | 46 ++++++++++++++++++++++++++++++++++-- raps/utils.py | 10 +++++++- scripts/get_data.sh | 20 ---------------- 3 files changed, 53 insertions(+), 23 deletions(-) delete mode 100755 scripts/get_data.sh diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 23efd2f..45994e1 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -10,13 +10,16 @@ python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR """ import time -from datetime import datetime, timezone +from datetime import 
datetime, timezone, timedelta +from zoneinfo import ZoneInfo import numpy as np import pandas as pd +import subprocess from tqdm import tqdm +from pathlib import Path from ..job import job_dict, Job -from ..utils import power_to_utilization, encrypt, WorkloadData +from ..utils import power_to_utilization, encrypt, WorkloadData, date_range def aging_boost(nnodes): @@ -609,3 +612,42 @@ def cdu_pos(index: int, config: dict) -> tuple[int, int]: name = CDU_NAMES[index - 1] row, col = int(name[2]), int(name[3:5]) return (row, col) + + +def download(dest: Path, start: datetime | None, end: datetime | None): + HOST = "dtn.ccs.ornl.gov" + DATA_LAKE = "/lustre/orion/stf218/proj-shared/data/lake/frontier" + + print("Downloading the Frontier dataset requires access permissions.") + print("If you have access you can download via SSH.") + USERNAME = input("NCCS Username: ") + # jobs are indexed by submission time so download a few extra days to make sure we get all that + # ran over start -> end + if start: + start = (start - timedelta(days = 2)).astimezone(ZoneInfo("UTC")) + else: + start = datetime.fromisoformat("2023-09-01T00:00:00Z") + if end: + end = (end + timedelta(days = 2)).astimezone(ZoneInfo("UTC")) + else: + end = datetime.now(ZoneInfo("UTC")) + + days = list(date_range(start, end)) + + dest.mkdir(parents=True) + subprocess.run(["rsync", "-rvm", + *[f"--include=date={d.date().isoformat()}/***" for d in days], + "--exclude", '*', + f"{USERNAME}@{HOST}:{DATA_LAKE}/jobprofile/jobprofile/", + str(dest / "jobprofile") + ], check=True, text=True) + + (dest / 'slurm').mkdir(parents=True) + subprocess.run(["rsync", "-rvm", + *[f"--include=date={d.date().isoformat()}/***" for d in days], + "--exclude", '*', + f"{USERNAME}@{HOST}:{DATA_LAKE}/slurm/joblive/", + str(dest / "slurm/joblive") + ], check=True, text=True) + + print("Done!") diff --git a/raps/utils.py b/raps/utils.py index d98be2a..8385361 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -6,7 +6,8 @@ generating random numbers, summarizing and expanding ranges, determining job sta """ -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta, timezone, date +from collections.abc import Iterable from enum import Enum import os import hashlib @@ -70,6 +71,13 @@ def to_dict(arg): else: raise ValueError(f"Cannot convert {arg} to dict") +DateType = TypeVar("DateType", date, datetime) +def date_range(start: DateType, end: DateType, step = timedelta(days=1)) -> Iterable[DateType]: + window_start = start + while window_start < end: + yield window_start + window_start += step + def sum_values(values): return sum(x[1] for x in values) if values else 0 diff --git a/scripts/get_data.sh b/scripts/get_data.sh deleted file mode 100755 index a4263fd..0000000 --- a/scripts/get_data.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# Note: -# recommend setting up ~/.ssh/config to specify User and HostName -# Host mymachine -# User jdoe -# HostName mymachine.com - -machine="mymachine" -mkdir -p jobprofile slurm/jobcomplete slurm/joblive - -if [ -n "$1" ]; then - DATE=$1 -else - DATE="2024-01-19" -fi - -DPATH=/path/to/data/lake - -/usr/bin/scp -r $machine:$DPATH/jobprofile/jobprofile/date=$DATE jobprofile -/usr/bin/scp -r $machine:$DPATH/slurm/joblive/date=$DATE slurm/joblive -- GitLab From 52ab19faa3db413e81279204684b6076ab3a1b09 Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 15 Oct 2025 15:32:46 -0400 Subject: [PATCH 374/388] Marconi100 download --- raps/dataloaders/marconi100.py | 49 ++++++++++++++++++++++------------ 
 1 file changed, 32 insertions(+), 17 deletions(-)

diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py
index 4b3c5c6..23ba3ab 100644
--- a/raps/dataloaders/marconi100.py
+++ b/raps/dataloaders/marconi100.py
@@ -1,31 +1,34 @@
 """
-    # Reference
-    Antici, Francesco, et al. "PM100: A Job Power Consumption Dataset of a
-    Large-scale Production HPC System." Proceedings of the SC'23 Workshops
-    of The International Conference on High Performance Computing,
-    Network, Storage, and Analysis. 2023.
+# Reference
+Antici, Francesco, et al. "PM100: A Job Power Consumption Dataset of a
+Large-scale Production HPC System." Proceedings of the SC'23 Workshops
+of The International Conference on High Performance Computing,
+Network, Storage, and Analysis. 2023.
 
-    # get the data
-    Download `job_table.parquet` from https://zenodo.org/records/10127767
+# get the data
+Download `job_table.parquet` from https://zenodo.org/records/10127767
 
-    # to simulate the dataset
-    raps run -f /path/to/job_table.parquet --system marconi100
+# to simulate the dataset
+raps run -f /path/to/job_table.parquet --system marconi100
 
-    # to replay using differnt schedulers
-    raps run -f /path/to/job_table.parquet --system marconi100 --policy fcfs --backfill easy
-    raps run -f /path/to/job_table.parquet --system marconi100 --policy priority --backfill firstfit
+# to replay using different schedulers
+raps run -f /path/to/job_table.parquet --system marconi100 --policy fcfs --backfill easy
+raps run -f /path/to/job_table.parquet --system marconi100 --policy priority --backfill firstfit
 
-    # to fast-forward 60 days and replay for 1 day
-    raps run -f /path/to/job_table.parquet --system marconi100 --start 2020-07-05T00:00:00+00:00 -t 1d
-
-    # to analyze dataset
-    python -m raps.telemetry -f /path/to/job_table.parquet --system marconi100 -v
+# to fast-forward 60 days and replay for 1 day
+raps run -f /path/to/job_table.parquet --system marconi100 --start 2020-07-05T00:00:00+00:00 -t 1d
 
+# to analyze dataset
+python -m raps.telemetry -f /path/to/job_table.parquet --system marconi100 -v
 """
 import uuid
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
+from pathlib import Path
+from datetime import datetime
+import requests
+import urllib.request
 
 from ..job import job_dict, Job
 from ..utils import power_to_utilization, WorkloadData
@@ -241,3 +244,15 @@ def cdu_index_to_name(index: int, config: dict):
 def cdu_pos(index: int, config: dict) -> tuple[int, int]:
     """ Return (row, col) tuple for a cdu index """
     return (0, index)  # TODO
+
+
+def download(dest: Path, start: datetime | None, end: datetime | None):
+    files = requests.get("https://zenodo.org/api/records/10127767").json()["files"]
+
+    # marconi100 is just one big parquet, nothing to pre-filter
+    dest.mkdir(parents = True)
+    for file in files:
+        print(f"Downloading {file['key']}")
+        urllib.request.urlretrieve(file['links']['self'], dest / file['key'])
+
+    print("Done!")
-- 
GitLab


From ffb2d1b4e37409c23f4757706788497632bcfbca Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 15 Oct 2025 15:54:58 -0400
Subject: [PATCH 375/388] Lassen download

---
 raps/dataloaders/lassen.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py
index db86513..6ab7ca5 100644
--- a/raps/dataloaders/lassen.py
+++ b/raps/dataloaders/lassen.py
@@ -35,6 +35,8 @@ import uuid
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
+from pathlib import Path
+import subprocess, shutil
 from datetime import datetime,
timedelta from ..job import job_dict, Job @@ -339,3 +341,12 @@ if __name__ == "__main__": tx_sequence, rx_sequence = generate_network_sequences(total_ib_tx, total_ib_rx, intervals, lambda_poisson) print(tx_sequence, rx_sequence) + + +def download(dest: Path, start: datetime | None, end: datetime | None): + dest.mkdir(parents = True) + subprocess.run(["git", "clone", "https://github.com/LLNL/LAST/", str(dest / 'repo')], check=True, text=True) + subprocess.run(["git", "lfs", "pull"], check=True, text=True, cwd=dest / "repo") + (dest / "repo" / "Lassen-Supercomputer-Job-Dataset").rename(dest / "Lassen-Supercomputer-Job-Dataset") + shutil.rmtree(dest / 'repo') + print("Done!") -- GitLab From c3e8abe28f5d055ab7fd1d141c571eddbe885dde Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 15 Oct 2025 16:03:12 -0400 Subject: [PATCH 376/388] Add adastraMI250 download --- .gitignore | 1 + raps/dataloaders/adastraMI250.py | 40 ++++++++++++++++++++------------ raps/dataloaders/frontier.py | 14 +++++------ raps/dataloaders/fugaku.py | 2 +- raps/dataloaders/lassen.py | 13 +++++------ raps/dataloaders/marconi100.py | 6 ++++- 6 files changed, 45 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index c5f7241..3e87161 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ models/fmu-models .shell-completion-cache raps-output-* ppo_raps_logs +/data diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index ed60807..51fd9fd 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -1,28 +1,30 @@ """ +# get the data +``` +raps download --system adastraMI250 +``` +This will download the dataset from https://zenodo.org/records/14007065/files/AdastaJobsMI250_15days.parquet - # get the data - Download `AdastaJobsMI250_15days.parquet` from - https://zenodo.org/records/14007065/files/AdastaJobsMI250_15days.parquet +# to simulate the dataset +raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 +# to replay with different scheduling policy +raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --policy priority --backfill easy - # to simulate the dataset - raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 - - # to replay with different scheduling policy - raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 --policy priority --backfill easy - - # to run a specific time range - raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 \ - --start 2024-11-01T00:00:00Z --end 2024-11-02T00:00:00Z - - # to analyze dataset - python -m raps.telemetry -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 -v +# to run a specific time range +raps run -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 \ + --start 2024-11-01T00:00:00Z --end 2024-11-02T00:00:00Z +# to analyze dataset +python -m raps.telemetry -f /path/to/AdastaJobsMI250_15days.parquet --system adastraMI250 -v """ import uuid import numpy as np import pandas as pd +from pathlib import Path +from datetime import datetime from tqdm import tqdm +import urllib.request from ..job import job_dict, Job from ..utils import WorkloadData @@ -279,3 +281,11 @@ def cdu_pos(index: int, config: dict) -> tuple[int, int]: name = CDU_NAMES[index - 1] row, col = int(name[2]), int(name[3:5]) return (row, col) + + +def download(dest: Path, start: datetime | None, end: datetime | None): + dest.mkdir(parents = True) + filename = "AdastaJobsMI250_15days.parquet" + print(f"Downloading 
{filename}") + urllib.request.urlretrieve(f"https://zenodo.org/records/14007065/files/{filename}", dest / filename) + print("Done!") diff --git a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 45994e1..9c2d232 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -1,13 +1,13 @@ """ - Note: Frontier telemetry data is not publicly available. +Note: Frontier telemetry data is not publicly available. - # To simulate - DATEDIR="date=2024-01-18" - DPATH=/path/to/data - raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR +# To simulate +DATEDIR="date=2024-01-18" +DPATH=/path/to/data +raps run -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR - # To analyze the data - python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR +# To analyze the data +python -m raps.telemetry -f $DPATH/slurm/joblive/$DATEDIR,$DPATH/jobprofile/$DATEDIR """ import time from datetime import datetime, timezone, timedelta diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index fdd86c5..80b0fcb 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -1,5 +1,5 @@ """ -Use the fugaku dataset published at https://zenodo.org/records/11467483 +Uses the fugaku dataset published at https://zenodo.org/records/11467483 Note that F-Data doesn't give a list of nodes used, so we set 'scheduled_nodes' to None which triggers the scheduler to schedule the nodes itself. diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index 6ab7ca5..d60a5db 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -10,23 +10,22 @@ Reference: Usage Instructions: - git clone https://github.com/LLNL/LAST/ && cd LAST - git lfs pull + raps download --system lassen # to analyze dataset and plot histograms - raps telemetry -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --plot + raps telemetry -f ./data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --plot # to simulate the dataset as submitted - raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen + raps run -f ./data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen # to modify the submit times of the telemetry according to Poisson distribution - raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson + raps run -f ./data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --arrival poisson # to fast-forward 365 days and replay for 1 day. This region day has 2250 jobs with 1650 jobs executed. - raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --start '2019-08-22T00:00:00+00:00' -t 1d + raps run -f ./data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --start '2019-08-22T00:00:00+00:00' -t 1d # For the network replay this command gives suiteable snapshots: - raps run -f /path/to/LAST/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson # noqa + raps run -f ./data/lassen/Lassen-Supercomputer-Job-Dataset --system lassen --policy fcfs --backfill firstfit -t 12h --arrival poisson # noqa """ import math diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index 23ba3ab..c74ebd6 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -6,7 +6,11 @@ of The International Conference on High Performance Computing, Network, Storage, and Analysis. 2023. 
 # get the data
-Download `job_table.parquet` from https://zenodo.org/records/10127767
+Download the dataset with
+```
+raps download --system marconi100
+```
+This will download the dataset from https://zenodo.org/records/10127767
 
 # to simulate the dataset
 raps run -f /path/to/job_table.parquet --system marconi100
-- 
GitLab


From a017cac6ab607ade926f8031237949d0f7f890ae Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Tue, 21 Oct 2025 10:53:55 -0400
Subject: [PATCH 377/388] Allow overriding dataloader

---
 raps/sim_config.py | 7 +++++++
 raps/telemetry.py  | 8 ++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/raps/sim_config.py b/raps/sim_config.py
index a12512f..aebfc54 100644
--- a/raps/sim_config.py
+++ b/raps/sim_config.py
@@ -118,6 +118,13 @@ class SimConfig(RAPSBaseModel, abc.ABC):
     replay: list[ResolvedPath] | None = None
     """ Either: path/to/joblive path/to/jobprofile OR filename.npz """
 
+    dataloader: str | None = None
+    """
+    Python module path to use as the dataloader when loading replay data. Only relevant if replay is
+    set. Defaults to "raps.dataloaders.<system>", but can be set to your own custom dataloader
+    as well.
+    """
+
     encrypt: bool = False
     """ Encrypt sensitive data in telemetry """
 
diff --git a/raps/telemetry.py b/raps/telemetry.py
index 238f2bb..09f94e4 100644
--- a/raps/telemetry.py
+++ b/raps/telemetry.py
@@ -85,9 +85,13 @@ class Telemetry:
         self.system = kwargs['system']
         self.config = kwargs.get('config')
 
+        if kwargs.get("dataloader"):
+            module = kwargs['dataloader']
+        else:
+            module = f"raps.dataloaders.{self.system.split('/')[0]}"
+
         try:
-            module = self.system.split("/")[0]
-            self.dataloader = importlib.import_module(f"raps.dataloaders.{module}", package=__package__)
+            self.dataloader = importlib.import_module(module, package=__package__)
         except ImportError as e:
             print(f"WARNING: Failed to load dataloader: {e}")
             self.dataloader = None
-- 
GitLab


From 64a90f8ecc5e7567d76fb733800e7383c0391481 Mon Sep 17 00:00:00 2001
From: Jesse Hines
Date: Wed, 22 Oct 2025 14:35:33 -0400
Subject: [PATCH 378/388] Shift telemetry_start back to match sim_config.start

With the current dataloaders, WorkloadData.start_date can end up being after
sim_config.start. Most of the dataloaders set timestep_start and start_date to
whatever the first job in the dataset is. This issue is most apparent when the
dataloader does any filtering based on start/end, so start_date will be
whichever job started first in the interval.

Eventually I think we should refactor the dataloaders to return absolute unix
timestamps to avoid all these relative vs non-relative timestep complications.
But for now I'm just going to shift the telemetry_start back to match the
sim_config.start if needed. This does mean that telemetry_start can now be a
negative value, but that should work fine.
---
 raps/engine.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/raps/engine.py b/raps/engine.py
index d502e45..a9ea4a0 100644
--- a/raps/engine.py
+++ b/raps/engine.py
@@ -200,11 +200,9 @@ class Engine:
         if sim_config.start:
             start = sim_config.start
             diff = start - wd.start_date
-            if diff.total_seconds() < 0:
-                raise Exception(
-                    f"{start.isoformat()} is before data range in workload. "
-                    + f"Workload data begins at {wd.start_date.isoformat()}"
-                )
+            # diff may be negative if start is before the first job in the workload. We'll still
+            # shift telemetry_start to match sim_config.start, even if that leaves a blank
+            # spot at the beginning.
wd.telemetry_start += int(diff.total_seconds()) wd.start_date = start else: -- GitLab From 9d988c9664ffa200c418beb628523b915ce7ec0c Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 29 Oct 2025 11:49:41 -0400 Subject: [PATCH 379/388] Fix selene nodes_per_blade config --- config/selene.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/selene.yaml b/config/selene.yaml index 0520da1..fcf8c1a 100644 --- a/config/selene.yaml +++ b/config/selene.yaml @@ -4,7 +4,7 @@ system: nodes_per_rack: 4 rectifiers_per_rack: 32 chassis_per_rack: 4 - nodes_per_blade: 2 + nodes_per_blade: 1 switches_per_chassis: 4 nics_per_node: 4 rectifiers_per_chassis: 4 -- GitLab From a0289115c913d517abb1b13d23d1e104cd2cbadf Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Wed, 29 Oct 2025 13:29:49 -0400 Subject: [PATCH 380/388] Fix config handling of replay policy --- raps/schedulers/replay.py | 61 --------------------------------------- raps/sim_config.py | 18 ++++++++---- 2 files changed, 12 insertions(+), 67 deletions(-) delete mode 100644 raps/schedulers/replay.py diff --git a/raps/schedulers/replay.py b/raps/schedulers/replay.py deleted file mode 100644 index 7a0abcf..0000000 --- a/raps/schedulers/replay.py +++ /dev/null @@ -1,61 +0,0 @@ -from ..policy import PolicyType - - -class Scheduler: - """ - Mock Scheduler only considering start time. - There is no scheduling going on but job placement according to start time. - - Default job scheduler with various scheduling policies. - """ - - def __init__(self, config, policy, resource_manager=None): - self.config = config - self.policy = PolicyType(policy) - if resource_manager is None: - raise ValueError("Scheduler requires a ResourceManager instance") - self.resource_manager = resource_manager - self.debug = False - - def sort_jobs(self, queue, accounts=None): - """Sort jobs based on the selected scheduling policy.""" - return sorted(queue, key=lambda job: job.start_time) - - def prepare_system_state(self, queue, running): - return queue - - def schedule(self, queue, running, current_time, accounts=None, sorted=False, debug=False): - # Sort the queue in place. 
- if not sorted: - queue[:] = self.sort_jobs(queue, accounts) - - for job in queue[:]: - # Skip jobs in queue with start time in the future - if job.start_time >= current_time: - continue - - nodes_available = False - if job.nodes_required <= len(self.resource_manager.available_nodes): - if self.policy == PolicyType.REPLAY and job.scheduled_nodes: # Check if we need exact set - # is exact set available: - nodes_available = set(job.scheduled_nodes).issubset(set(self.resource_manager.available_nodes)) - else: - # we dont need the exact set: - nodes_available = True # Checked above - if job.nodes_required == 0: - raise ValueError(f"Job Requested zero nodes: {job}") - # clear scheduled nodes - job.scheduled_nodes = [] - else: - pass # not enough nodes available - - if nodes_available: - self.resource_manager.assign_nodes_to_job(job, current_time) - running.append(job) - queue.remove(job) - else: - # This is a replay so this should not happen - raise ValueError( - f"Nodes not available!\nRequested:{job.scheduled_nodes}\n" - f"Available:{self.resource_manager.available_nodes}\n{job.__dict__}; " - f"Policy: {self.policy}") diff --git a/raps/sim_config.py b/raps/sim_config.py index aebfc54..32cc043 100644 --- a/raps/sim_config.py +++ b/raps/sim_config.py @@ -319,12 +319,18 @@ class SimConfig(RAPSBaseModel, abc.ABC): if td is not None: convert_to_time_unit(td, self.time_unit) # will throw if invalid - if "workload" not in self.model_fields_set and self.replay: - self.workload = "replay" # default to replay if --replay is set - if self.workload == "replay" and not self.replay: - raise ValueError('--replay must be set when workload type is "replay"') - elif self.workload != "replay" and self.replay: - raise ValueError('workload must be either omitted or "replay" when --replay is set') + if self.replay: + if "workload" not in self.model_fields_set: + self.workload = "replay" # default to replay if --replay is set + if not self.policy: + self.policy = "replay" + if self.workload != "replay" or self.policy != 'replay': + raise ValueError('workload & policy must be either omitted or "replay" when --replay is set') + if self.scheduler != 'default': + raise ValueError('scheduler must be omitted or set to default when --replay is set') + else: + if self.workload == "replay" or self.policy == "replay": + raise ValueError('--replay must be set when workload type is "replay"') if self.cooling: self.layout = "layout2" -- GitLab From 8dbd8ecd2b91d00bfc4870caced06f983df2921f Mon Sep 17 00:00:00 2001 From: Jesse Hines Date: Thu, 30 Oct 2025 14:44:18 -0400 Subject: [PATCH 381/388] Formatting fixes --- raps/dataloaders/adastraMI250.py | 2 +- raps/dataloaders/frontier.py | 24 ++++++++++++------------ raps/dataloaders/fugaku.py | 6 +++--- raps/dataloaders/lassen.py | 5 +++-- raps/dataloaders/marconi100.py | 2 +- raps/stats.py | 2 +- raps/telemetry.py | 2 +- raps/utils.py | 5 ++++- 8 files changed, 26 insertions(+), 22 deletions(-) diff --git a/raps/dataloaders/adastraMI250.py b/raps/dataloaders/adastraMI250.py index 51fd9fd..0f7f366 100644 --- a/raps/dataloaders/adastraMI250.py +++ b/raps/dataloaders/adastraMI250.py @@ -284,7 +284,7 @@ def cdu_pos(index: int, config: dict) -> tuple[int, int]: def download(dest: Path, start: datetime | None, end: datetime | None): - dest.mkdir(parents = True) + dest.mkdir(parents=True) filename = "AdastaJobsMI250_15days.parquet" print(f"Downloading {filename}") urllib.request.urlretrieve(f"https://zenodo.org/records/14007065/files/{filename}", dest / filename) diff --git 
a/raps/dataloaders/frontier.py b/raps/dataloaders/frontier.py index 9c2d232..391a84e 100644 --- a/raps/dataloaders/frontier.py +++ b/raps/dataloaders/frontier.py @@ -624,11 +624,11 @@ def download(dest: Path, start: datetime | None, end: datetime | None): # jobs are indexed by submission time so download a few extra days to make sure we get all that # ran over start -> end if start: - start = (start - timedelta(days = 2)).astimezone(ZoneInfo("UTC")) + start = (start - timedelta(days=2)).astimezone(ZoneInfo("UTC")) else: start = datetime.fromisoformat("2023-09-01T00:00:00Z") if end: - end = (end + timedelta(days = 2)).astimezone(ZoneInfo("UTC")) + end = (end + timedelta(days=2)).astimezone(ZoneInfo("UTC")) else: end = datetime.now(ZoneInfo("UTC")) @@ -636,18 +636,18 @@ def download(dest: Path, start: datetime | None, end: datetime | None): dest.mkdir(parents=True) subprocess.run(["rsync", "-rvm", - *[f"--include=date={d.date().isoformat()}/***" for d in days], - "--exclude", '*', - f"{USERNAME}@{HOST}:{DATA_LAKE}/jobprofile/jobprofile/", - str(dest / "jobprofile") - ], check=True, text=True) + *[f"--include=date={d.date().isoformat()}/***" for d in days], + "--exclude", '*', + f"{USERNAME}@{HOST}:{DATA_LAKE}/jobprofile/jobprofile/", + str(dest / "jobprofile") + ], check=True, text=True) (dest / 'slurm').mkdir(parents=True) subprocess.run(["rsync", "-rvm", - *[f"--include=date={d.date().isoformat()}/***" for d in days], - "--exclude", '*', - f"{USERNAME}@{HOST}:{DATA_LAKE}/slurm/joblive/", - str(dest / "slurm/joblive") - ], check=True, text=True) + *[f"--include=date={d.date().isoformat()}/***" for d in days], + "--exclude", '*', + f"{USERNAME}@{HOST}:{DATA_LAKE}/slurm/joblive/", + str(dest / "slurm/joblive") + ], check=True, text=True) print("Done!") diff --git a/raps/dataloaders/fugaku.py b/raps/dataloaders/fugaku.py index 80b0fcb..1442ad2 100644 --- a/raps/dataloaders/fugaku.py +++ b/raps/dataloaders/fugaku.py @@ -192,7 +192,7 @@ def download(dest: Path, start: datetime | None, end: datetime | None): files = requests.get("https://zenodo.org/api/records/11467483").json()["files"] files = [f for f in files if f['key'].endswith(".parquet")] - files = sorted(files, key = lambda f: f['key']) + files = sorted(files, key=lambda f: f['key']) # TODO: I think fugaku data is indexed by submission time not start time, so filtering by # filename will probably miss some jobs that ran over start -> end @@ -202,8 +202,8 @@ def download(dest: Path, start: datetime | None, end: datetime | None): if end: end_file = end.astimezone(tz).strftime("%y_%m.parquet") files = [f for f in files if f['key'] <= end_file] - - dest.mkdir(parents = True) + + dest.mkdir(parents=True) for file in files: print(f"Downloading {file['key']}") urllib.request.urlretrieve(file['links']['self'], dest / file['key']) diff --git a/raps/dataloaders/lassen.py b/raps/dataloaders/lassen.py index d60a5db..06d9a98 100644 --- a/raps/dataloaders/lassen.py +++ b/raps/dataloaders/lassen.py @@ -35,7 +35,8 @@ import numpy as np import pandas as pd from tqdm import tqdm from pathlib import Path -import subprocess, shutil +import subprocess +import shutil from datetime import datetime, timedelta from ..job import job_dict, Job @@ -343,7 +344,7 @@ if __name__ == "__main__": def download(dest: Path, start: datetime | None, end: datetime | None): - dest.mkdir(parents = True) + dest.mkdir(parents=True) subprocess.run(["git", "clone", "https://github.com/LLNL/LAST/", str(dest / 'repo')], check=True, text=True) subprocess.run(["git", "lfs", "pull"], 
check=True, text=True, cwd=dest / "repo") (dest / "repo" / "Lassen-Supercomputer-Job-Dataset").rename(dest / "Lassen-Supercomputer-Job-Dataset") diff --git a/raps/dataloaders/marconi100.py b/raps/dataloaders/marconi100.py index c74ebd6..3ee7570 100644 --- a/raps/dataloaders/marconi100.py +++ b/raps/dataloaders/marconi100.py @@ -254,7 +254,7 @@ def download(dest: Path, start: datetime | None, end: datetime | None): files = requests.get("https://zenodo.org/api/records/10127767").json()["files"] # marconi100 is just one big parquet, nothing to pre-filter - dest.mkdir(parents = True) + dest.mkdir(parents=True) for file in files: print(f"Downloading {file['key']}") urllib.request.urlretrieve(file['links']['self'], dest / file['key']) diff --git a/raps/stats.py b/raps/stats.py index 7df6208..6906e67 100644 --- a/raps/stats.py +++ b/raps/stats.py @@ -454,7 +454,7 @@ class RunningStats: # Infinite generator used for the RunningStats logic def running_sum_values(values, last_value, last_index): return last_value + sum_values(values[last_index:]) - + def running_min_value(values, last_value, last_index): if last_index < len(values): new_min = min_value(values[last_index:]) diff --git a/raps/telemetry.py b/raps/telemetry.py index 09f94e4..0da391a 100644 --- a/raps/telemetry.py +++ b/raps/telemetry.py @@ -390,6 +390,6 @@ def run_download_add_parser(subparsers: SubParsers): def run_download(args: DownloadArgs): config = get_system_config(args.system).get_legacy() - td = Telemetry(system = args.system, config = config) + td = Telemetry(system=args.system, config=config) dest = args.dest if args.dest else Path("./data").resolve() / args.system td.download_data(dest, args.start, args.end) diff --git a/raps/utils.py b/raps/utils.py index 8385361..55c30b8 100644 --- a/raps/utils.py +++ b/raps/utils.py @@ -71,8 +71,11 @@ def to_dict(arg): else: raise ValueError(f"Cannot convert {arg} to dict") + DateType = TypeVar("DateType", date, datetime) -def date_range(start: DateType, end: DateType, step = timedelta(days=1)) -> Iterable[DateType]: + + +def date_range(start: DateType, end: DateType, step=timedelta(days=1)) -> Iterable[DateType]: window_start = start while window_start < end: yield window_start -- GitLab From b5b543e150afe4a9ac2f889681b7e914e48a3af1 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Mon, 3 Nov 2025 15:59:43 -0500 Subject: [PATCH 382/388] Added long marker to the network test for lassen even without data! It has been running over an hour now. --- tests/systems/test_main_network_run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/systems/test_main_network_run.py b/tests/systems/test_main_network_run.py index ea693b4..5c13989 100644 --- a/tests/systems/test_main_network_run.py +++ b/tests/systems/test_main_network_run.py @@ -11,7 +11,10 @@ pytestmark = [ ] -def test_main_network_run(system, system_config, sim_output): +def test_main_network_run(system, system_config, sim_output, pytestconfig): + if system == "lassen" and not pytestconfig.getoption("--runlong"): + pytest.skip("This test for \"lassen\" is very long; pass --runlong to run it") + if not system_config.get("main", False): pytest.skip(f"{system} does not support basic main run.") -- GitLab From 6ca091f304a82aa40ab1ee9bab20421c39cc3f59 Mon Sep 17 00:00:00 2001 From: Matthias Maiterth Date: Tue, 4 Nov 2025 10:03:56 -0500 Subject: [PATCH 383/388] Fixed dragonfly test as it contained logical errors. 
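
As a sanity check, the corrected counts can be worked out by hand. The
following is a minimal standalone sketch derived from the formulas in the
updated test below; it assumes build_dragonfly creates A+1 groups of D
routers, with P hosts attached to each router:

```python
# Hypothetical worked example for D=2, A=2, P=2, mirroring the updated test
D, A, P = 2, 2, 2
groups = A + 1                                  # 3 groups
routers = D * groups                            # 6 routers
hosts = routers * P                             # 12 hosts
clique_edges = D * (D - 1) // 2                 # 1 router-router edge per group
intra_edges = (clique_edges + P * D) * groups   # (1 + 4) * 3 = 15 intra-group edges
inter_edges = (groups * (groups - 1) // 2) * D  # 3 group pairs * 2 links = 6
assert routers + hosts == 18                    # total nodes
assert intra_edges + inter_edges == 21          # total edges
```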
---
 tests/unit/test_net_dragonfly.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/tests/unit/test_net_dragonfly.py b/tests/unit/test_net_dragonfly.py
index a36afdc..b8711fd 100644
--- a/tests/unit/test_net_dragonfly.py
+++ b/tests/unit/test_net_dragonfly.py
@@ -1,32 +1,42 @@
-import pytest
 from raps.network.dragonfly import build_dragonfly, dragonfly_node_id_to_host_name
 
+
 def test_build_dragonfly():
     """Test building a small dragonfly network."""
-    D, A, P = 2, 2, 2
+    D = 2  # Routers per group
+    A = 2  # Global connections per router
+    P = 2  # Compute nodes per router
     G = build_dragonfly(D, A, P)
 
     # Check number of nodes
-    num_routers = D * A
-    num_hosts = D * A * P
+    num_routers = D * (A + 1)
+    num_hosts = num_routers * P
     total_nodes = num_routers + num_hosts
     assert len(G.nodes) == total_nodes
 
     # Check number of edges
-    # Intra-group edges (clique)
-    intra_group_edges = D * (A * (A - 1) // 2)
+    routers_per_group = D
+    # Edges of the router clique:
+    router_clique_edges_per_group = ((routers_per_group * (routers_per_group - 1)) // 2)
+    # Edges for all router compute nodes:
+    compute_node_edges_per_router = P
+    # Total intra-group edges:
+    intra_group_edges = router_clique_edges_per_group + compute_node_edges_per_router * D
+
     # Inter-group edges
-    inter_group_edges = A * (D * (D - 1) // 2)
+    total_groups = A + 1
+    inter_group_edges_simple_clique = ((total_groups * (total_groups-1)) // 2)
+    inter_group_edges = inter_group_edges_simple_clique * D
     # Host to router edges
-    host_router_edges = num_hosts
-    total_edges = intra_group_edges + inter_group_edges + host_router_edges
+    total_edges = intra_group_edges * total_groups + inter_group_edges
     assert len(G.edges) == total_edges
 
     # Check node types
-    node_types = [data["type"] for _, data in G.nodes(data=True)]
+    node_types = [data["layer"] for _, data in G.nodes(data=True)]
    assert node_types.count("router") == num_routers
     assert node_types.count("host") == num_hosts
 
+
 def test_dragonfly_node_id_to_host_name():
     """Test the dragonfly_node_id_to_host_name function."""
     D, A, P = 2, 2, 2
-- 
GitLab


From d06d0ac1927facac224f198467648eef89f610ac Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 4 Nov 2025 11:16:34 -0500
Subject: [PATCH 384/388] Updated test for 3D torus. The implementation is
 currently wrong; the test should be correct. The torus3d implementation
 currently builds only a cube (mesh), not a 3D torus. This needs to be fixed
 and verified.

---
 tests/unit/test_net_torus3d.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/unit/test_net_torus3d.py b/tests/unit/test_net_torus3d.py
index b18cbfa..3e38eb4 100644
--- a/tests/unit/test_net_torus3d.py
+++ b/tests/unit/test_net_torus3d.py
@@ -1,6 +1,6 @@
-import pytest
 from raps.network.torus3d import build_torus3d, torus_route_xyz
 
+
 def test_build_torus3d():
     """Test building a small 3D torus network."""
     dims = (2, 2, 2)
@@ -8,23 +8,25 @@ def test_build_torus3d():
 
     # Check number of nodes
     num_routers = dims[0] * dims[1] * dims[2]
-    num_hosts = num_routers  # hosts_per_router=1
+    hosts_per_router = 1  # Default assumption
+    num_hosts = num_routers * hosts_per_router
     total_nodes = num_routers + num_hosts
     assert len(G.nodes) == total_nodes
 
     # Check number of edges
     # Router to router edges
-    router_edges = (num_routers * 3) // 2  # Each router has 3 neighbors in a 3D torus
+    router_edges = (num_routers * 3)  # 6 neighbors per router in a 3D torus; each edge counted once
     # Host to router edges
-    host_router_edges = num_hosts
+    host_router_edges = num_routers * hosts_per_router
     total_edges = router_edges + host_router_edges
     assert len(G.edges) == total_edges
 
     # Check node types
-    node_types = [data["kind"] for _, data in G.nodes(data=True)]
+    node_types = [data["type"] for _, data in G.nodes(data=True)]
     assert node_types.count("router") == num_routers
     assert node_types.count("host") == num_hosts
 
+
 def test_torus_route_xyz():
     """Test the torus_route_xyz function."""
     dims = (4, 4, 4)
-- 
GitLab


From 97603503bac176c3672ed2fdcab1d5ac326455e2 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 4 Nov 2025 11:49:18 -0500
Subject: [PATCH 385/388] Fixed get_current_utilization to work with
 trace_quanta set to None again when the trace is an integer or float. This
 was a regression; the proper fix is to introduce a trace type that carries
 information about what the trace is, rather than relying on the Python type
 alone. That would also allow new trace types, e.g. a per-node trace for a
 job, or time-series traces as opposed to plain arrays.

---
 raps/utils.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/raps/utils.py b/raps/utils.py
index 55c30b8..0c77d3f 100644
--- a/raps/utils.py
+++ b/raps/utils.py
@@ -648,15 +648,16 @@ def get_current_utilization(trace, job: Job):
     """Return utilization for a trace at the job's current running time.
     Note: this should move to a trace.py and a Trace class!
     """
-    if not job.trace_quanta:
-        raise ValueError("job.trace_quanta is not set; cannot compute utilization.")
-
-    time_quanta_index = int((job.current_run_time - job.trace_start_time) // job.trace_quanta)
-    if time_quanta_index < 0:
-        time_quanta_index = 0
-
     if (isinstance(trace, list) and trace) or \
        (isinstance(trace, np.ndarray) and trace.size != 0):
+
+        if not job.trace_quanta:
+            raise ValueError("job.trace_quanta is not set; cannot compute utilization.")
+
+        time_quanta_index = int((job.current_run_time - job.trace_start_time) // job.trace_quanta)
+        if time_quanta_index < 0:
+            time_quanta_index = 0
+
         if time_quanta_index < len(trace):
             util = get_utilization(trace, time_quanta_index)
         else:
@@ -664,6 +665,7 @@ def get_current_utilization(trace, job: Job):
     elif isinstance(trace, (float, int)):
         util = trace
     else:
+        raise ValueError(f"trace is of unexpected type: {type(trace)}.")
         util = 0.0
 
     return util
-- 
GitLab


From a10d0e7d5d369c32585d85b4a709c736be518c0e Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Tue, 4 Nov 2025 12:23:19 -0500
Subject: [PATCH 386/388] Changed the normal run-with-data test to 20 minutes,
 as this loads data for gcloud. If 10 minutes is desired, changing the
 arrival rate to poisson may ensure that data is available for this test and
 its checks. There may be datasets with no data in the first minutes.
 (This should be solved by the start parameter as a datetime; however, this
 currently fails for gcloud. Reproduce the failing test in
 tests/systems/test_main_withdata_range_run.py with:
 ``RAPS_DATA_DIR=/opt/data pytest -m "withdata and gcloudv2" -n auto --runlong''.)

---
 tests/systems/test_main_withdata_run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/systems/test_main_withdata_run.py b/tests/systems/test_main_withdata_run.py
index ed1a944..1ca6413 100644
--- a/tests/systems/test_main_withdata_run.py
+++ b/tests/systems/test_main_withdata_run.py
@@ -16,11 +16,11 @@ def test_main_withdata_run(system, system_config, system_files, sim_output):
 
     engine, stats = run_engine({
         "system": system,
-        "time": "10m",
+        "time": "20m",
         "replay": system_files,
     })
 
     # Check that it at least loaded some data
-    assert stats['tick_count'] == 10 * 60
+    assert stats['tick_count'] == 20 * 60
     assert stats['job']['jobs_total'] > 0
     assert len(stats['job']['jobs_still_running']) + stats['job']['jobs_completed'] > 0
-- 
GitLab


From a5e079295eb2e9a9bb7ff42f6d613cfe69b61b52 Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 5 Nov 2025 16:17:18 -0500
Subject: [PATCH 387/388] Updated contributors before merge to main.

---
 CONTRIBUTORS.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index e351ce9..98c2a9b 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -11,3 +11,4 @@ Rashadul Kabir (rashadul.kabir@colostate.edu), Colorado State University
 Bertrand Cirou (cirou@cines.fr), Centre Informatique National de l’Enseignement Supérieur
 Kevin Menear (kmenear@nrel.gov), National Renewable Energy Laboratory
 Tim Dykes (tim.dykes@hpe.com), Hewlett Packard Enterprise
+Srishti Kalepu (skalepu3@gatech.edu), Georgia Institute of Technology
-- 
GitLab


From d55203440430a71a3baa904c340f61f95793e25e Mon Sep 17 00:00:00 2001
From: Matthias Maiterth
Date: Wed, 5 Nov 2025 16:20:57 -0500
Subject: [PATCH 388/388] Added additional contributors.

---
 CONTRIBUTORS.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 98c2a9b..fd95582 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -12,3 +12,4 @@ Bertrand Cirou (cirou@cines.fr), Centre Informatique National de l’Enseignemen
 Kevin Menear (kmenear@nrel.gov), National Renewable Energy Laboratory
 Tim Dykes (tim.dykes@hpe.com), Hewlett Packard Enterprise
 Srishti Kalepu (skalepu3@gatech.edu), Georgia Institute of Technology
+Damien Fay (damien.fay@hpe.com), Hewlett Packard Enterprise
-- 
GitLab