Commit 4259c4a0 authored Feb 03, 2025 by Brewer, Wes

Fix a few bugs related to the addition of account management

parent 9395b8d3

main.py

+0 −1

Original line number	Diff line number	Diff line
		@@ -104,7 +104,6 @@ if args.replay:
		else: # custom data loader
		print(*args.replay)
		jobs = td.load_data(args.replay)
		for job in jobs: job['priority'] = sc.policy.aging_boost(job['nodes_required'])
		td.save_snapshot(jobs, filename=DIR_NAME)

		# Set number of timesteps based on the last job running which we assume

raps/account.py

+4 −1

Original line number	Diff line number	Diff line
		@@ -32,6 +32,9 @@ class Account:
		self.total_jobs += 1
		self.time_allocated += jobstats.run_time
		self.energy_allocated += jobstats.energy
		if self.time_allocated == 0:
		self.avg_power = 0
		else:
		self.avg_power = self.energy_allocated / self.time_allocated
		if average_user.avg_power == 0: # If this is the first job use own power
		average_user.avg_power = self.avg_power

raps/dataloaders/adastraMI250.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -82,6 +82,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
		# Map dataframe to job state. Add results to jobs list
		for jidx in tqdm(range(num_jobs - 1), total=num_jobs, desc="Processing Jobs"):

		account = jobs_df.loc[jidx, 'user_id'] # or 'group_id'
		job_id = jobs_df.loc[jidx, 'job_id']

		if not jid == '*':
		@@ -163,7 +164,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):

		if time_offset >= 0 and wall_time > 0:
		#print("start_time",time_start,"\tend_time",time_end,"\twall_time",wall_time,"\tquanta wall time",gpu_trace.size * TRACE_QUANTA )
		- job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, [],[],wall_time,
		+ job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [],[],wall_time,
		end_state, scheduled_nodes, time_offset, job_id, priority)
		jobs.append(job_info)
		else:

raps/dataloaders/frontier.py

+3 −1

Original line number	Diff line number	Diff line
		@@ -95,6 +95,8 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
		# Map dataframe to job state. Add results to jobs list
		for jidx in tqdm(range(num_jobs - 1), total=num_jobs, desc="Processing Jobs"):

		user = jobs_df.loc[jidx, 'user']
		account = jobs_df.loc[jidx, 'account']
		job_id = jobs_df.loc[jidx, 'job_id']
		allocation_id = jobs_df.loc[jidx, 'allocation_id']
		nodes_required = jobs_df.loc[jidx, 'node_count']
		@@ -154,7 +156,7 @@ def load_data_from_df(jobs_df: pd.DataFrame, jobprofile_df: pd.DataFrame, **kwar
		scheduled_nodes.append(indices)

		if gpu_trace.size > 0 and (jid == job_id or jid == '*') and time_offset > 0:
		- job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, [], [], wall_time,
		+ job_info = job_dict(nodes_required, name, account, cpu_trace, gpu_trace, [], [], wall_time,
		end_state, scheduled_nodes, time_offset, job_id, priority)
		jobs.append(job_info)

raps/dataloaders/fugaku.py

+2 −0

Original line number	Diff line number	Diff line
		@@ -68,6 +68,7 @@ def load_data_from_df(df, **kwargs):
		# Loop through the DataFrame rows to extract job information
		for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing Jobs"):
		nodes_required = row['nnumr'] if 'nnumr' in df.columns else 0
		account = row['usr']
		name = row['jnam'] if 'jnam' in df.columns else 'unknown'

		if validate:
		@@ -95,6 +96,7 @@ def load_data_from_df(df, **kwargs):
		job_info = job_dict(
		nodes_required=nodes_required,
		name=name,
		account=account,
		cpu_trace=cpu_trace,
		gpu_trace=gpu_trace,
		ntx_trace=[],