Commit 38895605 authored by Brewer, Wes's avatar Brewer, Wes
Browse files

Move args.scale implementation from within dataloader to main.py

parent 2c4a714a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@ parser.add_argument('-p', '--plot', nargs='+', choices=['power', 'loss', 'pue',
                    help='Specify one or more types of plots to generate: power, loss, pue, util, temp')
choices = ['png', 'svg', 'jpg', 'pdf', 'eps']
parser.add_argument('--imtype', type=str, choices=choices, default=choices[0], help='Plot image type')
parser.add_argument('--scale', type=int, default=0, help='Scale telemetry to fit on target system/partition (currently only supported for marconi100 data)')
parser.add_argument('--scale', type=int, default=0, help='Scale telemetry to max nodes specified in order to run telemetry on a smaller target system/partition, e.g., --scale 192')
parser.add_argument('--system', type=str, default='frontier', help='System config to use')
choices = [policy.value for policy in PolicyType]
parser.add_argument('-s', '--schedule', type=str, choices=choices, default=choices[0], help='Schedule policy to use')
+8 −1
Original line number Diff line number Diff line
@@ -86,11 +86,18 @@ if args.replay:
    if args.replay[0].endswith(".npz"):
        print(f"Loading {args.replay[0]}...")
        jobs = td.load_snapshot(args.replay[0])

        if args.scale:
            for job in tqdm(jobs, desc=f"Scaling jobs to {args.scale} nodes"):
                job['nodes_required'] = random.randint(1, args.scale)
                args.reschedule = True

        if args.reschedule:
            print("available nodes:", config['AVAILABLE_NODES'])
            for job in tqdm(jobs, desc="Updating requested_nodes"):
            for job in tqdm(jobs, desc="Rescheduling jobs"):
                job['requested_nodes'] = None
                job['submit_time'] = next_arrival(1 / config['JOB_ARRIVAL_TIME'])

    else:
        print(*args.replay)
        jobs = td.load_data(args.replay)
+6 −1
Original line number Diff line number Diff line
@@ -20,6 +20,11 @@ configs = [ConfigManager(system_name=partition).get_config() for partition in pa
args_dicts = [{**vars(args), 'config': config} for config in configs]

# Initialize Workload with all configurations
if args.replay:

    td = Telemetry(**args_dict)

else:
    wl = Workload(*configs)

# Generate jobs based on workload type
+0 −3
Original line number Diff line number Diff line
@@ -63,7 +63,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
    fastforward = kwargs.get('fastforward')
    validate = kwargs.get('validate')
    jid = kwargs.get('jid', '*')
    scale = kwargs.get('scale')

    if fastforward: print(f"fast-forwarding {fastforward} seconds")

@@ -150,8 +149,6 @@ def load_data_from_df(jobs_df: pd.DataFrame, **kwargs):
        else: # Prescribed replay
            scheduled_nodes = (jobs_df.loc[jidx, 'nodes']).tolist()
            
        if scale > 0: nodes_required = random.randint(1, scale)

        if gpu_trace.size > 0 and time_offset >= 0:
            job_info = job_dict(nodes_required, name, cpu_trace, gpu_trace, [], [], wall_time,
                                end_state, scheduled_nodes, time_offset, job_id, priority)