Commit 8e043ed6 authored by Maiterth, Matthias's avatar Maiterth, Matthias
Browse files

Merge branch 'reschedule-dataloaders' into scheduleflow-take2

parents 36fd4ef8 bd5ed077
Loading
Loading
Loading
Loading
+66 −4
Original line number Diff line number Diff line
@@ -50,21 +50,31 @@ def min_max_sum(value,min,max,sum):
    sum += value
    return min,max,sum


def get_job_stats(engine: Engine):
    """ Return job statistics processed over the engine execution"""
    # Information on Job-Mix
    min_job_size, max_job_size, sum_job_size = sys.maxsize, -sys.maxsize - 1, 0
    min_runtime, max_runtime, sum_runtime = sys.maxsize, -sys.maxsize - 1, 0

    min_energy, max_energy, sum_energy = sys.maxsize, -sys.maxsize - 1, 0
    min_edp, max_edp, sum_edp = sys.maxsize, -sys.maxsize - 1, 0
    min_edp2, max_edp2, sum_edp2 = sys.maxsize, -sys.maxsize - 1, 0

    min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = sys.maxsize, -sys.maxsize - 1, 0
    # Completion statistics
    throughput = engine.jobs_completed / engine.timesteps * 3600 if engine.timesteps else 0  # Jobs per hour

    min_wait_time, max_wait_time, sum_wait_time = sys.maxsize, -sys.maxsize - 1, 0
    min_turnaround_time, max_turnaround_time, sum_turnaround_time = sys.maxsize, -sys.maxsize - 1, 0

    min_psf_partial_num, max_psf_partial_num, sum_psf_partial_num = sys.maxsize, -sys.maxsize - 1, 0
    min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = sys.maxsize, -sys.maxsize - 1, 0
    min_awrt, max_awrt, sum_awrt = sys.maxsize, -sys.maxsize - 1, 0

    jobsSmall = 0
    jobsMedium = 0
    jobsLarge = 0
    jobsHuge = 0

    # Information on Job-Mix
    for job in engine.job_history_dict:
        job_size = job['num_nodes']
@@ -75,6 +85,17 @@ def get_job_stats(engine: Engine):
        min_runtime, max_runtime, sum_runtime = \
            min_max_sum(runtime, min_runtime, max_runtime, sum_runtime)

        energy = job['energy']
        min_energy, max_energy, sum_energy = \
            min_max_sum(energy, min_energy, max_energy, sum_energy)
        edp = energy * runtime
        min_edp, max_edp, sum_edp = \
            min_max_sum(edp, min_edp, max_edp, sum_edp)

        edp2 = energy * runtime**2
        min_edp2, max_edp2, sum_edp2 = \
            min_max_sum(edp2, min_edp2, max_edp2, sum_edp2)

        agg_node_hours = runtime * job_size  # Aggreagte node hours
        min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = \
            min_max_sum(agg_node_hours, min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours)
@@ -88,30 +109,61 @@ def get_job_stats(engine: Engine):
        min_turnaround_time, max_turnaround_time, sum_turnaround_time = \
            min_max_sum(turnaround_time, min_turnaround_time, max_turnaround_time, sum_turnaround_time)

        # Area Weighted Average Response Time
        awrt = agg_node_hours * turnaround_time  # Area Weighted Response Time
        min_awrt, max_awrt, sum_awrt = min_max_sum(awrt, min_awrt, max_awrt, sum_awrt)

        # Priority Weighted Specific Response Time
        psf_partial_num = job_size * (turnaround_time**4 - wait_time**4)
        psf_partial_den = job_size * (turnaround_time**3 - wait_time**3)

        min_psf_partial_num, max_psf_partial_num, sum_psf_partial_num = \
            min_max_sum(psf_partial_num, min_psf_partial_num, max_psf_partial_num, sum_psf_partial_num)
        min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den = \
            min_max_sum(psf_partial_den, min_psf_partial_den, max_psf_partial_den, sum_psf_partial_den)

        if job['num_nodes'] <= 5:
            jobsSmall += 1
        elif job['num_nodes'] <= 50:
            jobsMedium += 1
        elif job['num_nodes'] <= 250:
            jobsLarge += 1
        else:  # job['nodes_required'] > 250:
            jobsHuge += 1

    if len(engine.job_history_dict) != 0:
        avg_job_size = sum_job_size / len(engine.job_history_dict)
        avg_runtime = sum_runtime / len(engine.job_history_dict)
        avg_energy = sum_energy / len(engine.job_history_dict)
        avg_edp= sum_edp / len(engine.job_history_dict)
        avg_edp2= sum_edp2 / len(engine.job_history_dict)
        avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict)
        avg_wait_time = sum_wait_time / len(engine.job_history_dict)
        avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict)
        avg_awrt = sum_awrt / len(engine.job_history_dict)
        avg_awrt = sum_awrt / sum_agg_node_hours
        psf = (3 * sum_psf_partial_num) / (4 * sum_psf_partial_den)
    else:
        # Set these to -1 to indicate nothing ran
        min_job_size, max_job_size, avg_job_size = -1,-1,-1
        min_runtime, max_runtime, avg_runtime = -1,-1,-1
        min_energy, max_energy, avg_energy = -1,-1,-1
        min_edp, max_edp, avg_edp = -1,-1,-1
        min_edp2, max_edp2, avg_edp2 = -1,-1,-1
        min_agg_node_hours, max_agg_node_hours, avg_agg_node_hours = -1,-1,-1
        min_wait_time, max_wait_time, avg_wait_time = -1,-1,-1
        min_turnaround_time, max_turnaround_time, avg_turnaround_time = -1,-1,-1
        min_awrt, max_awrt, avg_awrt = -1,-1,-1
        psf = -1

    job_stats = {
        'jobs completed': engine.jobs_completed,
        'throughput': f'{throughput:.2f} jobs/hour',
        'jobs still running': [job.id for job in engine.running],
        'jobs still in queue': [job.id for job in engine.queue],
        'Jobs <= 5 nodes': jobsSmall,
        'Jobs <= 50 nodes': jobsMedium,
        'Jobs <= 250 nodes': jobsLarge,
        'Jobs > 250 nodes': jobsHuge,
        # Information on job-mix executed
        'min job size': min_job_size,
        'max job size': max_job_size,
@@ -119,6 +171,15 @@ def get_job_stats(engine: Engine):
        'min runtime': min_runtime,
        'max runtime': max_runtime,
        'average runtime': avg_runtime,
        'min energy': min_energy,
        'max energy': max_energy,
        'avg energy': avg_energy,
        'min edp': min_edp,
        'max edp': max_edp,
        'avg edp': avg_edp,
        'min edp^2': min_edp2,
        'max edp^2': max_edp2,
        'avg edp^2': avg_edp2,
        'min_aggregate_node_hours': min_agg_node_hours,
        'max_aggregate_node_hours': max_agg_node_hours,
        'avg_aggregate_node_hours': avg_agg_node_hours,
@@ -131,6 +192,7 @@ def get_job_stats(engine: Engine):
        'average_turnaround_time': avg_turnaround_time,
        'min_area_weighted_response_time': min_awrt,
        'max_area_weighted_response_time': max_awrt,
        'avg_area_weighted_response_time': avg_awrt
        'area_weighted_avg_response_time': avg_awrt,
        'priority_weighted_specific_response_time': psf
    }
    return job_stats
+64 −0
Original line number Diff line number Diff line
#!/bin/env python3
"""Plot power and utilization curves from a RAPS simulation-result directory.

Usage: python <script> <simulation_result/dir>
"""
import sys

import pandas as pd
import pyarrow.parquet as pq  # noqa: F401 -- guarantees the parquet engine pandas needs is installed
import matplotlib.pyplot as plt

if len(sys.argv) > 1:
    path = sys.argv[1]
else:
    print(f"Usage: python {sys.argv[0]} <simulation_result/dir>")
    # exit() is intended for the interactive interpreter only; sys.exit with a
    # non-zero status is the script-safe way to signal a usage error.
    sys.exit(1)

# e.g. path = "$HOME/Repositories/exadigit/raps/simulation_results/b803010"

# Known result files; the plotting code below indexes into this list.
files = ['cooling_model.parquet', 'loss_history.parquet', 'power_history.parquet', 'util.parquet']

# NOTE(review): currently unused (paths are rebuilt ad hoc below) -- kept for future use.
full_files = [f"{path}/{file}" for file in files]


def iter_to_seconds(i, step=15):
    """Convert an iteration index to simulated elapsed seconds.

    Args:
        i: 0-based iteration index.
        step: seconds of simulated time per iteration. Defaults to 15, the
            sampling interval the original hard-coded.

    Returns:
        Elapsed simulated time in seconds (``i * step``).
    """
    return i * step


# Single combined figure: power on the left axis, utilization on the right.
# (The original wrapped this in a one-iteration `for i in [1]:` loop; that dead
# scaffolding is removed -- the body runs exactly once either way.)
fig, ax1 = plt.subplots(figsize=(10, 6))

power = path + "/" + files[2]  # power_history.parquet
loss = path + "/" + files[1]   # loss_history.parquet (only used by the commented-out loss plot)
util = path + "/" + files[3]   # util.parquet

# Parquet columns are positional (0, 1); rename them to meaningful labels.
# NOTE(review): assumes column 0 is time [s] and column 1 the measurement --
# confirm against the writer of these files.
df_power = pd.read_parquet(power)
df_power = df_power.rename(columns={0: 'time', 1: 'power [kw]'})
# Fixed legend label: original read 'Power kW]' (missing opening bracket).
ax1.plot(df_power['time'], df_power['power [kw]'], color='black', label='Power [kW]')

#df_loss = pd.read_parquet(loss)
#df_loss = df_loss.rename(columns={0:'time',1:'loss [kw]'})
#ax1.plot(df_loss['time'],df_loss['loss [kw]'], color='red', label='Loss [kW]')

ax2 = ax1.twinx()  # secondary y-axis sharing the x-axis, for the utilization fraction

#df_cooling = pd.read_parquet(cooling)
#df_cooling['index'] = df_cooling.index
#df_cooling['time'] = df_cooling['index'].apply(iter_to_seconds)
#ymax = max(df_cooling['pue'])
#ax2.plot(df_cooling['time'],df_cooling['pue'], color='blue', label='PUE')

df_util = pd.read_parquet(util)
df_util = df_util.rename(columns={0: 'time', 1: 'utilization [%]'})
df_util['utilization'] = df_util['utilization [%]'] / 100  # percent -> fraction
ax2.plot(df_util['time'], df_util['utilization'], color='orange', label='Utilization')

#ymax = max(max(df_cooling['pue']),max(df_util['utilization']))
# Floor at 0 so set_ylim stays sane even if every utilization sample is negative.
ymax = max(0, max(df_util['utilization']))
ax2.set_ylim([0, ymax * 1.05])

ax1.set_xlabel('time [s]')
ax1.set_ylabel('[kW]')
# NOTE(review): right axis plots a 0-1 fraction, not percent -- label kept as-is.
ax2.set_ylabel('[%]')
plt.title(path)
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')
# plt.show()
plt.savefig("test.png")