Commit e4586949 authored by Maiterth, Matthias's avatar Maiterth, Matthias
Browse files

Added additional stats, and move them to raps/stats.py

parent a8f2613e
Loading
Loading
Loading
Loading
+9 −6
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@ from raps.workload import Workload
from raps.account import Accounts
from raps.weather import Weather
from raps.utils import create_casename, convert_to_seconds, write_dict_to_file, next_arrival
from raps.stats import get_engine_stats, get_job_stats

config = ConfigManager(system_name=args.system).get_config()

@@ -158,15 +159,15 @@ if args.verbose:

layout_manager.run(jobs, timesteps=timesteps)

system_stats = sc.get_system_stats()
job_stats = sc.get_job_stats()
engine_stats = get_engine_stats(sc)
job_stats = get_job_stats(sc)
# Following b/c we get the following error when we use PM100 telemetry dataset
# TypeError: Object of type int64 is not JSON serializable
try:
    print(json.dumps(system_stats, indent=4))
    print(json.dumps(engine_stats, indent=4))
    print(json.dumps(job_stats, indent=4))
except:
    print(system_stats)
    print(engine_stats)
    print(job_stats)


@@ -245,9 +246,11 @@ if args.output:

        try:
            with open(OPATH / 'stats.out', 'w') as f:
                json.dump(output_stats, f, indent=4)
                json.dump(engine_stats, f, indent=4)
                json.dump(job_stats, f, indent=4)
        except:
            write_dict_to_file(output_stats, OPATH / 'stats.out')
            write_dict_to_file(engine_stats, OPATH / 'stats.out')
            write_dict_to_file(job_stats, OPATH / 'stats.out')

        try:
            with open(OPATH / 'accounts.json', 'w') as f:
+0 −76
Original line number Diff line number Diff line
@@ -230,81 +230,5 @@ class Engine:

            yield self.tick()

    def get_system_stats(self):
        """ Return output statistics """
        num_samples = len(self.power_manager.history) if self.power_manager else 0

        average_power_mw = sum_values(self.power_manager.history) / num_samples / 1000 if num_samples else 0
        average_loss_mw = sum_values(self.power_manager.loss_history) / num_samples / 1000 if num_samples else 0
        min_loss_mw = min_value(self.power_manager.loss_history) / 1000 if num_samples else 0
        max_loss_mw = max_value(self.power_manager.loss_history) / 1000 if num_samples else 0

        loss_fraction = average_loss_mw / average_power_mw if average_power_mw else 0
        efficiency = 1 - loss_fraction if loss_fraction else 0
        total_energy_consumed = average_power_mw * self.timesteps / 3600 if self.timesteps else 0  # MW-hr
        emissions = total_energy_consumed * 852.3 / 2204.6 / efficiency if efficiency else 0
        total_cost = total_energy_consumed * 1000 * self.config.get('POWER_COST', 0)  # Total cost in dollars

        stats = {
            'num_samples': num_samples,
            'average power': f'{average_power_mw:.2f} MW',
            'min loss': f'{min_loss_mw:.2f} MW',
            'average loss': f'{average_loss_mw:.2f} MW',
            'max loss': f'{max_loss_mw:.2f} MW',
            'system power efficiency': f'{efficiency * 100:.2f}%',
            'total energy consumed': f'{total_energy_consumed:.2f} MW-hr',
            'carbon emissions': f'{emissions:.2f} metric tons CO2',
            'total cost': f'${total_cost:.2f}'
        }

        return stats

    def get_job_history_dict(self):
        return self.job_history_dict

    def get_job_stats(self):
        throughput = self.jobs_completed / self.timesteps * 3600 if self.timesteps else 0  # Jobs per hour
        min_wait_time = sys.maxsize
        max_wait_time = -sys.maxsize - 1
        aggregate_wait_time = 0
        min_turnaround_time = sys.maxsize
        max_turnaround_time = -sys.maxsize - 1
        aggregate_turnaround_time = 0
        for job in self.job_history_dict:
            wait_time = job["start_time"] - job["submit_time"]
            aggregate_wait_time += wait_time
            turnaround_time = job["end_time"] - job["submit_time"]
            aggregate_turnaround_time += turnaround_time
            if wait_time < min_wait_time:
                min_wait_time = wait_time
            if wait_time > max_wait_time:
                max_wait_time = wait_time
            if turnaround_time < min_turnaround_time:
                min_turnaround_time = turnaround_time
            if turnaround_time > max_turnaround_time:
                max_turnaround_time = turnaround_time

        if len(self.job_history_dict) != 0:
            average_wait_time = aggregate_wait_time / len(self.job_history_dict)
            average_turnaround_time = aggregate_turnaround_time / len(self.job_history_dict)
        else:
            # Set these to -1 to indicate nothing ran
            min_wait_time = -1
            max_wait_time = -1
            min_turnaround_time = -1
            max_turnaround_time = -1
            average_wait_time = -1
            average_turnaround_time = -1
        job_stats = {
            'jobs completed': self.jobs_completed,
            'throughput': f'{throughput:.2f} jobs/hour',
            'jobs still running': [job.id for job in self.running],
            'jobs still in queue': [job.id for job in self.queue],
            'min_wait_time': min_wait_time,
            'max_wait_time': max_wait_time,
            'average_wait_time': average_wait_time,
            'min_turnaround_time': min_turnaround_time,
            'max_turnaround_time': max_turnaround_time,
            'average_turnaround_time': average_turnaround_time
        }
        return job_stats

raps/stats.py

0 → 100644
+136 −0
Original line number Diff line number Diff line
"""
This module provides functionality for generating statistics.
These are statistics on
the engine
the jobs

Both could be part of the engine or jobs class, but as the are very verbose, try to keep statistics consolidated in this file.
"""
import sys
from .utils import sum_values, min_value, max_value

from .engine import Engine


def get_engine_stats(engine: Engine):
    """ Return engine statistics """
    num_samples = len(engine.power_manager.history) if engine.power_manager else 0

    average_power_mw = sum_values(engine.power_manager.history) / num_samples / 1000 if num_samples else 0
    average_loss_mw = sum_values(engine.power_manager.loss_history) / num_samples / 1000 if num_samples else 0
    min_loss_mw = min_value(engine.power_manager.loss_history) / 1000 if num_samples else 0
    max_loss_mw = max_value(engine.power_manager.loss_history) / 1000 if num_samples else 0

    loss_fraction = average_loss_mw / average_power_mw if average_power_mw else 0
    efficiency = 1 - loss_fraction if loss_fraction else 0
    total_energy_consumed = average_power_mw * engine.timesteps / 3600 if engine.timesteps else 0  # MW-hr
    emissions = total_energy_consumed * 852.3 / 2204.6 / efficiency if efficiency else 0
    total_cost = total_energy_consumed * 1000 * engine.config.get('POWER_COST', 0)  # Total cost in dollars

    stats = {
        'num_samples': num_samples,
        'average power': f'{average_power_mw:.2f} MW',
        'min loss': f'{min_loss_mw:.2f} MW',
        'average loss': f'{average_loss_mw:.2f} MW',
        'max loss': f'{max_loss_mw:.2f} MW',
        'system power efficiency': f'{efficiency * 100:.2f}%',
        'total energy consumed': f'{total_energy_consumed:.2f} MW-hr',
        'carbon emissions': f'{emissions:.2f} metric tons CO2',
        'total cost': f'${total_cost:.2f}'
    }

    return stats


def min_max_sum(value,min,max,sum):
    if value < min:
        min = value
    if value > max:
        max = value
    sum += value
    return min,max,sum


def get_job_stats(engine: Engine):
    """ Return job statistics processed over the engine execution"""
    # Information on Job-Mix
    min_job_size, max_job_size, sum_job_size = sys.maxsize, -sys.maxsize - 1, 0
    min_runtime, max_runtime, sum_runtime = sys.maxsize, -sys.maxsize - 1, 0
    min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = sys.maxsize, -sys.maxsize - 1, 0
    # Completion statistics
    throughput = engine.jobs_completed / engine.timesteps * 3600 if engine.timesteps else 0  # Jobs per hour

    min_wait_time, max_wait_time, sum_wait_time = sys.maxsize, -sys.maxsize - 1, 0
    min_turnaround_time, max_turnaround_time, sum_turnaround_time = sys.maxsize, -sys.maxsize - 1, 0

    min_awrt, max_awrt, sum_awrt = sys.maxsize, -sys.maxsize - 1, 0

    # Information on Job-Mix
    for job in engine.job_history_dict:
        job_size = job['num_nodes']
        min_job_size,max_job_size,sum_job_size = \
            min_max_sum(job_size, min_job_size, max_job_size, sum_job_size)

        runtime = job['end_time'] - job['start_time']
        min_runtime, max_runtime, sum_runtime = \
            min_max_sum(runtime, min_runtime, max_runtime, sum_runtime)

        agg_node_hours = runtime * job_size  # Aggreagte node hours
        min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours = \
            min_max_sum(agg_node_hours, min_agg_node_hours, max_agg_node_hours, sum_agg_node_hours)

        # Completion statistics
        wait_time = job["start_time"] - job["submit_time"]
        min_wait_time,max_wait_time,sum_wait_time = \
            min_max_sum(wait_time, min_wait_time, max_wait_time, sum_wait_time)

        turnaround_time = job["end_time"] - job["submit_time"]
        min_turnaround_time, max_turnaround_time, sum_turnaround_time = \
            min_max_sum(turnaround_time, min_turnaround_time, max_turnaround_time, sum_turnaround_time)

        awrt = agg_node_hours * turnaround_time  # Area Weighted Response Time
        min_awrt, max_awrt, sum_awrt = min_max_sum(awrt, min_awrt, max_awrt, sum_awrt)

    if len(engine.job_history_dict) != 0:
        avg_job_size = sum_job_size / len(engine.job_history_dict)
        avg_runtime = sum_runtime / len(engine.job_history_dict)
        avg_agg_node_hours = sum_agg_node_hours / len(engine.job_history_dict)
        avg_wait_time = sum_wait_time / len(engine.job_history_dict)
        avg_turnaround_time = sum_turnaround_time / len(engine.job_history_dict)
        avg_awrt = sum_awrt / len(engine.job_history_dict)
    else:
        # Set these to -1 to indicate nothing ran
        min_job_size, max_job_size, avg_job_size = -1,-1,-1
        min_runtime, max_runtime, avg_runtime = -1,-1,-1
        min_agg_node_hours, max_agg_node_hours, avg_agg_node_hours = -1,-1,-1
        min_wait_time, max_wait_time, avg_wait_time = -1,-1,-1
        min_turnaround_time, max_turnaround_time, avg_turnaround_time = -1,-1,-1
        min_awrt, max_awrt, avg_awrt = -1,-1,-1

    job_stats = {
        'jobs completed': engine.jobs_completed,
        'throughput': f'{throughput:.2f} jobs/hour',
        'jobs still running': [job.id for job in engine.running],
        'jobs still in queue': [job.id for job in engine.queue],
        # Information on job-mix executed
        'min job size': min_job_size,
        'max job size': max_job_size,
        'average job size': avg_job_size,
        'min runtime': min_runtime,
        'max runtime': max_runtime,
        'average runtime': avg_runtime,
        'min_aggregate_node_hours': min_agg_node_hours,
        'max_aggregate_node_hours': max_agg_node_hours,
        'avg_aggregate_node_hours': avg_agg_node_hours,
        # Completion statistics
        'min_wait_time': min_wait_time,
        'max_wait_time': max_wait_time,
        'average_wait_time': avg_wait_time,
        'min_turnaround_time': min_turnaround_time,
        'max_turnaround_time': max_turnaround_time,
        'average_turnaround_time': avg_turnaround_time,
        'min_area_weighted_response_time': min_awrt,
        'max_area_weighted_response_time': max_awrt,
        'avg_area_weighted_response_time': avg_awrt
    }
    return job_stats