Commit e9bd67ab authored by Hines, Jesse's avatar Hines, Jesse
Browse files

Remove layout_manager from scheduler

This separates the view/UI logic from the simulation itself, making it easier to swap out different UIs. It should also make
maintaining the server after RAPS updates easier, as both the server and the RAPS CLI now use the same code path and TickData object.
parent ec202afb
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -97,13 +97,13 @@ else:
        power_manager = PowerManager(compute_node_power, **config)

flops_manager = FLOPSManager(**config)
layout_manager = LayoutManager(args.layout, args.debug, **config)
args_dict['config'] = config
sc = Scheduler(
    power_manager = power_manager, flops_manager = flops_manager, layout_manager = layout_manager,
    power_manager = power_manager, flops_manager = flops_manager,
    cooling_model = cooling_model,
    **args_dict,
)
layout_manager = LayoutManager(args.layout, scheduler = sc, debug = args.debug, **config)

if args.replay:

@@ -173,7 +173,8 @@ if args.plot or args.output:
if args.verbose:
    print(jobs)

sc.run_simulation_blocking(jobs, timesteps=timesteps)
layout_manager.run(jobs, timesteps=timesteps)

output_stats = sc.get_stats()
# Following b/c we get the following error when we use PM100 telemetry dataset
# TypeError: Object of type int64 is not JSON serializable
+1 −22
Original line number Diff line number Diff line
@@ -80,7 +80,7 @@ def get_utilization(trace, time_quanta_index):

class Scheduler:
    """Job scheduler and simulation manager."""
    def __init__(self, *, power_manager, flops_manager, layout_manager, cooling_model=None, config, **kwargs):
    def __init__(self, *, power_manager, flops_manager, cooling_model=None, config, **kwargs):
        self.config = config
        self.down_nodes = summarize_ranges(self.config['DOWN_NODES'])
        self.available_nodes = list(set(range(self.config['TOTAL_NODES'])) - set(self.config['DOWN_NODES']))
@@ -91,7 +91,6 @@ class Scheduler:
        self.jobs_completed = 0
        self.current_time = 0
        self.cooling_model = cooling_model
        self.layout_manager = layout_manager
        self.power_manager = power_manager
        self.flops_manager = flops_manager
        self.debug = kwargs.get('debug')
@@ -310,21 +309,6 @@ class Scheduler:
                # Get a dataframe of the power data
                power_df = self.power_manager.get_power_df(rack_power, rack_loss)

        if self.current_time % self.config['UI_UPDATE_FREQ'] == 0 and self.layout_manager and not self.debug:
            if self.cooling_model:
                self.layout_manager.update_powertemp_array(power_df, \
                            cooling_outputs, pflops, gflop_per_watt, \
                            system_util, uncertainties=self.power_manager.uncertainties)
                self.layout_manager.update_pressflow_array(cooling_outputs)

            self.layout_manager.update_scheduled_jobs(self.running + self.queue)
            self.layout_manager.update_status(self.current_time, len(self.running),
                                            len(self.queue), self.num_active_nodes,
                                            self.num_free_nodes, self.down_nodes[1:])
            self.layout_manager.update_power_array(power_df, pflops, gflop_per_watt, \
                                system_util, uncertainties=self.power_manager.uncertainties)
            self.layout_manager.render()

        tick_data = TickData(
            current_time = self.current_time,
            completed = completed_jobs,
@@ -384,11 +368,6 @@ class Scheduler:
            if self.debug and timestep % self.config['UI_UPDATE_FREQ'] == 0:
                    print(".", end="", flush=True)

    def run_simulation_blocking(self, jobs, timesteps):
        """Iterate run_simulation(jobs, timesteps) to exhaustion, discarding each item."""
        simulation = self.run_simulation(jobs, timesteps)
        # The yielded items are not needed here; the loop only exhausts the iterable.
        for _tick in simulation:
            continue

    def get_stats(self):
        """ Return output statistics """
        sum_values = lambda values : sum(x[1] for x in values)
+30 −1
Original line number Diff line number Diff line
@@ -7,10 +7,12 @@ from rich.panel import Panel
from rich.table import Table
from .utils import summarize_ranges, convert_seconds
from .constants import ELLIPSES
from .scheduler import TickData, Scheduler


class LayoutManager:
    def __init__(self, layout_type, debug, **config):
    def __init__(self, layout_type, scheduler: Scheduler, debug, **config):
        self.scheduler = scheduler
        self.config = config
        self.console = Console()
        self.layout = Layout()
@@ -369,7 +371,34 @@ class LayoutManager:

            self.layout["lower"].update(Panel(Align(total_table, align="center"), title="Power and Performance"))

    def update(self, data: TickData):
        """Feed one tick of simulation data to the layout's update methods.

        When the attached scheduler has a cooling model, the power/temperature and
        pressure/flow arrays are updated from ``data.fmu_outputs``. The scheduled
        jobs, status and power array are updated on every call. This method does
        not call render().
        """
        sim = self.scheduler
        # Uncertainties come from the scheduler's power manager, not from the tick data.
        shared_kwargs = {"uncertainties": sim.power_manager.uncertainties}

        if sim.cooling_model:
            self.update_powertemp_array(
                data.power_df,
                data.fmu_outputs,
                data.p_flops,
                data.g_flops_w,
                data.system_util,
                **shared_kwargs,
            )
            self.update_pressflow_array(data.fmu_outputs)

        # Running jobs are listed ahead of queued ones.
        jobs_to_show = data.running + data.queue
        self.update_scheduled_jobs(jobs_to_show)

        status_fields = (
            data.current_time,
            len(data.running),
            len(data.queue),
            data.num_active_nodes,
            data.num_free_nodes,
            data.down_nodes,
        )
        self.update_status(*status_fields)

        self.update_power_array(
            data.power_df, data.p_flops, data.g_flops_w, data.system_util, **shared_kwargs
        )

    def render(self):
        if not self.debug:
            self.console.clear()
            self.console.print(self.layout)

    def run(self, jobs, timesteps):
        """ Runs the UI, blocking until the simulation is complete """
        for data in self.scheduler.run_simulation(jobs, timesteps):
            if data.current_time % self.config['UI_UPDATE_FREQ'] == 0:
                self.update(data)
                self.render()