Commit 77b3f670 authored by Brewer, Wes's avatar Brewer, Wes
Browse files

Setup initial skeleton for fugaku system

parent 3942cd0c
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
{
    "POWER_GPU_IDLE": 75,
    "POWER_GPU_MAX": 300,
    "POWER_CPU_IDLE": 47.25,
    "POWER_CPU_MAX": 300,
    "POWER_MEM": 74.26,
    "POWER_NIC": 21,
    "POWER_NVME": 45,
    "POWER_SWITCH": 0,
    "POWER_CDU": 0,
    "POWER_UPDATE_FREQ": 10,
    "RECTIFIER_PEAK_THRESHOLD": 0,
    "SIVOC_LOSS_CONSTANT": 13,
    "SIVOC_EFFICIENCY": 0.98,
    "RECTIFIER_LOSS_CONSTANT": 17,
    "RECTIFIER_EFFICIENCY": 0.96,
    "POWER_COST": 0.094
}
+18 −0
Original line number Diff line number Diff line
{
    "SEED": 42,
    "JOB_ARRIVAL_TIME": 60,
    "MTBF": 11,
    "MAX_TIME": 88200,
    "TRACE_QUANTA": 10,
    "MIN_WALL_TIME": 3600,
    "MAX_WALL_TIME": 43200,
    "UI_UPDATE_FREQ": 3600,
    "MAX_NODES_PER_JOB": 3000,
    "JOB_END_PROBS": {
        "COMPLETED": 0.63,
        "FAILED": 0.13,
        "CANCELLED": 0.12,
        "TIMEOUT": 0.11,
        "NODE_FAIL": 0.01
    }
}
+20 −0
Original line number Diff line number Diff line
{
    "NUM_CDUS": 50,
    "RACKS_PER_CDU": 2,
    "NODES_PER_RACK": 384,
    "RECTIFIERS_PER_RACK": 8,
    "CHASSIS_PER_RACK": 8,
    "NODES_PER_BLADE": 1,
    "SWITCHES_PER_CHASSIS": 2,
    "NICS_PER_NODE": 1,
    "RECTIFIERS_PER_CHASSIS": 1,
    "NODES_PER_RECTIFIER": 48,
    "MISSING_RACKS": [],
    "DOWN_NODES": [],
    "CPUS_PER_NODE": 1,
    "GPUS_PER_NODE": 0, 
    "CPU_PEAK_FLOPS": 3.072E12,
    "GPU_PEAK_FLOPS": 0,
    "CPU_FP_RATIO": 1.0,
    "GPU_FP_RATIO": 0.0
}
+67 −0
Original line number Diff line number Diff line
import pandas as pd
from ..job import job_dict


def load_data(path, **kwargs):
    """
    Load job info from a Parquet trace file.

    Parameters:
    path (str or list): Path to the Parquet file, or a list/tuple whose
        first element is that path (the form other system loaders pass).

    Returns:
    list: List of job dictionaries, as produced by load_data_from_df.
    """
    # Accept either a bare path string or a one-element list of paths.
    # The original indexed path[0] unconditionally, which on a plain string
    # would take its first *character* and fail obscurely in read_parquet.
    parquet_file = path[0] if isinstance(path, (list, tuple)) else path
    df = pd.read_parquet(parquet_file)

    # Delegate row-by-row extraction to the DataFrame-based loader.
    return load_data_from_df(df, **kwargs)


def load_data_from_df(df, **kwargs):
    """
    Extract job information from a Fugaku trace DataFrame.

    Parameters:
    df (pd.DataFrame): DataFrame containing job information; missing
        columns fall back to per-field defaults.

    Returns:
    list: One job dictionary (via job_dict) per DataFrame row.
    """
    # Column presence is constant across the whole frame, so resolve it
    # once up front (a set gives O(1) membership) instead of re-testing
    # `col in df.columns` ten times for every row of the loop.
    present = set(df.columns)

    def field(row, col, default):
        # Read a column value from the row, or fall back when the trace
        # does not carry that column at all.
        return row[col] if col in present else default

    job_list = []
    for _, row in df.iterrows():
        job_info = job_dict(
            nodes_required=field(row, 'nnumr', 0),
            name=field(row, 'jnam', 'unknown'),
            # 'perf1' is used as a proxy CPU trace — TODO confirm metric.
            cpu_trace=field(row, 'perf1', 0),
            # No GPU trace in this dataset (system config has GPUS_PER_NODE=0).
            gpu_trace=0,
            wall_time=field(row, 'duration', 0),
            end_state=field(row, 'exit state', 'unknown'),
            scheduled_nodes=field(row, 'nnuma', 0),
            # 'adt' is the submission time; epoch Timestamp when absent.
            time_offset=field(row, 'adt', pd.Timestamp(0)),
            job_id=field(row, 'jid', 'unknown'),
            priority=field(row, 'pri', 0),
        )
        job_list.append(job_info)

    return job_list

# Sample usage:
# fugaku_jobs = load_data(['/path/to/21_04.parquet'])