Commit 2468b9f1 authored by Hines, Jesse's avatar Hines, Jesse
Browse files

Make node names generic

parent 2ded9f04
Loading
Loading
Loading
Loading
+3 −4
Original line number Diff line number Diff line
@@ -49,8 +49,8 @@
                    },
                    {
                        "type": "expression",
                        "name": "xnames",
                        "expression": "array_to_string(xnames, ',')"
                        "name": "nodes",
                        "expression": "array_to_string(nodes, ',')"
                    }
                ]
            },
@@ -83,8 +83,7 @@
                        "name": "time_end"
                    },
                    "state_current",
                    "node_ranges",
                    "xnames"
                    "nodes"
                ]
            },
            "granularitySpec": {
+6 −13
Original line number Diff line number Diff line
@@ -25,15 +25,9 @@ class SchedulerSimJob(BaseModel):
    time_end: Optional[AwareDatetime] = None
    state_current: Optional[JobStateEnum] = None

    node_ranges: Optional[str] = None
    nodes: Optional[list[str]] = None
    """
    The Slurm hosts the job is running on as a Slurm hosts string.
    E.g. frontier[03629-03630,03633-03635]
    """
    xnames: Optional[list[str]] = None
    """
    The nodes the job is running on, but as a list of xnames rather than a slurm hostname string.
    E.g. ['x2307c3s0b1', 'x2408c5s2b1']
    The nodes the job is running on ['x2307c3s0b1', 'x2408c5s2b1']
    """

    # Removing these for now, they are constant and just what you set in the input.
@@ -53,8 +47,7 @@ SCHEDULER_SIM_JOB_API_FIELDS = {
    'time_start': 'date',
    'time_end': 'date',
    'state_current': 'string',
    'node_ranges': 'string',
    'xnames': 'array[string]',
    'nodes': 'array[string]',
    # 'cpu_util': 'number',
    # 'gpu_util': 'number',
    # 'cpu_trace': 'array[number]',
@@ -66,10 +59,10 @@ SCHEDULER_SIM_JOB_FIELD_SELECTORS = {
}

SCHEDULER_SIM_JOB_FILTERS = filter_params(
    omit(SCHEDULER_SIM_JOB_API_FIELDS, ['time_snapshot', 'node_ranges', 'xnames']) # TODO: Allow filtering on nodes
    omit(SCHEDULER_SIM_JOB_API_FIELDS, ['time_snapshot', 'nodes']) # TODO: Allow filtering on nodes
)
SCHEDULER_SIM_JOB_SORT = sort_params(
    omit(SCHEDULER_SIM_JOB_API_FIELDS, ['time_snapshot', 'node_ranges', 'xnames']),
    omit(SCHEDULER_SIM_JOB_API_FIELDS, ['time_snapshot', 'nodes']),
    ["asc:time_start", "asc:time_end", "asc:job_id"],
)

@@ -94,7 +87,7 @@ class SchedulerSimSystem(BaseModel):
    timestamp: AwareDatetime
    
    down_nodes: list[str]
    """ List of xnames that are currently down in the simulation """
    """ List of nodes that are currently down in the simulation """
    
    num_samples: int

+1 −2
Original line number Diff line number Diff line
@@ -20,8 +20,7 @@ scheduler_sim_job = Table(
    Column("time_start", String),
    Column("time_end", String),
    Column("state_current", String),
    Column("node_ranges", String),
    Column("xnames", String),
    Column("nodes", String),
)

cooling_sim_cdu = Table(
+3 −5
Original line number Diff line number Diff line
@@ -526,9 +526,7 @@ def build_scheduler_sim_jobs_query(*,
        "time_start": to_timestamp(latest(tbl.c.time_start, 32)),
        "time_end": to_timestamp(latest(tbl.c.time_end, 32)),
        "state_current": latest(tbl.c.state_current, 12),
        # TODO: These aggregations are going to have performance problems on larger datasets
        "node_ranges": latest(tbl.c.node_ranges, 20 * 1024),
        "xnames": latest(tbl.c.xnames, 128 * 1024),
        "nodes": latest(tbl.c.nodes, 128 * 1024),
    }


@@ -582,10 +580,10 @@ def query_scheduler_sim_jobs(*,

    with druid_engine.connect() as conn:
        results = (r._asdict() for r in execute_ignore_missing(conn, stmt))
        if 'xnames' in fields:
        if 'nodes' in fields:
            results = [{
                **j,
                'xnames': _split_list(j['xnames']),
                'nodes': _split_list(j['nodes']),
            } for j in results]
        else:
            results = [j for j in results]
+0 −81
Original line number Diff line number Diff line
"""
base.py - base classes for system specific modules
"""
from abc import ABC
from ClusterShell.NodeSet import NodeSet
import re


class XnameMapper:
    """ Class that maps hostnames to xnames """
    def __init__(self,
        system: str, xnames_pattern: str, 
        hostname_nid_start: int, hostname_nid_digits: int,
    ):
        self.system = system
        self.xnames_pattern = xnames_pattern
        self.hostname_nid_start = hostname_nid_start
        self.hostname_nid_digits = hostname_nid_digits
        
        self._hostname_regex = re.compile(rf'^{system}(\d+)$')
        xname_node_set = NodeSet(xnames_pattern)
        self._host_nid_to_xname = {
            i + self.hostname_nid_start: xname
            for i, xname in enumerate(xname_node_set)
        }
        self._xname_to_host_nid = {v: k for k, v in self._host_nid_to_xname.items()}


    def hostname_to_xname(self, hostname: str):
        match = self._hostname_regex.match(hostname)
        xname = None
        if match:
            nid = int(match.group(1))
            xname = self._host_nid_to_xname.get(nid)
        
        if not xname:
            raise ValueError(f'hostname "{hostname}" is not valid')
        return xname


    def xname_to_hostname(self, xname: str):
        nid = self._xname_to_host_nid.get(xname)
        if nid is None:
            raise ValueError(f'xname "{xname}" is not valid')
        return f'{self.system}{nid:0{self.hostname_nid_digits}}'



class SystemNodeSet(NodeSet, ABC):
    """
    SystemNodes - a NodeSet with system specific settings to handle xnames.
    """
    # Set this in child classes
    xname_mapper: XnameMapper

    def hostnames(self) -> list[str]:
        """ Return a list of hostnames """
        return [n for n in self]


    @classmethod
    def hostname_to_xname(cls, xname: str):
        return cls.xname_mapper.hostname_to_xname(xname)


    @classmethod
    def xname_to_hostname(cls, hostname: str):
        return cls.xname_mapper.xname_to_hostname(hostname)


    def xnames(self) -> list[str]:
        """ Return a list of xnames """
        return [self.xname_mapper.hostname_to_xname(hostname) for hostname in self]


class FrontierNodeSet(SystemNodeSet):
    xname_mapper = XnameMapper(
        system = 'frontier',
        xnames_pattern = 'x[2000-2011,2100-2111,2200-2211,2300-2304,2306-2311,2400-2411,2500-2511,2600-2604,2606-2611]c[0-7]s[0-7]b[0-1]',
        hostname_nid_start = 1, hostname_nid_digits = 5,
    )
Loading