Commit a121e132 authored by Brewer, Wes
Browse files

Add adaptive routing support for Fat-tree topology

Implement ECMP and Adaptive ECMP routing for fat-tree networks (InfiniBand
style Adaptive Routing). ECMP randomly selects among equal-cost paths.
Adaptive ECMP selects the least congested path.

- Add ECMP and ADAPTIVE to RoutingAlgorithm enum
- Implement fat-tree routing functions (all_shortest_paths, ecmp, adaptive)
- Update NetworkModel to support fat-tree adaptive routing with link tracking
- Configure Lassen with InfiniBand adaptive routing
- Add comprehensive unit tests for fat-tree routing (19 tests)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
parent dd00519b
Loading
Loading
Loading
Loading
+2 −10
Original line number Diff line number Diff line
@@ -120,15 +120,7 @@ cooling:
  w_cts_key: "simulator[1].centralEnergyPlant[1].coolingTowerLoop[1].summary.W_flow_CT_kW"
network:
  topology: fat-tree
  network_max_bw: 12.5e9
  network_max_bw: 12.5e9  # InfiniBand EDR 100 Gbps = 12.5 GB/s
  routing_algorithm: adaptive  # InfiniBand Adaptive Routing
  fattree_k: 32
  dragonfly_d: 11
  dragonfly_a: 9
  dragonfly_p: 8
  latency: 1
  torus_x: 17
  torus_y: 17
  torus_z: 8
  torus_wrap: true
  hosts_per_router: 2
  torus_routing: DOR_XYZ
+24 −2
Original line number Diff line number Diff line
@@ -78,6 +78,12 @@ class NetworkModel:
            # TODO: future testing of subsampling feature
            #self.net_graph = subsample_hosts(self.net_graph, num_hosts=4626)

            # Initialize global link loads for adaptive routing
            self.global_link_loads = {tuple(sorted(edge)): 0.0 for edge in self.net_graph.edges()}

            routing_info = f"routing={self.routing_algorithm}"
            print(f"[DEBUG] Fat-tree k={self.fattree_k}: {total_nodes} nodes, {routing_info}")

        elif self.topology == "torus3d":
            dims = (
                int(config["TORUS_X"]),
@@ -169,10 +175,26 @@ class NetworkModel:

        if self.topology == "fat-tree":
            host_list = [node_id_to_host_name(n, self.fattree_k) for n in job.scheduled_nodes]
            loads = link_loads_for_pattern(self.net_graph, host_list, effective_tx, comm_pattern)
            net_cong = worst_link_util(loads, max_throughput)
            if debug:
                print("  fat-tree hosts:", host_list)
                print(f"  routing: {self.routing_algorithm}")

            loads = link_loads_for_pattern(
                self.net_graph,
                host_list,
                effective_tx,
                comm_pattern,
                routing_algorithm=self.routing_algorithm,
                link_loads=self.global_link_loads,
            )
            net_cong = worst_link_util(loads, max_throughput)

            # Update global link loads for adaptive routing decisions
            if self.routing_algorithm in ('ecmp', 'adaptive'):
                for edge, load in loads.items():
                    edge_key = tuple(sorted(edge))
                    if edge_key in self.global_link_loads:
                        self.global_link_loads[edge_key] += load

        elif self.topology == "dragonfly":
            D = self.config["DRAGONFLY_D"]
+17 −2
Original line number Diff line number Diff line
@@ -354,6 +354,7 @@ def link_loads_for_pattern(
    *,
    routing_algorithm: str | None = None,
    dragonfly_params: dict | None = None,
    fattree_params: dict | None = None,
    link_loads: dict | None = None,
):
    """
@@ -365,19 +366,23 @@ def link_loads_for_pattern(
        job_hosts: List of host names
        tx_volume_bytes: Total transmit volume per host
        comm_pattern: CommunicationPattern enum value
        routing_algorithm: Routing algorithm for Dragonfly ('minimal', 'ugal', 'valiant')
        routing_algorithm: Routing algorithm
            - Dragonfly: 'minimal', 'ugal', 'valiant'
            - Fat-tree: 'minimal', 'ecmp', 'adaptive'
        dragonfly_params: Dict with 'd', 'a', 'ugal_threshold', 'valiant_bias' for Dragonfly
        fattree_params: Dict with 'k' for fat-tree (optional, for future use)
        link_loads: Current global link loads (for adaptive routing decisions)

    Returns:
        dict {(u,v): bytes, ...} of link loads
    """
    from raps.network.dragonfly import link_loads_for_job_dragonfly_adaptive
    from raps.network.fat_tree import link_loads_for_job_fattree_adaptive

    comm_pattern = normalize_comm_pattern(comm_pattern)

    # Handle adaptive routing for Dragonfly
    if routing_algorithm and dragonfly_params and routing_algorithm != 'minimal':
    if routing_algorithm and dragonfly_params and routing_algorithm in ('ugal', 'valiant'):
        return link_loads_for_job_dragonfly_adaptive(
            G,
            job_hosts,
@@ -390,6 +395,16 @@ def link_loads_for_pattern(
            valiant_bias=dragonfly_params.get('valiant_bias', 0.0),
        )

    # Handle adaptive routing for Fat-tree
    if routing_algorithm and routing_algorithm in ('ecmp', 'adaptive'):
        return link_loads_for_job_fattree_adaptive(
            G,
            job_hosts,
            tx_volume_bytes,
            algorithm=routing_algorithm,
            link_loads=link_loads,
        )

    # Standard routing (shortest path)
    if comm_pattern == CommunicationPattern.STENCIL_3D:
        return link_loads_for_job_stencil_3d(G, job_hosts, tx_volume_bytes)
+213 −0
Original line number Diff line number Diff line
import random
from typing import Tuple, List
import networkx as nx


@@ -80,3 +81,215 @@ def subsample_hosts(G, num_hosts):
        remove = [n for n in hosts if n not in keep]
        G.remove_nodes_from(remove)
    return G


# =============================================================================
# Adaptive Routing Functions for Fat-Tree
# =============================================================================

def parse_fattree_host(name: str) -> Tuple[int, int, int]:
    """Split a fat-tree host name into its three integer indices.

    Args:
        name: Host name of the form "h_{pod}_{edge}_{host}".

    Returns:
        (pod, edge_switch_idx, host_idx), each converted with ``int``.

    Raises:
        IndexError: If the name has fewer than four "_"-separated fields.
        ValueError: If any of the three index fields is not an integer.
    """
    # Field 0 is the leading tag (e.g. "h"); fields 1..3 hold the indices.
    # Any fields beyond the fourth are ignored.
    fields = name.split("_")
    pod, edge, host_idx = (int(fields[pos]) for pos in (1, 2, 3))
    return pod, edge, host_idx


def get_host_edge_switch(host: str) -> str:
    """Return the name of the edge switch a host hangs off.

    Args:
        host: Host name of the form "h_{pod}_{edge}_{host}".

    Returns:
        The edge switch name "e_{pod}_{edge}" built from the host's pod and
        edge indices.
    """
    # The full name is parsed (and therefore validated) even though only the
    # first two indices are needed for the switch name.
    pod, edge = parse_fattree_host(host)[:2]
    return f"e_{pod}_{edge}"


def fattree_all_shortest_paths(G: nx.Graph, src: str, dst: str) -> List[List[str]]:
    """Enumerate every shortest path from ``src`` to ``dst`` in the graph.

    A fat-tree can offer several equal-cost paths between two hosts; this
    returns all of them so a routing policy can choose among them.

    Args:
        G: Fat-tree graph
        src: Source host name
        dst: Destination host name

    Returns:
        List of paths, each a list of node names. ``[[src]]`` when source and
        destination are the same node, and ``[]`` when networkx reports that
        no path exists.
    """
    if src != dst:
        try:
            # Materialize inside the try: the paths are pulled from networkx
            # here, so a NetworkXNoPath raised while iterating is caught too.
            found = list(nx.all_shortest_paths(G, src, dst))
        except nx.NetworkXNoPath:
            found = []
        return found

    # Trivial case: a node reaches itself with a zero-hop path.
    return [[src]]


def fattree_ecmp_select_path(G: nx.Graph, src: str, dst: str) -> List[str]:
    """Pick one shortest path at random (Equal-Cost Multi-Path routing).

    Args:
        G: Fat-tree graph
        src: Source host name
        dst: Destination host name

    Returns:
        One of the shortest paths chosen with ``random.choice``, as a list of
        node names, or ``[]`` when there is no path.
    """
    candidates = fattree_all_shortest_paths(G, src, dst)
    # random.choice raises on an empty sequence, so guard the no-path case.
    return random.choice(candidates) if candidates else []


def estimate_path_load(path: List[str], link_loads: dict) -> float:
    """Add up the recorded load of every link along a path.

    Args:
        path: List of node names forming the path
        link_loads: Dictionary mapping sorted (node1, node2) tuples to load
            values; links missing from the dictionary count as 0.0

    Returns:
        Sum of loads over all consecutive node pairs in the path (0.0 for a
        path with fewer than two nodes)
    """
    accumulated = 0.0
    # Walk consecutive hops; keys are order-normalized so (u, v) and (v, u)
    # refer to the same undirected link.
    for u, v in zip(path, path[1:]):
        key = tuple(sorted((u, v)))
        accumulated += link_loads.get(key, 0.0)
    return accumulated


def fattree_adaptive_select_path(
    G: nx.Graph,
    src: str,
    dst: str,
    link_loads: dict,
) -> List[str]:
    """Choose the least-loaded shortest path (Adaptive ECMP routing).

    Models InfiniBand-style Adaptive Routing by scoring every equal-cost
    shortest path with ``estimate_path_load`` and returning the cheapest one.

    Args:
        G: Fat-tree graph
        src: Source host name
        dst: Destination host name
        link_loads: Dictionary mapping edge tuples to current load values

    Returns:
        The path with the smallest total load as a list of node names; the
        first such path in enumeration order on ties, or ``[]`` when there is
        no path.
    """
    candidates = fattree_all_shortest_paths(G, src, dst)
    if not candidates:
        return []
    if len(candidates) == 1:
        # Nothing to compare, so link_loads is not consulted at all.
        return candidates[0]

    # min() keeps the earliest candidate among equals, which matches a
    # left-to-right scan that only replaces on a strictly smaller load.
    return min(candidates, key=lambda route: estimate_path_load(route, link_loads))


def fattree_route(
    G: nx.Graph,
    src: str,
    dst: str,
    algorithm: str = 'minimal',
    link_loads: dict = None,
) -> List[str]:
    """Route one source/destination pair with the requested algorithm.

    Args:
        G: Fat-tree graph
        src: Source host name
        dst: Destination host name
        algorithm: Routing algorithm ('minimal', 'ecmp', or 'adaptive')
        link_loads: Current link loads; only consulted by 'adaptive', where
            ``None`` is treated as an empty mapping

    Returns:
        Selected path as a list of node names; ``[src]`` when source and
        destination coincide, ``[]`` when no path exists.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names (only
            checked when ``src != dst``).
    """
    # Same endpoint: zero-hop path, decided before the algorithm is inspected.
    if src == dst:
        return [src]

    if algorithm == 'ecmp':
        return fattree_ecmp_select_path(G, src, dst)

    if algorithm == 'adaptive':
        known_loads = {} if link_loads is None else link_loads
        return fattree_adaptive_select_path(G, src, dst, known_loads)

    if algorithm != 'minimal':
        raise ValueError(f"Unknown fat-tree routing algorithm: {algorithm}")

    # 'minimal': a single shortest path as returned by networkx.
    try:
        return nx.shortest_path(G, src, dst)
    except nx.NetworkXNoPath:
        return []


def link_loads_for_job_fattree_adaptive(
    G: nx.Graph,
    job_hosts: List[str],
    tx_volume_bytes: float,
    algorithm: str = 'minimal',
    link_loads: dict = None,
) -> dict:
    """Compute per-link loads for one job's all-to-all traffic on a fat-tree.

    Every ordered pair of distinct positions in ``job_hosts`` is routed with
    ``fattree_route`` and ``tx_volume_bytes`` is added to each link of the
    chosen path.

    For ``algorithm='adaptive'`` the congestion picture used to choose a path
    is the caller's ``link_loads`` plus the traffic this call has already
    placed for earlier pairs. Without that, every pair of a job would be
    scored against the same stale snapshot and, on an idle network, all
    flows would take the first enumerated shortest path instead of spreading
    over the equal-cost alternatives.

    Args:
        G: Fat-tree network graph
        job_hosts: List of host names assigned to the job
        tx_volume_bytes: Traffic volume added to every link of every routed
            host pair
        algorithm: Routing algorithm ('minimal', 'ecmp', or 'adaptive')
        link_loads: Current global link loads for adaptive decisions, keyed
            by sorted (node1, node2) tuples. Never modified by this function.

    Returns:
        Dictionary mapping sorted edge tuples to the load contributed by this
        job only (empty for jobs with fewer than two hosts).

    Raises:
        ValueError: Propagated from ``fattree_route`` for an unknown
            ``algorithm``.
    """
    # NOTE(review): the volume is applied in full to each host pair, not
    # divided among a host's peers. Confirm callers pass a per-pair volume
    # rather than a per-host total.
    loads = {}
    if len(job_hosts) <= 1:
        return loads

    track_own_traffic = algorithm == 'adaptive'
    # Private working copy so in-job updates never leak into the caller's
    # global bookkeeping; only 'adaptive' ever reads it.
    congestion = dict(link_loads) if (track_own_traffic and link_loads) else {}

    # All-to-all traffic pattern (pairs are skipped by position, not by name)
    for i, src in enumerate(job_hosts):
        for j, dst in enumerate(job_hosts):
            if i == j:
                continue

            path = fattree_route(G, src, dst, algorithm, congestion)

            # Accumulate loads on path edges
            for u, v in zip(path, path[1:]):
                edge = tuple(sorted((u, v)))
                loads[edge] = loads.get(edge, 0.0) + tx_volume_bytes
                if track_own_traffic:
                    # Let later pairs see the traffic just placed.
                    congestion[edge] = congestion.get(edge, 0.0) + tx_volume_bytes

    return loads
+9 −0
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@ class RoutingAlgorithm(ValueComparableEnum):
    "Study of Workload Interference with Intelligent Routing on Dragonfly"
    (Kang et al., SC22)

    Dragonfly algorithms:
    MINIMAL: Always use shortest/minimal path routing.
             For Dragonfly: at most 3 hops (local-global-local).
    VALIANT: Valiant load balancing - route via random intermediate group.
@@ -55,7 +56,15 @@ class RoutingAlgorithm(ValueComparableEnum):
          Dynamically chooses minimal or non-minimal based on congestion.
          Uses threshold comparison: if min_latency < threshold * nonmin_latency,
          use minimal path; otherwise use non-minimal.

    Fat-tree algorithms:
    ECMP: Equal-Cost Multi-Path routing. Randomly selects among all
          shortest paths between source and destination.
    ADAPTIVE: Adaptive ECMP routing (InfiniBand AR). Selects the least
              congested path among all equal-cost shortest paths.
    """
    MINIMAL = 'minimal'
    VALIANT = 'valiant'
    UGAL = 'ugal'
    ECMP = 'ecmp'
    ADAPTIVE = 'adaptive'
Loading