Loading raps/engine.py +0 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,6 @@ import dataclasses import pandas as pd from .job import Job, JobState from .account import Accounts from .network import network_utilization from .utils import summarize_ranges, expand_ranges, get_utilization from .resmgr import ResourceManager Loading raps/resmgr.py +8 −1 Original line number Diff line number Diff line import numpy as np from .job import JobState from .utils import expand_ranges from scipy.stats import weibull_min Loading Loading @@ -58,10 +59,15 @@ class ResourceManager: scale_parameter = mtbf * 3600 # Convert to seconds # Create a NumPy array of node indices, excluding down nodes #print(self.down_nodes) #down_nodes = expand_ranges(self.down_nodes) #all_nodes = np.setdiff1d(np.arange(self.config['TOTAL_NODES']), np.array(self.down_nodes, dtype=int)) all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes))) # Sample the Weibull distribution for all nodes at once random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size) failure_threshold = 0.1 failed_nodes = [node for node, r in zip(all_nodes, random_values) if r < failure_threshold] # Identify nodes that have failed failure_threshold = 0.1 Loading @@ -72,6 +78,7 @@ class ResourceManager: for node_index in newly_downed_nodes: if node_index in self.available_nodes: self.available_nodes.remove(node_index) self.down_nodes.add(str(node_index)) self.down_nodes.append(str(node_index)) self.power_manager.set_idle(node_index) return newly_downed_nodes.tolist() Loading
raps/engine.py +0 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,6 @@ import dataclasses import pandas as pd from .job import Job, JobState from .account import Accounts from .network import network_utilization from .utils import summarize_ranges, expand_ranges, get_utilization from .resmgr import ResourceManager Loading
raps/resmgr.py +8 −1 Original line number Diff line number Diff line import numpy as np from .job import JobState from .utils import expand_ranges from scipy.stats import weibull_min Loading Loading @@ -58,10 +59,15 @@ class ResourceManager: scale_parameter = mtbf * 3600 # Convert to seconds # Create a NumPy array of node indices, excluding down nodes #print(self.down_nodes) #down_nodes = expand_ranges(self.down_nodes) #all_nodes = np.setdiff1d(np.arange(self.config['TOTAL_NODES']), np.array(self.down_nodes, dtype=int)) all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes))) # Sample the Weibull distribution for all nodes at once random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size) failure_threshold = 0.1 failed_nodes = [node for node, r in zip(all_nodes, random_values) if r < failure_threshold] # Identify nodes that have failed failure_threshold = 0.1 Loading @@ -72,6 +78,7 @@ class ResourceManager: for node_index in newly_downed_nodes: if node_index in self.available_nodes: self.available_nodes.remove(node_index) self.down_nodes.add(str(node_index)) self.down_nodes.append(str(node_index)) self.power_manager.set_idle(node_index) return newly_downed_nodes.tolist()