Loading raps/resmgr.py +0 −7 Original line number Diff line number Diff line import numpy as np from .job import JobState from .utils import expand_ranges from scipy.stats import weibull_min Loading Loading @@ -59,15 +58,10 @@ class ResourceManager: scale_parameter = mtbf * 3600 # Convert to seconds # Create a NumPy array of node indices, excluding down nodes #print(self.down_nodes) #down_nodes = expand_ranges(self.down_nodes) #all_nodes = np.setdiff1d(np.arange(self.config['TOTAL_NODES']), np.array(self.down_nodes, dtype=int)) all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes))) # Sample the Weibull distribution for all nodes at once random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size) failure_threshold = 0.1 failed_nodes = [node for node, r in zip(all_nodes, random_values) if r < failure_threshold] # Identify nodes that have failed failure_threshold = 0.1 Loading @@ -79,6 +73,5 @@ class ResourceManager: if node_index in self.available_nodes: self.available_nodes.remove(node_index) self.down_nodes.add(str(node_index)) self.power_manager.set_idle(node_index) return newly_downed_nodes.tolist() Loading
raps/resmgr.py +0 −7 Original line number Diff line number Diff line import numpy as np from .job import JobState from .utils import expand_ranges from scipy.stats import weibull_min Loading Loading @@ -59,15 +58,10 @@ class ResourceManager: scale_parameter = mtbf * 3600 # Convert to seconds # Create a NumPy array of node indices, excluding down nodes #print(self.down_nodes) #down_nodes = expand_ranges(self.down_nodes) #all_nodes = np.setdiff1d(np.arange(self.config['TOTAL_NODES']), np.array(self.down_nodes, dtype=int)) all_nodes = np.array(sorted(set(range(self.total_nodes)) - set(self.down_nodes))) # Sample the Weibull distribution for all nodes at once random_values = weibull_min.rvs(shape_parameter, scale=scale_parameter, size=all_nodes.size) failure_threshold = 0.1 failed_nodes = [node for node, r in zip(all_nodes, random_values) if r < failure_threshold] # Identify nodes that have failed failure_threshold = 0.1 Loading @@ -79,6 +73,5 @@ class ResourceManager: if node_index in self.available_nodes: self.available_nodes.remove(node_index) self.down_nodes.add(str(node_index)) self.power_manager.set_idle(node_index) return newly_downed_nodes.tolist()