Commit 79987307 authored by Ferreira Da Silva, Rafael

migrating to ROCm 6.0.0

parent 7f5748b2
+3 −3
module load PrgEnv-gnu
module load gcc/10.3.0
-module load rocm/5.2.0
-ROOT=REPLACE_PWD/miniconda3
+module load rocm/6.0.0
+ROOT=/lustre/orion/world-shared/stf053/olcf-6-benchmark/miniconda3
export PATH=${ROOT}/bin:$PATH
source ${ROOT}/etc/profile.d/conda.sh
conda activate ${ROOT}/../topaz_env
-module load python/3.10-miniforge3
+module load miniforge3/23.11.0
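
Note: a quick way to confirm the new toolchain is visible after activating the environment, assuming topaz_env ships a ROCm build of PyTorch (a minimal sketch, not part of the commit):

    import torch

    # On ROCm builds, torch.version.hip is a version string (expected to start
    # with "6.0" after this change); it is None on CUDA/CPU builds.
    print(torch.version.hip)
    print(torch.cuda.is_available())  # ROCm GPUs are exposed through the torch.cuda API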

pf/data/__init__.py

100755 → 100644
+11 −0
@@ -4,6 +4,16 @@ Datasets, etc. for timeseries data.
Handling timeseries data is not trivial. It requires special treatment. This sub-package provides the necessary tools
to abstract the necessary work.
"""
+#from pytorch_forecasting.data.encoders import (
+#    EncoderNormalizer,
+#    GroupNormalizer,
+#    MultiNormalizer,
+#    NaNLabelEncoder,
+#    TorchNormalizer,
+#)
+#from pytorch_forecasting.data.samplers import TimeSynchronizedBatchSampler
+#from pytorch_forecasting.data.timeseries import TimeSeriesDataSet
+
from pf.data.encoders import (
    EncoderNormalizer,
    GroupNormalizer,
@@ -11,6 +21,7 @@ from pf.data.encoders import (
    NaNLabelEncoder,
    TorchNormalizer,
)
+
from pf.data.samplers import TimeSynchronizedBatchSampler
from pf.data.timeseries import TimeSeriesDataSet
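
Net effect of the hunks above: the vendored pf.data package now re-exports the same public names as upstream pytorch_forecasting.data. A minimal sketch of the intended import style, assuming the vendored modules stay in sync with upstream:

    from pf.data import (
        EncoderNormalizer,
        GroupNormalizer,
        MultiNormalizer,
        NaNLabelEncoder,
        TimeSeriesDataSet,
        TimeSynchronizedBatchSampler,
        TorchNormalizer,
    )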

pf/data/encoders.py

100755 → 100644
+18 −16
@@ -21,7 +21,7 @@ from torch.distributions.transforms import (
import torch.nn.functional as F
from torch.nn.utils import rnn

-from pf.utils import InitialParameterRepresenterMixIn
+from pytorch_forecasting.utils import InitialParameterRepresenterMixIn


def _plus_one(x):
@@ -80,7 +80,6 @@ class SoftplusTransform(Transform):


class Expm1Transform(ExpTransform):
-
    codomain = constraints.greater_than_eq(-1.0)

    def _call(self, x):
@@ -254,7 +253,9 @@ class NaNLabelEncoder(InitialParameterRepresenterMixIn, BaseEstimator, Transform
        Returns:
            bool: True if series is numeric
        """
        return y.dtype.kind in "bcif" or (isinstance(y, pd.CategoricalDtype) and y.cat.categories.dtype.kind in "bcif")
        return y.dtype.kind in "bcif" or (
            isinstance(y.dtype, pd.CategoricalDtype) and y.cat.categories.dtype.kind in "bcif"
        )

    def fit(self, y: pd.Series, overwrite: bool = False):
        """
@@ -413,7 +414,7 @@ class TorchNormalizer(InitialParameterRepresenterMixIn, BaseEstimator, Transform

                * None (default): No transformation of values
                * log: Estimate in log-space leading to a multiplicative model
-                * logp1: Estimate in log-space but add 1 to values before transforming for stability
+                * log1p: Estimate in log-space but add 1 to values before transforming for stability
                  (e.g. if many small values <<1 are present).
                  Note, that inverse transform is still only `torch.exp()` and not `torch.expm1()`.
                * logit: Apply logit transformation on values that are between 0 and 1
@@ -468,7 +469,7 @@ class TorchNormalizer(InitialParameterRepresenterMixIn, BaseEstimator, Transform
        if isinstance(y_center, torch.Tensor):
            eps = torch.finfo(y_center.dtype).eps
        else:
-            eps = np.finfo(np.float).eps
+            eps = np.finfo(np.float16).eps
        if self.method == "identity":
            if isinstance(y_center, torch.Tensor):
                self.center_ = torch.zeros(y_center.size()[:-1])
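
Background for this change: np.float was a deprecated alias for the builtin float (i.e. float64) and was removed in NumPy 1.24, so np.finfo(np.float) now raises AttributeError. The replacement pins an explicit dtype; note that float16's machine epsilon is much larger than float64's:

    import numpy as np

    print(np.finfo(np.float16).eps)  # ~9.77e-04: the eps used after this change
    print(np.finfo(np.float64).eps)  # ~2.22e-16: what np.float effectively gave before
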
@@ -499,11 +500,9 @@ class TorchNormalizer(InitialParameterRepresenterMixIn, BaseEstimator, Transform

        elif self.method == "robust":
            if isinstance(y_center, torch.Tensor):
-                self.center_ = y_center.kthvalue(
-                    int(len(y_center) * self.method_kwargs.get("center", 0.5)), dim=-1
-                ).values
-                q_75 = y_scale.kthvalue(int(len(y_scale) * self.method_kwargs.get("upper", 0.75)), dim=-1).values
-                q_25 = y_scale.kthvalue(int(len(y_scale) * self.method_kwargs.get("lower", 0.25)), dim=-1).values
+                self.center_ = y_center.quantile(self.method_kwargs.get("center", 0.5), dim=-1)
+                q_75 = y_scale.quantile(self.method_kwargs.get("upper", 0.75), dim=-1)
+                q_25 = y_scale.quantile(self.method_kwargs.get("lower", 0.25), dim=-1)
            elif isinstance(y_center, np.ndarray):
                self.center_ = np.percentile(y_center, self.method_kwargs.get("center", 0.5) * 100, axis=-1)
                q_75 = np.percentile(y_scale, self.method_kwargs.get("upper", 0.75) * 100, axis=-1)
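
The torch branch above now matches the NumPy branch below: torch.quantile interpolates between elements just like np.percentile, whereas the old kthvalue code picked the k-th smallest element with k = int(len(...) * q), which performs no interpolation and uses len() (the first dimension) even though kthvalue reduces the last. A small sketch of the difference on a 1-D series:

    import torch

    y = torch.tensor([1.0, 2.0, 3.0, 4.0])
    y.quantile(0.25, dim=-1)                       # tensor(1.7500): interpolated, like np.percentile
    y.kthvalue(int(len(y) * 0.25), dim=-1).values  # tensor(1.): k-th smallest, no interpolation
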
@@ -647,7 +646,7 @@ class EncoderNormalizer(TorchNormalizer):

                * None (default): No transformation of values
                * log: Estimate in log-space leading to a multiplicative model
-                * logp1: Estimate in log-space but add 1 to values before transforming for stability
+                * log1p: Estimate in log-space but add 1 to values before transforming for stability
                    (e.g. if many small values <<1 are present).
                    Note, that inverse transform is still only `torch.exp()` and not `torch.expm1()`.
                * logit: Apply logit transformation on values that are between 0 and 1
@@ -754,7 +753,7 @@ class GroupNormalizer(TorchNormalizer):

                * None (default): No transformation of values
                * log: Estimate in log-space leading to a multiplicative model
-                * logp1: Estimate in log-space but add 1 to values before transforming for stability
+                * log1p: Estimate in log-space but add 1 to values before transforming for stability
                    (e.g. if many small values <<1 are present).
                    Note, that inverse transform is still only `torch.exp()` and not `torch.expm1()`.
                * logit: Apply logit transformation on values that are between 0 and 1
@@ -785,7 +784,7 @@ class GroupNormalizer(TorchNormalizer):
            self
        """
        y = self.preprocess(y)
-        eps = np.finfo(np.float).eps
+        eps = np.finfo(np.float16).eps
        if len(self.groups) == 0:
            assert not self.scale_by_group, "No groups are defined, i.e. `scale_by_group=[]`"
            if self.method == "standard":
@@ -953,10 +952,13 @@ class GroupNormalizer(TorchNormalizer):
            Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: Scaled data, if ``return_norm=True``, returns also scales
                as second element
        """
+        # # check if arguments are wrong way round
+        if isinstance(y, pd.DataFrame) and not isinstance(X, pd.DataFrame):
+            raise ValueError("X and y is in wrong positions")
        if target_scale is None:
            assert X is not None, "either target_scale or X has to be passed"
            target_scale = self.get_norm(X)
-        return super().transform(y=y, return_norm=return_norm, target_scale=target_scale)
+        return super().transform(y, return_norm=return_norm, target_scale=target_scale)

    def get_parameters(self, groups: Union[torch.Tensor, list, tuple], group_names: List[str] = None) -> np.ndarray:
        """
@@ -1060,7 +1062,7 @@ class MultiNormalizer(TorchNormalizer):

        for idx, normalizer in enumerate(self.normalizers):
            if isinstance(normalizer, GroupNormalizer):
-                normalizer.fit(y[:, idx], X=X)
+                normalizer.fit(y[:, idx], X)
            else:
                normalizer.fit(y[:, idx])

@@ -1119,7 +1121,7 @@ class MultiNormalizer(TorchNormalizer):
            else:
                scale = None
            if isinstance(normalizer, GroupNormalizer):
-                r = normalizer.transform(y[idx], X=X, return_norm=return_norm, target_scale=scale)
+                r = normalizer.transform(y[idx], X, return_norm=return_norm, target_scale=scale)
            else:
                r = normalizer.transform(y[idx], return_norm=return_norm, target_scale=scale)
            res.append(r)
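
A note on the three identical docstring corrections in this file (logp1 → log1p): the option name follows torch.log1p, and, as each docstring warns, the normalizer's inverse transform is plain torch.exp(), so a round trip returns y + 1 rather than y:

    import torch

    y = torch.tensor([0.1, 0.5])
    z = torch.log1p(y)  # forward transform: log(y + 1)
    torch.exp(z)        # tensor([1.1000, 1.5000]) == y + 1; torch.expm1(z) would recover y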

pf/data/samplers.py

100755 → 100644
+8 −38
@@ -9,37 +9,6 @@ from sklearn.utils import shuffle
import torch
from torch.utils.data.sampler import Sampler

-class MultiSeqsSampler(Sampler):
-    """
-    (Testing) Samples mini-batch randomly from multiple sequences of the TOPAZ data 
-    Every Sampler subclass has to provide an :meth:`__iter__` method, providing a
-    way to iterate over indices of dataset elements, and a :meth:`__len__` method
-    that returns the length of the returned iterators.
-    """
-    def __init__(self,
-        data_source,   ## (TimeSeriesDataSet) which should include 2 or more sequences
-        batch_size: int = 64,
-        shuffle: bool = False,
-        drop_last: bool = False,
-        ):
-        self.data_source = data_source
-        self.batch_size = batch_size
-        self.drop_last = drop_last
-        self.shuffle = shuffle
-
-        ### do something here
-
-
-        ### overwrite the def __iter__(self) -> Iterator[]
-        def __iter__(self):
-            # do something
-            pass
-            # return
-
-        ### overwrite the def __len__(self) method
-        def __len__(self) -> int:
-            return len(self.data_source)
-

class GroupedSampler(Sampler):
    """
@@ -51,7 +20,7 @@ class GroupedSampler(Sampler):

    def __init__(
        self,
-        data_source,
+        sampler: Sampler,
        batch_size: int = 64,
        shuffle: bool = False,
        drop_last: bool = False,
@@ -60,7 +29,7 @@ class GroupedSampler(Sampler):
        Initialize.

        Args:
-            data_source (TimeSeriesDataSet): timeseries dataset.
+            sampler (Sampler or Iterable): Base sampler. Can be any iterable object
            drop_last (bool): if to drop last mini-batch from a group if it is smaller than batch_size.
                Defaults to False.
            shuffle (bool): if to shuffle dataset. Defaults to False.
@@ -77,20 +46,20 @@ class GroupedSampler(Sampler):
            )
        if not isinstance(drop_last, bool):
            raise ValueError("drop_last should be a boolean value, but got " "drop_last={}".format(drop_last))
-        self.data_source = data_source
+        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.shuffle = shuffle
        # make groups and construct new index to sample from
-        groups = self.get_groups(self.data_source)
+        groups = self.get_groups(self.sampler)
        self.construct_batch_groups(groups)

-    def get_groups(self, data_source):
+    def get_groups(self, sampler: Sampler):
        """
        Create the groups which can be sampled.

        Args:
-            data_source (TimeSeriesDataSet): timeseries dataset.
+            sampler (Sampler): will have attribute data_source which is of type TimeSeriesDataSet.

        Returns:
            dict-like: dictionary-like object with data_source.index as values and group names as keys
@@ -153,7 +122,8 @@ class TimeSynchronizedBatchSampler(GroupedSampler):
    This sampler does not support missing values in the dataset.
    """

-    def get_groups(self, data_source):
+    def get_groups(self, sampler: Sampler):
+        data_source = sampler.data_source
        index = data_source.index
        # get groups, i.e. group all samples by first predict time
        last_time = data_source.data["time"][index["index_end"].to_numpy()].numpy()
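
This refactor follows upstream pytorch-forecasting's newer sampler API: the grouped batch samplers now wrap a base Sampler whose .data_source attribute is the TimeSeriesDataSet, instead of taking the dataset directly. A usage sketch under that assumption, with training_dataset standing in for a fitted TimeSeriesDataSet (hypothetical name):

    from torch.utils.data.sampler import SequentialSampler

    from pf.data.samplers import TimeSynchronizedBatchSampler

    base = SequentialSampler(training_dataset)  # training_dataset: a TimeSeriesDataSet (assumed)
    batch_sampler = TimeSynchronizedBatchSampler(base, batch_size=64, shuffle=True)
    # pass to torch.utils.data.DataLoader(..., batch_sampler=batch_sampler)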

pf/metrics/_mqf2_utils.py

100755 → 100644
+0 −1
@@ -264,7 +264,6 @@ class MQF2Distribution(Distribution):
        threshold_input: float = 100.0,
        validate_args: bool = False,
    ) -> None:
-
        self.picnn = picnn
        self.hidden_state = hidden_state
        self.prediction_length = prediction_length