Commit 79987307 authored by Ferreira Da Silva, Rafael

migrating to ROCm 6.0.0

parent 7f5748b2
+3 −3
module load PrgEnv-gnu
module load gcc/10.3.0
-module load rocm/5.2.0
-ROOT=REPLACE_PWD/miniconda3
+module load rocm/6.0.0
+ROOT=/lustre/orion/world-shared/stf053/olcf-6-benchmark/miniconda3
export PATH=${ROOT}/bin:$PATH
source ${ROOT}/etc/profile.d/conda.sh
conda activate ${ROOT}/../topaz_env
-module load python/3.10-miniforge3
+module load miniforge3/23.11.0
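
Note: a quick way to confirm the new toolchain is visible after activating the environment, assuming topaz_env ships a ROCm build of PyTorch (a minimal sketch, not part of the commit):

    import torch

    # On ROCm builds, torch.version.hip is a version string (expected to start
    # with "6.0" after this change); it is None on CUDA/CPU builds.
    print(torch.version.hip)
    print(torch.cuda.is_available())  # ROCm GPUs are exposed through the torch.cuda API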

pf/data/__init__.py

100755 → 100644
+11 −0
@@ -4,6 +4,16 @@ Datasets, etc. for timeseries data.
Handling timeseries data is not trivial. It requires special treatment. This sub-package provides the necessary tools
to abstract the necessary work.
"""
+#from pytorch_forecasting.data.encoders import (
+#    EncoderNormalizer,
+#    GroupNormalizer,
+#    MultiNormalizer,
+#    NaNLabelEncoder,
+#    TorchNormalizer,
+#)
+#from pytorch_forecasting.data.samplers import TimeSynchronizedBatchSampler
+#from pytorch_forecasting.data.timeseries import TimeSeriesDataSet
+
from pf.data.encoders import (
    EncoderNormalizer,
    GroupNormalizer,
@@ -11,6 +21,7 @@ from pf.data.encoders import (
    NaNLabelEncoder,
    TorchNormalizer,
)
+
from pf.data.samplers import TimeSynchronizedBatchSampler
from pf.data.timeseries import TimeSeriesDataSet
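
Net effect of the hunks above: the vendored pf.data package now re-exports the same public names as upstream pytorch_forecasting.data. A minimal sketch of the intended import style, assuming the vendored modules stay in sync with upstream:

    from pf.data import (
        EncoderNormalizer,
        GroupNormalizer,
        MultiNormalizer,
        NaNLabelEncoder,
        TimeSeriesDataSet,
        TimeSynchronizedBatchSampler,
        TorchNormalizer,
    )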

pf/data/encoders.py

100755 → 100644
+18 −16
@@ -21,7 +21,7 @@ from torch.distributions.transforms import (
import torch.nn.functional as F
from torch.nn.utils import rnn

-from pf.utils import InitialParameterRepresenterMixIn
+from pytorch_forecasting.utils import InitialParameterRepresenterMixIn


def _plus_one(x):
@@ -80,7 +80,6 @@ class SoftplusTransform(Transform):


class Expm1Transform(ExpTransform):
-
    codomain = constraints.greater_than_eq(-1.0)

    def _call(self, x):
@@ -254,7 +253,9 @@ class NaNLabelEncoder(InitialParameterRepresenterMixIn, BaseEstimator, Transform
        Returns:
            bool: True if series is numeric
        """
        return y.dtype.kind in "bcif" or (isinstance(y, pd.CategoricalDtype) and y.cat.categories.dtype.kind in "bcif")
        return y.dtype.kind in "bcif" or (
            isinstance(y.dtype, pd.CategoricalDtype) and y.cat.categories.dtype.kind in "bcif"
        )

    def fit(self, y: pd.Series, overwrite: bool = False):
        """
@@ -413,7 +414,7 @@ class TorchNormalizer(InitialParameterRepresenterMixIn, BaseEstimator, Transform

                * None (default): No transformation of values
                * log: Estimate in log-space leading to a multiplicative model
-                * logp1: Estimate in log-space but add 1 to values before transforming for stability
+                * log1p: Estimate in log-space but add 1 to values before transforming for stability
                  (e.g. if many small values <<1 are present).
                  Note, that inverse transform is still only `torch.exp()` and not `torch.expm1()`.
                * logit: Apply logit transformation on values that are between 0 and 1
@@ -468,7 +469,7 @@ class TorchNormalizer(InitialParameterRepresenterMixIn, BaseEstimator, Transform
        if isinstance(y_center, torch.Tensor):
            eps = torch.finfo(y_center.dtype).eps
        else:
-            eps = np.finfo(np.float).eps
+            eps = np.finfo(np.float16).eps
        if self.method == "identity":
            if isinstance(y_center, torch.Tensor):
                self.center_ = torch.zeros(y_center.size()[:-1])
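
Background for this change: np.float was a deprecated alias for the builtin float (i.e. float64) and was removed in NumPy 1.24, so np.finfo(np.float) now raises AttributeError. The replacement pins an explicit dtype; note that float16's machine epsilon is much larger than float64's:

    import numpy as np

    print(np.finfo(np.float16).eps)  # ~9.77e-04: the eps used after this change
    print(np.finfo(np.float64).eps)  # ~2.22e-16: what np.float effectively gave before
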
@@ -499,11 +500,9 @@ class TorchNormalizer(InitialParameterRepresenterMixIn, BaseEstimator, Transform

        elif self.method == "robust":
            if isinstance(y_center, torch.Tensor):
-                self.center_ = y_center.kthvalue(
-                    int(len(y_center) * self.method_kwargs.get("center", 0.5)), dim=-1
-                ).values
-                q_75 = y_scale.kthvalue(int(len(y_scale) * self.method_kwargs.get("upper", 0.75)), dim=-1).values
-                q_25 = y_scale.kthvalue(int(len(y_scale) * self.method_kwargs.get("lower", 0.25)), dim=-1).values
+                self.center_ = y_center.quantile(self.method_kwargs.get("center", 0.5), dim=-1)
+                q_75 = y_scale.quantile(self.method_kwargs.get("upper", 0.75), dim=-1)
+                q_25 = y_scale.quantile(self.method_kwargs.get("lower", 0.25), dim=-1)
            elif isinstance(y_center, np.ndarray):
                self.center_ = np.percentile(y_center, self.method_kwargs.get("center", 0.5) * 100, axis=-1)
                q_75 = np.percentile(y_scale, self.method_kwargs.get("upper", 0.75) * 100, axis=-1)
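
The torch branch above now matches the NumPy branch below: torch.quantile interpolates between elements just like np.percentile, whereas the old kthvalue code picked the k-th smallest element with k = int(len(...) * q), which performs no interpolation and uses len() (the first dimension) even though kthvalue reduces the last. A small sketch of the difference on a 1-D series:

    import torch

    y = torch.tensor([1.0, 2.0, 3.0, 4.0])
    y.quantile(0.25, dim=-1)                       # tensor(1.7500): interpolated, like np.percentile
    y.kthvalue(int(len(y) * 0.25), dim=-1).values  # tensor(1.): k-th smallest, no interpolation
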
@@ -647,7 +646,7 @@ class EncoderNormalizer(TorchNormalizer):

                * None (default): No transformation of values
                * log: Estimate in log-space leading to a multiplicative model
-                * logp1: Estimate in log-space but add 1 to values before transforming for stability
+                * log1p: Estimate in log-space but add 1 to values before transforming for stability
                    (e.g. if many small values <<1 are present).
                    Note, that inverse transform is still only `torch.exp()` and not `torch.expm1()`.
                * logit: Apply logit transformation on values that are between 0 and 1
@@ -754,7 +753,7 @@ class GroupNormalizer(TorchNormalizer):

                * None (default): No transformation of values
                * log: Estimate in log-space leading to a multiplicative model
-                * logp1: Estimate in log-space but add 1 to values before transforming for stability
+                * log1p: Estimate in log-space but add 1 to values before transforming for stability
                    (e.g. if many small values <<1 are present).
                    Note, that inverse transform is still only `torch.exp()` and not `torch.expm1()`.
                * logit: Apply logit transformation on values that are between 0 and 1
@@ -785,7 +784,7 @@ class GroupNormalizer(TorchNormalizer):
            self
        """
        y = self.preprocess(y)
-        eps = np.finfo(np.float).eps
+        eps = np.finfo(np.float16).eps
        if len(self.groups) == 0:
            assert not self.scale_by_group, "No groups are defined, i.e. `scale_by_group=[]`"
            if self.method == "standard":
@@ -953,10 +952,13 @@ class GroupNormalizer(TorchNormalizer):
            Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: Scaled data, if ``return_norm=True``, returns also scales
                as second element
        """
+        # # check if arguments are wrong way round
+        if isinstance(y, pd.DataFrame) and not isinstance(X, pd.DataFrame):
+            raise ValueError("X and y is in wrong positions")
        if target_scale is None:
            assert X is not None, "either target_scale or X has to be passed"
            target_scale = self.get_norm(X)
-        return super().transform(y=y, return_norm=return_norm, target_scale=target_scale)
+        return super().transform(y, return_norm=return_norm, target_scale=target_scale)

    def get_parameters(self, groups: Union[torch.Tensor, list, tuple], group_names: List[str] = None) -> np.ndarray:
        """
@@ -1060,7 +1062,7 @@ class MultiNormalizer(TorchNormalizer):

        for idx, normalizer in enumerate(self.normalizers):
            if isinstance(normalizer, GroupNormalizer):
-                normalizer.fit(y[:, idx], X=X)
+                normalizer.fit(y[:, idx], X)
            else:
                normalizer.fit(y[:, idx])

@@ -1119,7 +1121,7 @@ class MultiNormalizer(TorchNormalizer):
            else:
                scale = None
            if isinstance(normalizer, GroupNormalizer):
-                r = normalizer.transform(y[idx], X=X, return_norm=return_norm, target_scale=scale)
+                r = normalizer.transform(y[idx], X, return_norm=return_norm, target_scale=scale)
            else:
                r = normalizer.transform(y[idx], return_norm=return_norm, target_scale=scale)
            res.append(r)
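
A note on the three identical docstring corrections in this file (logp1 → log1p): the option name follows torch.log1p, and, as each docstring warns, the normalizer's inverse transform is plain torch.exp(), so a round trip returns y + 1 rather than y:

    import torch

    y = torch.tensor([0.1, 0.5])
    z = torch.log1p(y)  # forward transform: log(y + 1)
    torch.exp(z)        # tensor([1.1000, 1.5000]) == y + 1; torch.expm1(z) would recover y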

pf/data/samplers.py

100755 → 100644
+8 −38
@@ -9,37 +9,6 @@ from sklearn.utils import shuffle
import torch
from torch.utils.data.sampler import Sampler

-class MultiSeqsSampler(Sampler):
-    """
-    (Testing) Samples mini-batch randomly from multiple sequences of the TOPAZ data 
-    Every Sampler subclass has to provide an :meth:`__iter__` method, providing a
-    way to iterate over indices of dataset elements, and a :meth:`__len__` method
-    that returns the length of the returned iterators.
-    """
-    def __init__(self,
-        data_source,   ## (TimeSeriesDataSet) which should include 2 or more sequences
-        batch_size: int = 64,
-        shuffle: bool = False,
-        drop_last: bool = False,
-        ):
-        self.data_source = data_source
-        self.batch_size = batch_size
-        self.drop_last = drop_last
-        self.shuffle = shuffle
-
-        ### do something here
-
-
-        ### overwrite the def __iter__(self) -> Iterator[]
-        def __iter__(self):
-            # do something
-            pass
-            # return
-
-        ### overwrite the def __len__(self) method
-        def __len__(self) -> int:
-            return len(self.data_source)
-

class GroupedSampler(Sampler):
    """
@@ -51,7 +20,7 @@ class GroupedSampler(Sampler):

    def __init__(
        self,
-        data_source,
+        sampler: Sampler,
        batch_size: int = 64,
        shuffle: bool = False,
        drop_last: bool = False,
@@ -60,7 +29,7 @@ class GroupedSampler(Sampler):
        Initialize.

        Args:
-            data_source (TimeSeriesDataSet): timeseries dataset.
+            sampler (Sampler or Iterable): Base sampler. Can be any iterable object
            drop_last (bool): if to drop last mini-batch from a group if it is smaller than batch_size.
                Defaults to False.
            shuffle (bool): if to shuffle dataset. Defaults to False.
@@ -77,20 +46,20 @@ class GroupedSampler(Sampler):
            )
        if not isinstance(drop_last, bool):
            raise ValueError("drop_last should be a boolean value, but got " "drop_last={}".format(drop_last))
-        self.data_source = data_source
+        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.shuffle = shuffle
        # make groups and construct new index to sample from
-        groups = self.get_groups(self.data_source)
+        groups = self.get_groups(self.sampler)
        self.construct_batch_groups(groups)

-    def get_groups(self, data_source):
+    def get_groups(self, sampler: Sampler):
        """
        Create the groups which can be sampled.

        Args:
-            data_source (TimeSeriesDataSet): timeseries dataset.
+            sampler (Sampler): will have attribute data_source which is of type TimeSeriesDataSet.

        Returns:
            dict-like: dictionary-like object with data_source.index as values and group names as keys
@@ -153,7 +122,8 @@ class TimeSynchronizedBatchSampler(GroupedSampler):
    This sampler does not support missing values in the dataset.
    """

-    def get_groups(self, data_source):
+    def get_groups(self, sampler: Sampler):
+        data_source = sampler.data_source
        index = data_source.index
        # get groups, i.e. group all samples by first predict time
        last_time = data_source.data["time"][index["index_end"].to_numpy()].numpy()
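
This refactor follows upstream pytorch-forecasting's newer sampler API: the grouped batch samplers now wrap a base Sampler whose .data_source attribute is the TimeSeriesDataSet, instead of taking the dataset directly. A usage sketch under that assumption, with training_dataset standing in for a fitted TimeSeriesDataSet (hypothetical name):

    from torch.utils.data.sampler import SequentialSampler

    from pf.data.samplers import TimeSynchronizedBatchSampler

    base = SequentialSampler(training_dataset)  # training_dataset: a TimeSeriesDataSet (assumed)
    batch_sampler = TimeSynchronizedBatchSampler(base, batch_size=64, shuffle=True)
    # pass to torch.utils.data.DataLoader(..., batch_sampler=batch_sampler)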

pf/metrics/_mqf2_utils.py

100755 → 100644
+0 −1
@@ -264,7 +264,6 @@ class MQF2Distribution(Distribution):
        threshold_input: float = 100.0,
        validate_args: bool = False,
    ) -> None:
-
        self.picnn = picnn
        self.hidden_state = hidden_state
        self.prediction_length = prediction_length