Unverified Commit 46d980b6 authored by Duggan, John's avatar Duggan, John Committed by GitHub
Browse files

Merge pull request #131 from nova-model/70-add-component-for-selecting-datafiles-from-oncat

Implement ONCat backend for NeutronDataSelector
parents 44f574e2 45a5ddf9
Loading
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
### nova-trame, 0.26.0

* Added data_source and projection parameters to NeutronDataSelector to allow populating data files from ONCat (thanks to Andrew Ayres and John Duggan).

### nova-trame, 0.25.5

* NeutronDataSelector will no longer show duplicates of a file that matches multiple extensions (thanks to John Duggan).
+58 −5
Original line number Diff line number Diff line
@@ -252,7 +252,7 @@ version = "2025.7.14"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.7"
groups = ["dev"]
groups = ["main", "dev"]
files = [
    {file = "certifi-2025.7.14-py3-none-any.whl", hash = "sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2"},
    {file = "certifi-2025.7.14.tar.gz", hash = "sha256:8ea99dbdfaaf2ba2f9bac77b9249ef62ec5218e7c2b2e903378ed5fccf765995"},
@@ -357,7 +357,7 @@ version = "3.4.2"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false
python-versions = ">=3.7"
groups = ["dev"]
groups = ["main", "dev"]
files = [
    {file = "charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941"},
    {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd"},
@@ -1873,6 +1873,23 @@ files = [
    {file = "numpy-2.3.2.tar.gz", hash = "sha256:e0486a11ec30cdecb53f184d496d1c6a20786c81e55e41640270130056f8ee48"},
]

[[package]]
name = "oauthlib"
version = "3.3.1"
description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
    {file = "oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1"},
    {file = "oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9"},
]

[package.extras]
rsa = ["cryptography (>=3.0.0)"]
signals = ["blinker (>=1.4.0)"]
signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]

[[package]]
name = "orderly-set"
version = "5.5.0"
@@ -2484,6 +2501,23 @@ files = [
[package.extras]
windows-terminal = ["colorama (>=0.4.6)"]

[[package]]
name = "pyoncat"
version = "2.1"
description = "A Python Client for ONCat (the ORNL Neutron Catalog)."
optional = false
python-versions = "<4,>=3.7"
groups = ["main"]
files = [
    {file = "pyoncat-2.1-py3-none-any.whl", hash = "sha256:4bf742fca50ac5e10564c76fd226d50bb06a408e17e320fd4fb9e38cc98c0a28"},
    {file = "pyoncat-2.1.tar.gz", hash = "sha256:4b7ad0792833269aed5207ec42b7898a3e0b74860c69d0be585335b31982da72"},
]

[package.dependencies]
oauthlib = "*"
requests = "*"
requests-oauthlib = "*"

[[package]]
name = "pyparsing"
version = "3.2.3"
@@ -2649,7 +2683,7 @@ version = "2.32.4"
description = "Python HTTP for Humans."
optional = false
python-versions = ">=3.8"
groups = ["dev"]
groups = ["main", "dev"]
files = [
    {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"},
    {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"},
@@ -2665,6 +2699,25 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]

[[package]]
name = "requests-oauthlib"
version = "2.0.0"
description = "OAuthlib authentication support for Requests."
optional = false
python-versions = ">=3.4"
groups = ["main"]
files = [
    {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"},
    {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"},
]

[package.dependencies]
oauthlib = ">=3.0.0"
requests = ">=2.0.0"

[package.extras]
rsa = ["oauthlib[signedtoken] (>=3.0.0)"]

[[package]]
name = "roman-numerals-py"
version = "3.1.0"
@@ -3476,7 +3529,7 @@ version = "2.5.0"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.9"
groups = ["dev"]
groups = ["main", "dev"]
files = [
    {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"},
    {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"},
@@ -3700,4 +3753,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<4.0"
content-hash = "31876c7e35b3ec4a17f28052650688a71ff619845fd46bab640ada5eb521af5b"
content-hash = "52edc453ed1a32c2b714eabfef08b5aaf4697d15ed16c55e51f41338eb8f3342"
+2 −1
Original line number Diff line number Diff line
[project]
name = "nova-trame"
version = "0.25.5"
version = "0.26.0"
description = "A Python Package for injecting curated themes and custom components into Trame applications"
authors = [
    { name = "John Duggan", email = "dugganjw@ornl.gov" },
@@ -37,6 +37,7 @@ pydantic = "*"
nova-common = ">=0.2.2"
blinker = "^1.9.0"
natsort = "^8.4.0"
pyoncat = "^2.1"

[tool.poetry.group.dev.dependencies]
mypy = "^1.10.0"
+166 −0
Original line number Diff line number Diff line
"""Analysis cluster filesystem backend for NeutronDataSelector."""

import os
from pathlib import Path
from typing import Any, Dict, List, Optional
from warnings import warn

from natsort import natsorted
from pydantic import Field, model_validator
from typing_extensions import Self

from .neutron_data_selector import NeutronDataSelectorModel, NeutronDataSelectorState

# Pseudo-facility label. AnalysisDataSelectorState.get_facilities() appends it to the facility
# list when allow_custom_directories is enabled, and the model compares state.facility against
# it to decide whether to browse state.custom_directory instead of an experiment directory.
CUSTOM_DIRECTORIES_LABEL = "Custom Directory"

# Two-level lookup: facility name -> {instrument name -> instrument directory name}.
# The inner keys are the instrument options returned by get_instruments(); the inner values are
# the path component the model uses when building "/<facility>/<directory>/<experiment>".
INSTRUMENTS = {
    "HFIR": {
        "CG-1A": "CG1A",
        "DEV BEAM": "CG1B",
        "MARS": "CG1D",
        "GP-SANS": "CG2",
        "BIO-SANS": "CG3",
        "CNPDB": "CG4B",
        "CTAX": "CG4C",
        "IMAGINE": "CG4D",
        "PTAX": "HB1",
        "VERITAS": "HB1A",
        "POWDER": "HB2A",
        "HIDRA": "HB2B",
        "WAND²": "HB2C",
        "TAX": "HB3",
        "DEMAND": "HB3A",
        "NOWG": "NOWG",
        "NOWV": "NOWV",
    },
    "SNS": {
        "ARCS": "ARCS",
        "BL-0": "BL0",
        "BASIS": "BSS",
        "CNCS": "CNCS",
        "CORELLI": "CORELLI",
        "EQ-SANS": "EQSANS",
        "HYSPEC": "HYS",
        "MANDI": "MANDI",
        "NOMAD": "NOM",
        "NOWB": "NOWB",
        "NOWD": "NOWD",
        "NSE": "NSE",
        "POWGEN": "PG3",
        "LIQREF": "REF_L",
        "MAGREF": "REF_M",
        "SEQUOIA": "SEQ",
        "SNAP": "SNAP",
        "TOPAZ": "TOPAZ",
        "USANS": "USANS",
        "VENUS": "VENUS",
        "VISION": "VIS",
        "VULCAN": "VULCAN",
    },
}


class AnalysisDataSelectorState(NeutronDataSelectorState):
    """Selection state for locating datafiles through the analysis cluster filesystem.

    Adds an opt-in "custom directory" mode on top of the inherited selection fields:
    when ``allow_custom_directories`` is set, ``CUSTOM_DIRECTORIES_LABEL`` is offered as
    an extra facility and ``custom_directory`` holds the path to browse.
    """

    allow_custom_directories: bool = Field(default=False)
    custom_directory: str = Field(default="", title="Custom Directory")

    @model_validator(mode="after")
    def validate_state(self) -> Self:
        """Emit warnings for an unrecognised facility or instrument.

        Nothing is raised here; unknown values only produce a warning and the
        state is returned unchanged.
        """
        known_facilities = self.get_facilities()
        facility_is_unknown = bool(self.facility) and self.facility not in known_facilities
        if facility_is_unknown:
            facility_message = f"Facility '{self.facility}' could not be found. Valid options: {known_facilities}"
            warn(facility_message, stacklevel=1)

        known_instruments = self.get_instruments()
        in_custom_mode = self.facility == CUSTOM_DIRECTORIES_LABEL
        # Instruments are not checked while the custom-directory pseudo-facility is selected.
        if self.instrument and not in_custom_mode and self.instrument not in known_instruments:
            instrument_message = (
                f"Instrument '{self.instrument}' could not be found in '{self.facility}'. "
                f"Valid options: {known_instruments}"
            )
            warn(instrument_message, stacklevel=1)

        # Validating the experiment is expensive and will fail in our CI due to the filesystem not being mounted there.
        return self

    def get_facilities(self) -> List[str]:
        """Return the facility options, plus the custom-directory label when it is enabled."""
        options = [*INSTRUMENTS]
        if self.allow_custom_directories:
            options.append(CUSTOM_DIRECTORIES_LABEL)
        return options

    def get_instruments(self) -> List[str]:
        """Return the instrument names for the current facility (empty if the facility is unknown)."""
        instruments_for_facility = INSTRUMENTS.get(self.facility, {})
        return list(instruments_for_facility)


class AnalysisDataSelectorModel(NeutronDataSelectorModel):
    """NeutronDataSelector backend that reads experiments and files from the analysis cluster filesystem."""

    def __init__(self, state: AnalysisDataSelectorState) -> None:
        """Store the state on the base model and re-annotate it with the analysis-specific type."""
        super().__init__(state)
        self.state: AnalysisDataSelectorState = state

    def set_binding_parameters(self, **kwargs: Any) -> None:
        """Apply the base binding parameters, then ``allow_custom_directories`` if it was supplied."""
        super().set_binding_parameters(**kwargs)

        try:
            allow_custom = kwargs["allow_custom_directories"]
        except KeyError:
            return
        self.state.allow_custom_directories = allow_custom

    def get_custom_directory_path(self) -> Optional[Path]:
        """Return the custom directory as a ``Path``, or ``None`` when none has been entered."""
        custom_directory = self.state.custom_directory
        # Don't expose the full file system
        return Path(custom_directory) if custom_directory else None

    def get_experiment_directory_path(self) -> Optional[Path]:
        """Return ``/<facility>/<instrument dir>/<experiment>``, or ``None`` when no experiment is set."""
        experiment = self.state.experiment
        if experiment:
            return Path("/") / self.state.facility / self.get_instrument_dir() / experiment
        return None

    def get_instrument_dir(self) -> str:
        """Return the directory name for the selected instrument, or ``""`` if it is not in ``INSTRUMENTS``."""
        facility_instruments = INSTRUMENTS.get(self.state.facility, {})
        return facility_instruments.get(self.state.instrument, "")

    def get_experiments(self) -> List[str]:
        """Return the naturally-sorted, readable ``IPTS-*`` entries under the instrument directory.

        Any ``OSError`` raised while scanning is swallowed; whatever was collected up to
        that point is returned.
        """
        instrument_root = Path("/") / self.state.facility / self.get_instrument_dir()
        readable_experiments = []

        try:
            for entry in os.listdir(instrument_root):
                if not entry.startswith("IPTS-"):
                    continue
                if os.access(instrument_root / entry, mode=os.R_OK):
                    readable_experiments.append(entry)
        except OSError:
            pass

        return natsorted(readable_experiments)

    def get_directories(self, base_path: Optional[Path] = None) -> List[Dict[str, Any]]:
        """Return the directory listing for ``base_path``.

        When ``base_path`` is not given, the custom directory is used if the custom-directory
        facility is selected; otherwise the experiment directory is used. An empty list is
        returned when no path can be determined.
        """
        if not base_path:
            if self.state.facility == CUSTOM_DIRECTORIES_LABEL:
                base_path = self.get_custom_directory_path()
            else:
                base_path = self.get_experiment_directory_path()

        if not base_path:
            return []

        return self.get_directories_from_path(base_path)

    def get_datafiles(self, *args: Any, **kwargs: Any) -> List[Any]:
        """Return one ``{"path": ...}`` entry per datafile found for the current selection.

        A set experiment takes precedence; otherwise the custom directory is used when the
        custom-directory facility is selected and a directory has been entered. With neither,
        the result is an empty list.
        """
        state = self.state
        if state.experiment:
            search_root = Path("/") / state.facility / self.get_instrument_dir() / state.experiment
        elif state.facility == CUSTOM_DIRECTORIES_LABEL and state.custom_directory:
            search_root = Path(state.custom_directory)
        else:
            return []

        return [{"path": datafile} for datafile in self.get_datafiles_from_path(search_root)]
+8 −129
Original line number Diff line number Diff line
"""Model implementation for DataSelector."""

import os
from pathlib import Path
from typing import Any, Dict, List, Optional
from warnings import warn

from natsort import natsorted
from pydantic import Field, field_validator, model_validator
from typing_extensions import Self
from pydantic import Field, field_validator

from ..data_selector import DataSelectorModel, DataSelectorState

CUSTOM_DIRECTORIES_LABEL = "Custom Directory"

INSTRUMENTS = {
    "HFIR": {
        "CG-1A": "CG1A",
        "CG-1B": "CG1B",
        "CG-1D": "CG1D",
        "CG-2": "CG2",
        "CG-3": "CG3",
        "CG-4B": "CG4B",
        "CG-4C": "CG4C",
        "CG-4D": "CG4D",
        "HB-1": "HB1",
        "HB-1A": "HB1A",
        "HB-2A": "HB2A",
        "HB-2B": "HB2B",
        "HB-2C": "HB2C",
        "HB-3": "HB3",
        "HB-3A": "HB3A",
        "NOW-G": "NOWG",
        "NOW-V": "NOWV",
    },
    "SNS": {
        "BL-18": "ARCS",
        "BL-0": "BL0",
        "BL-2": "BSS",
        "BL-5": "CNCS",
        "BL-9": "CORELLI",
        "BL-6": "EQSANS",
        "BL-14B": "HYS",
        "BL-11B": "MANDI",
        "BL-1B": "NOM",
        "NOW-G": "NOWG",
        "BL-15": "NSE",
        "BL-11A": "PG3",
        "BL-4B": "REF_L",
        "BL-4A": "REF_M",
        "BL-17": "SEQ",
        "BL-3": "SNAP",
        "BL-12": "TOPAZ",
        "BL-1A": "USANS",
        "BL-10": "VENUS",
        "BL-16B": "VIS",
        "BL-7": "VULCAN",
    },
}


class NeutronDataSelectorState(DataSelectorState):
    """Selection state for identifying datafiles."""

    allow_custom_directories: bool = Field(default=False)
    facility: str = Field(default="", title="Facility")
    instrument: str = Field(default="", title="Instrument")
    experiment: str = Field(default="", title="Experiment")
    custom_directory: str = Field(default="", title="Custom Directory")

    @field_validator("experiment", mode="after")
    @classmethod
@@ -75,33 +23,11 @@ class NeutronDataSelectorState(DataSelectorState):
            raise ValueError("experiment must begin with IPTS-")
        return experiment

    @model_validator(mode="after")
    def validate_state(self) -> Self:
        valid_facilities = self.get_facilities()
        if self.facility and self.facility not in valid_facilities:
            warn(f"Facility '{self.facility}' could not be found. Valid options: {valid_facilities}", stacklevel=1)

        valid_instruments = self.get_instruments()
        if self.instrument and self.facility != CUSTOM_DIRECTORIES_LABEL and self.instrument not in valid_instruments:
            warn(
                (
                    f"Instrument '{self.instrument}' could not be found in '{self.facility}'. "
                    f"Valid options: {valid_instruments}"
                ),
                stacklevel=1,
            )
        # Validating the experiment is expensive and will fail in our CI due to the filesystem not being mounted there.

        return self

    def get_facilities(self) -> List[str]:
        facilities = list(INSTRUMENTS.keys())
        if self.allow_custom_directories:
            facilities.append(CUSTOM_DIRECTORIES_LABEL)
        return facilities
        raise NotImplementedError()

    def get_instruments(self) -> List[str]:
        return list(INSTRUMENTS.get(self.facility, {}).keys())
        raise NotImplementedError()


class NeutronDataSelectorModel(DataSelectorModel):
@@ -120,65 +46,18 @@ class NeutronDataSelectorModel(DataSelectorModel):
            self.state.instrument = kwargs["instrument"]
        if "experiment" in kwargs:
            self.state.experiment = kwargs["experiment"]
        if "allow_custom_directories" in kwargs:
            self.state.allow_custom_directories = kwargs["allow_custom_directories"]

    def get_facilities(self) -> List[str]:
        return natsorted(self.state.get_facilities())

    def get_instrument_dir(self) -> str:
        return INSTRUMENTS.get(self.state.facility, {}).get(self.state.instrument, "")

    def get_instruments(self) -> List[str]:
        return natsorted(self.state.get_instruments())

    def get_experiments(self) -> List[str]:
        experiments = []

        instrument_path = Path("/") / self.state.facility / self.get_instrument_dir()
        try:
            for dirname in os.listdir(instrument_path):
                if dirname.startswith("IPTS-") and os.access(instrument_path / dirname, mode=os.R_OK):
                    experiments.append(dirname)
        except OSError:
            pass

        return natsorted(experiments)

    def get_experiment_directory_path(self) -> Optional[Path]:
        if not self.state.experiment:
            return None

        return Path("/") / self.state.facility / self.get_instrument_dir() / self.state.experiment

    def get_custom_directory_path(self) -> Optional[Path]:
        # Don't expose the full file system
        if not self.state.custom_directory:
            return None

        return Path(self.state.custom_directory)
        raise NotImplementedError()

    def get_directories(self, base_path: Optional[Path] = None) -> List[Dict[str, Any]]:
        using_custom_directory = self.state.facility == CUSTOM_DIRECTORIES_LABEL
        if base_path:
            pass
        elif using_custom_directory:
            base_path = self.get_custom_directory_path()
        else:
            base_path = self.get_experiment_directory_path()

        if not base_path:
            return []

        return self.get_directories_from_path(base_path)

    def get_datafiles(self) -> List[str]:
        using_custom_directory = self.state.facility == CUSTOM_DIRECTORIES_LABEL
        if self.state.experiment:
            base_path = Path("/") / self.state.facility / self.get_instrument_dir() / self.state.experiment
        elif using_custom_directory and self.state.custom_directory:
            base_path = Path(self.state.custom_directory)
        else:
            return []

        return self.get_datafiles_from_path(base_path)
        raise NotImplementedError()

    def get_datafiles(self, *args: Any, **kwargs: Any) -> List[str]:
        raise NotImplementedError()
Loading