Loading .dockerignore +0 −1 Original line number Diff line number Diff line dockerfiles docs/examples/multiple_inputs.rst +1 −4 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ This example shows how to run a tool that takes multiple datasets as input. .. code-block:: python from nova.galaxy import Nova, Dataset, Tool, Parameters, upload_datasets from nova.galaxy import Nova, Dataset, Tool, Parameters galaxy_url = "your_galaxy_url" galaxy_key = "your_galaxy_api_key" Loading @@ -20,9 +20,6 @@ This example shows how to run a tool that takes multiple datasets as input. dataset1 = Dataset("path/to/file1.txt", name="File 1") dataset2 = Dataset("path/to/file2.txt", name="File 2") # Upload multiple datasets in parallel upload_datasets(data_store, {"input1": dataset1, "input2": dataset2}) # Define parameters, using the uploaded datasets params = Parameters() params.add_input("input1", dataset1) Loading pyproject.toml +1 −1 Original line number Diff line number Diff line [tool.poetry] name = "nova-galaxy" version = "0.8.2" version = "0.9.0" description = "Utilties for accessing the ORNL Galaxy instance" authors = ["Greg Watson <watsongr@ornl.gov>", "Gregory Cage <cagege@ornl.gov>"] readme = "README.md" Loading src/nova/galaxy/__init__.py +1 −2 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ import importlib.metadata from .connection import Connection from .data_store import Datastore from .dataset import Dataset, DatasetCollection, upload_datasets from .dataset import Dataset, DatasetCollection from .outputs import Outputs from .parameters import Parameters from .tool import Tool Loading @@ -13,7 +13,6 @@ __all__ = [ "Datastore", "Dataset", "DatasetCollection", "upload_datasets", "Outputs", "Parameters", "Tool", Loading src/nova/galaxy/dataset.py +19 −26 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ as well as output data from Galaxy tools. 
from abc import ABC, abstractmethod from enum import Enum from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, Optional, Union from typing import TYPE_CHECKING, Any, Optional, Union from bioblend.galaxy.dataset_collections import DatasetCollectionClient from bioblend.galaxy.datasets import DatasetClient Loading Loading @@ -47,6 +47,7 @@ class AbstractData(ABC): super().__init__() self.path: str = "" self.id: Union[str, None] = "" self.name: str = "" self.store: Union[None, "Datastore"] = None @abstractmethod Loading Loading @@ -79,19 +80,32 @@ class Dataset(AbstractData): self.file_type: str = Path(path).suffix self._content: Any = None def upload(self, store: "Datastore") -> None: def upload(self, store: "Datastore", name: Optional[str] = None) -> None: """Uploads this dataset to the data store given. This method will automatically set the id, and store class variables for future use. Parameters ---------- store: Datastore The data store to upload this dataset to. name: Optional[str] The name that will be used for the dataset upstream. Defaults to the local name. 
""" galaxy_instance = store.nova_connection.galaxy_instance dataset_client = DatasetClient(galaxy_instance) history_id = galaxy_instance.histories.get_histories(name=store.name)[0]["id"] if name: file_name = name else: file_name = self.name if self._content: dataset_id = galaxy_instance.tools.paste_content(content=self._content, history_id=history_id) dataset_info = galaxy_instance.tools.paste_content( content=self._content, history_id=history_id, file_name=file_name ) else: dataset_id = galaxy_instance.tools.upload_file(path=self.path, history_id=history_id) self.id = dataset_id["outputs"][0]["id"] dataset_info = galaxy_instance.tools.upload_file(path=self.path, history_id=history_id, file_name=file_name) self.id = dataset_info["outputs"][0]["id"] self.store = store dataset_client.wait_for_dataset(self.id) Loading Loading @@ -168,24 +182,3 @@ class DatasetCollection(AbstractData): return info["elements"] else: raise Exception("Dataset collection is not present in Galaxy.") def upload_datasets(store: "Datastore", datasets: Dict[str, AbstractData]) -> Dict[str, str]: """Helper method to upload multiple datasets or collections in parallel.""" galaxy_instance = store.nova_connection.galaxy_instance dataset_client = DatasetClient(galaxy_instance) history_id = galaxy_instance.histories.get_histories(name=store.name)[0]["id"] dataset_ids = {} for name, dataset in datasets.items(): if len(dataset.path) < 1 and dataset.get_content(): dataset_info = galaxy_instance.tools.paste_content( content=str(dataset.get_content()), history_id=history_id ) else: dataset_info = galaxy_instance.tools.upload_file(path=dataset.path, history_id=history_id) dataset_ids[name] = dataset_info["outputs"][0]["id"] dataset.id = dataset_info["outputs"][0]["id"] dataset.store = store for dataset_output in dataset_ids.values(): dataset_client.wait_for_dataset(dataset_output) return dataset_ids Loading
docs/examples/multiple_inputs.rst +1 −4 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ This example shows how to run a tool that takes multiple datasets as input. .. code-block:: python from nova.galaxy import Nova, Dataset, Tool, Parameters, upload_datasets from nova.galaxy import Nova, Dataset, Tool, Parameters galaxy_url = "your_galaxy_url" galaxy_key = "your_galaxy_api_key" Loading @@ -20,9 +20,6 @@ This example shows how to run a tool that takes multiple datasets as input. dataset1 = Dataset("path/to/file1.txt", name="File 1") dataset2 = Dataset("path/to/file2.txt", name="File 2") # Upload multiple datasets in parallel upload_datasets(data_store, {"input1": dataset1, "input2": dataset2}) # Define parameters, using the uploaded datasets params = Parameters() params.add_input("input1", dataset1) Loading
pyproject.toml +1 −1 Original line number Diff line number Diff line [tool.poetry] name = "nova-galaxy" version = "0.8.2" version = "0.9.0" description = "Utilities for accessing the ORNL Galaxy instance" authors = ["Greg Watson <watsongr@ornl.gov>", "Gregory Cage <cagege@ornl.gov>"] readme = "README.md" Loading
src/nova/galaxy/__init__.py +1 −2 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ import importlib.metadata from .connection import Connection from .data_store import Datastore from .dataset import Dataset, DatasetCollection, upload_datasets from .dataset import Dataset, DatasetCollection from .outputs import Outputs from .parameters import Parameters from .tool import Tool Loading @@ -13,7 +13,6 @@ __all__ = [ "Datastore", "Dataset", "DatasetCollection", "upload_datasets", "Outputs", "Parameters", "Tool", Loading
src/nova/galaxy/dataset.py +19 −26 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ as well as output data from Galaxy tools. from abc import ABC, abstractmethod from enum import Enum from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, Optional, Union from typing import TYPE_CHECKING, Any, Optional, Union from bioblend.galaxy.dataset_collections import DatasetCollectionClient from bioblend.galaxy.datasets import DatasetClient Loading Loading @@ -47,6 +47,7 @@ class AbstractData(ABC): super().__init__() self.path: str = "" self.id: Union[str, None] = "" self.name: str = "" self.store: Union[None, "Datastore"] = None @abstractmethod Loading Loading @@ -79,19 +80,32 @@ class Dataset(AbstractData): self.file_type: str = Path(path).suffix self._content: Any = None def upload(self, store: "Datastore") -> None: def upload(self, store: "Datastore", name: Optional[str] = None) -> None: """Uploads this dataset to the data store given. This method will automatically set the id, and store class variables for future use. Parameters ---------- store: Datastore The data store to upload this dataset to. name: Optional[str] The name that will be used for the dataset upstream. Defaults to the local name. 
""" galaxy_instance = store.nova_connection.galaxy_instance dataset_client = DatasetClient(galaxy_instance) history_id = galaxy_instance.histories.get_histories(name=store.name)[0]["id"] if name: file_name = name else: file_name = self.name if self._content: dataset_id = galaxy_instance.tools.paste_content(content=self._content, history_id=history_id) dataset_info = galaxy_instance.tools.paste_content( content=self._content, history_id=history_id, file_name=file_name ) else: dataset_id = galaxy_instance.tools.upload_file(path=self.path, history_id=history_id) self.id = dataset_id["outputs"][0]["id"] dataset_info = galaxy_instance.tools.upload_file(path=self.path, history_id=history_id, file_name=file_name) self.id = dataset_info["outputs"][0]["id"] self.store = store dataset_client.wait_for_dataset(self.id) Loading Loading @@ -168,24 +182,3 @@ class DatasetCollection(AbstractData): return info["elements"] else: raise Exception("Dataset collection is not present in Galaxy.") def upload_datasets(store: "Datastore", datasets: Dict[str, AbstractData]) -> Dict[str, str]: """Helper method to upload multiple datasets or collections in parallel.""" galaxy_instance = store.nova_connection.galaxy_instance dataset_client = DatasetClient(galaxy_instance) history_id = galaxy_instance.histories.get_histories(name=store.name)[0]["id"] dataset_ids = {} for name, dataset in datasets.items(): if len(dataset.path) < 1 and dataset.get_content(): dataset_info = galaxy_instance.tools.paste_content( content=str(dataset.get_content()), history_id=history_id ) else: dataset_info = galaxy_instance.tools.upload_file(path=dataset.path, history_id=history_id) dataset_ids[name] = dataset_info["outputs"][0]["id"] dataset.id = dataset_info["outputs"][0]["id"] dataset.store = store for dataset_output in dataset_ids.values(): dataset_client.wait_for_dataset(dataset_output) return dataset_ids