Commit 47ac47aa authored by Cage, Gregory's avatar Cage, Gregory
Browse files

Merge branch '30-job-canceling-improvement' into 'main'

Job canceling and dataset enhancements

Closes #30

See merge request ndip/public-packages/nova-galaxy!22
parents 34abb071 1b963dfa
Loading
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
dockerfiles
+1 −4
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ This example shows how to run a tool that takes multiple datasets as input.

.. code-block:: python

   from nova.galaxy import Nova, Dataset, Tool, Parameters, upload_datasets
   from nova.galaxy import Nova, Dataset, Tool, Parameters

   galaxy_url = "your_galaxy_url"
   galaxy_key = "your_galaxy_api_key"
@@ -20,9 +20,6 @@ This example shows how to run a tool that takes multiple datasets as input.
       dataset1 = Dataset("path/to/file1.txt", name="File 1")
       dataset2 = Dataset("path/to/file2.txt", name="File 2")

       # Upload multiple datasets in parallel
       upload_datasets(data_store, {"input1": dataset1, "input2": dataset2})

       # Define parameters, using the uploaded datasets
       params = Parameters()
       params.add_input("input1", dataset1)
+1 −1
Original line number Diff line number Diff line
[tool.poetry]
name = "nova-galaxy"
version = "0.8.2"
version = "0.9.0"
description = "Utilities for accessing the ORNL Galaxy instance"
authors = ["Greg Watson <watsongr@ornl.gov>", "Gregory Cage <cagege@ornl.gov>"]
readme = "README.md"
+1 −2
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@ import importlib.metadata

from .connection import Connection
from .data_store import Datastore
from .dataset import Dataset, DatasetCollection, upload_datasets
from .dataset import Dataset, DatasetCollection
from .outputs import Outputs
from .parameters import Parameters
from .tool import Tool
@@ -13,7 +13,6 @@ __all__ = [
    "Datastore",
    "Dataset",
    "DatasetCollection",
    "upload_datasets",
    "Outputs",
    "Parameters",
    "Tool",
+19 −26
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ as well as output data from Galaxy tools.
from abc import ABC, abstractmethod
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
from typing import TYPE_CHECKING, Any, Optional, Union

from bioblend.galaxy.dataset_collections import DatasetCollectionClient
from bioblend.galaxy.datasets import DatasetClient
@@ -47,6 +47,7 @@ class AbstractData(ABC):
        super().__init__()
        self.path: str = ""
        self.id: Union[str, None] = ""
        self.name: str = ""
        self.store: Union[None, "Datastore"] = None

    @abstractmethod
@@ -79,19 +80,32 @@ class Dataset(AbstractData):
        self.file_type: str = Path(path).suffix
        self._content: Any = None

    def upload(self, store: "Datastore") -> None:
    def upload(self, store: "Datastore", name: Optional[str] = None) -> None:
        """Uploads this dataset to the data store given.

        This method will automatically set the id, and store class variables for future use.

        Parameters
        ----------
        store: Datastore
            The data store to upload this dataset to.
        name: Optional[str]
            The name that will be used for the dataset upstream. Defaults to the local name.
        """
        galaxy_instance = store.nova_connection.galaxy_instance
        dataset_client = DatasetClient(galaxy_instance)
        history_id = galaxy_instance.histories.get_histories(name=store.name)[0]["id"]
        if name:
            file_name = name
        else:
            file_name = self.name
        if self._content:
            dataset_id = galaxy_instance.tools.paste_content(content=self._content, history_id=history_id)
            dataset_info = galaxy_instance.tools.paste_content(
                content=self._content, history_id=history_id, file_name=file_name
            )
        else:
            dataset_id = galaxy_instance.tools.upload_file(path=self.path, history_id=history_id)
        self.id = dataset_id["outputs"][0]["id"]
            dataset_info = galaxy_instance.tools.upload_file(path=self.path, history_id=history_id, file_name=file_name)
        self.id = dataset_info["outputs"][0]["id"]
        self.store = store
        dataset_client.wait_for_dataset(self.id)

@@ -168,24 +182,3 @@ class DatasetCollection(AbstractData):
            return info["elements"]
        else:
            raise Exception("Dataset collection is not present in Galaxy.")


def upload_datasets(store: "Datastore", datasets: Dict[str, AbstractData]) -> Dict[str, str]:
    """Helper method to upload multiple datasets or collections in parallel."""
    galaxy_instance = store.nova_connection.galaxy_instance
    dataset_client = DatasetClient(galaxy_instance)
    history_id = galaxy_instance.histories.get_histories(name=store.name)[0]["id"]
    dataset_ids = {}
    for name, dataset in datasets.items():
        if len(dataset.path) < 1 and dataset.get_content():
            dataset_info = galaxy_instance.tools.paste_content(
                content=str(dataset.get_content()), history_id=history_id
            )
        else:
            dataset_info = galaxy_instance.tools.upload_file(path=dataset.path, history_id=history_id)
        dataset_ids[name] = dataset_info["outputs"][0]["id"]
        dataset.id = dataset_info["outputs"][0]["id"]
        dataset.store = store
    for dataset_output in dataset_ids.values():
        dataset_client.wait_for_dataset(dataset_output)
    return dataset_ids
Loading