Unverified Commit 96b1f7aa authored by mvdbeek's avatar mvdbeek
Browse files

Reuse code for getting extra files rewrite path, make pick up location more consistent

parent ba0a6490
Loading
Loading
Loading
Loading
+7 −6
Original line number | Diff line number | Diff line
@@ -12,6 +12,11 @@ from galaxy.job_execution.setup import JobIO
from galaxy.model import Job


def dataset_path_to_extra_path(path: str) -> str:
    """Return the extra-files directory path for a dataset file path.

    Drops the trailing ``.dat`` extension and appends ``_files``,
    e.g. ``dataset_1.dat`` -> ``dataset_1_files``. Note the slice removes
    the last ``len(".dat")`` characters unconditionally, so callers are
    expected to pass ``.dat``-suffixed paths.
    """
    stem = path[: -len(".dat")]
    return stem + "_files"


class ComputeEnvironment(metaclass=ABCMeta):
    """Definition of the job as it will be run on the (potentially) remote
    compute server.
@@ -128,15 +133,11 @@ class SharedComputeEnvironment(SimpleComputeEnvironment, ComputeEnvironment):

    def input_extra_files_rewrite(self, dataset):
        """Return the rewritten extra-files path for an input ``dataset``.

        Derived from the rewritten primary dataset path via
        ``dataset_path_to_extra_path`` so the two rewrites stay consistent.
        """
        # Merge artifact removed: the old inline ".dat" -> "_files" logic
        # returned early, leaving the shared-helper call unreachable.
        input_path_rewrite = self.input_path_rewrite(dataset)
        return dataset_path_to_extra_path(input_path_rewrite)

    def output_extra_files_rewrite(self, dataset):
        """Return the rewritten extra-files path for an output ``dataset``.

        Derived from the rewritten primary dataset path via
        ``dataset_path_to_extra_path`` so the two rewrites stay consistent.
        """
        # Merge artifact removed: the old inline ".dat" -> "_files" logic
        # returned early, leaving the shared-helper call unreachable.
        output_path_rewrite = self.output_path_rewrite(dataset)
        return dataset_path_to_extra_path(output_path_rewrite)

    def input_metadata_rewrite(self, dataset, metadata_value):
        # Shared compute environment: no metadata rewrite is performed —
        # always returns None (presumably the caller treats None as
        # "use the metadata value unchanged"; confirm against callers).
        return None
+6 −5
Original line number | Diff line number | Diff line
@@ -37,7 +37,10 @@ from pulsar.client import (
from pulsar.client.staging import DEFAULT_DYNAMIC_COLLECTION_PATTERN

from galaxy import model
from galaxy.job_execution.compute_environment import ComputeEnvironment
from galaxy.job_execution.compute_environment import (
    ComputeEnvironment,
    dataset_path_to_extra_path,
)
from galaxy.jobs import JobDestination
from galaxy.jobs.command_factory import build_command
from galaxy.jobs.runners import (
@@ -1104,15 +1107,13 @@ class PulsarComputeEnvironment(ComputeEnvironment):

    def input_extra_files_rewrite(self, dataset):
        """Rewrite the extra-files path for an input ``dataset`` and record it.

        The rewrite is derived from the rewritten dataset path with
        ``dataset_path_to_extra_path`` and also stored in
        ``self.path_rewrites_input_extra`` keyed by the dataset's original
        extra-files path, so later lookups can map original -> remote.
        """
        # Merge artifact removed: two dead assignments re-deriving the
        # "_files" path inline were immediately overwritten below.
        input_path_rewrite = self.input_path_rewrite(dataset)
        remote_extra_files_path_rewrite = dataset_path_to_extra_path(input_path_rewrite)
        self.path_rewrites_input_extra[dataset.extra_files_path] = remote_extra_files_path_rewrite
        return remote_extra_files_path_rewrite

    def output_extra_files_rewrite(self, dataset):
        """Return the rewritten extra-files path for an output ``dataset``.

        Derived from the rewritten dataset path with
        ``dataset_path_to_extra_path``; unlike the input variant, no
        path-rewrite mapping is recorded here.
        """
        # Merge artifact removed: two dead assignments re-deriving the
        # "_files" path inline were immediately overwritten below.
        output_path_rewrite = self.output_path_rewrite(dataset)
        return dataset_path_to_extra_path(output_path_rewrite)

    def input_metadata_rewrite(self, dataset, metadata_val):
+16 −16
Original line number | Diff line number | Diff line
@@ -29,6 +29,7 @@ import galaxy.datatypes.registry
import galaxy.model.mapping
from galaxy.datatypes import sniff
from galaxy.datatypes.data import validate
from galaxy.job_execution.compute_environment import dataset_path_to_extra_path
from galaxy.job_execution.output_collect import (
    collect_dynamic_outputs,
    collect_extra_files,
@@ -391,26 +392,25 @@ def set_metadata_portable(
                    # Only set external filename if we're dealing with files in job working directory.
                    # Fixes link_data_only uploads
                    dataset.dataset.external_filename = external_filename
                    store_by = output_dict.get("object_store_store_by", "id")
                    extra_files_dir_name = f"dataset_{getattr(dataset.dataset, store_by)}_files"
                    # We derive extra_files_dir_name from external_filename, because OutputsToWorkingDirectoryPathRewriter
                    # always rewrites the path to include the uuid, even if store_by is set to id, and the extra files
                    # rewrite is derived from the dataset path (since https://github.com/galaxyproject/galaxy/pull/16541).
                    extra_files_dir_name = os.path.basename(dataset_path_to_extra_path(external_filename))
                    # TODO: all extra file outputs should be stored in outputs, but keep fallback for running jobs. Remove in 23.2.
                    for output_location in ("outputs", "working"):
                        files_path = os.path.abspath(
                        os.path.join(tool_job_working_directory, "working", extra_files_dir_name)
                            os.path.join(tool_job_working_directory, output_location, extra_files_dir_name)
                        )
                        if os.path.exists(files_path):
                            dataset.dataset.external_extra_files_path = files_path
                            break
                    else:
                        # could be pulsar, stores extra files in outputs directory
                        pulsar_extra_files_path = os.path.join(
                            tool_job_working_directory, "outputs", extra_files_dir_name
                        )
                        if os.path.exists(pulsar_extra_files_path):
                            dataset.dataset.external_extra_files_path = pulsar_extra_files_path
                        elif dataset_filename_override and not object_store:
                            # pulsar, no remote metadata and no extended metadata
                        # extra files dir didn't exist in working or outputs directory
                        if dataset_filename_override and not object_store:
                            # not extended metadata (so no object store) and outputs_to_working_directory off
                            dataset.dataset.external_extra_files_path = os.path.join(
                                os.path.dirname(dataset_filename_override), extra_files_dir_name
                            )

            file_dict = tool_provided_metadata.get_dataset_meta(output_name, dataset.dataset.id, dataset.dataset.uuid)
            if "ext" in file_dict:
                dataset.extension = file_dict["ext"]