lib/galaxy/job_execution/compute_environment.py  +11 −8

@@ -12,6 +12,11 @@
 from galaxy.job_execution.setup import JobIO
 from galaxy.model import Job
 
 
+def dataset_path_to_extra_path(path: str) -> str:
+    base_path = path[0 : -len(".dat")]
+    return f"{base_path}_files"
+
+
 class ComputeEnvironment(metaclass=ABCMeta):
     """Definition of the job as it will be run on the (potentially) remote compute server.

@@ -121,20 +126,18 @@ class SharedComputeEnvironment(SimpleComputeEnvironment, ComputeEnvironment):
         return self.job_io.get_output_fnames()
 
     def input_path_rewrite(self, dataset):
-        return self.job_io.get_input_path(dataset).false_path
+        return str(self.job_io.get_input_path(dataset))
 
     def output_path_rewrite(self, dataset):
-        dataset_path = self.job_io.get_output_path(dataset)
-        if hasattr(dataset_path, "false_path"):
-            return dataset_path.false_path
-        else:
-            return dataset_path
+        return str(self.job_io.get_output_path(dataset))
 
     def input_extra_files_rewrite(self, dataset):
-        return None
+        input_path_rewrite = self.input_path_rewrite(dataset)
+        return dataset_path_to_extra_path(input_path_rewrite)
 
     def output_extra_files_rewrite(self, dataset):
-        return None
+        output_path_rewrite = self.output_path_rewrite(dataset)
+        return dataset_path_to_extra_path(output_path_rewrite)
 
     def input_metadata_rewrite(self, dataset, metadata_value):
         return None
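For reference, a minimal standalone sketch of what the new helper computes; the sample path is hypothetical and only illustrates the ".dat" to "_files" mapping that the rewrites above rely on.

def dataset_path_to_extra_path(path: str) -> str:
    # Strip the trailing ".dat" and append "_files", mirroring the helper added above.
    base_path = path[0 : -len(".dat")]
    return f"{base_path}_files"


# Hypothetical rewritten dataset path, for illustration only.
assert dataset_path_to_extra_path("/job/outputs/dataset_abc123.dat") == "/job/outputs/dataset_abc123_files"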
lib/galaxy/job_execution/datasets.py  +1 −1

@@ -94,7 +94,7 @@ class OutputsToWorkingDirectoryPathRewriter(DatasetPathRewriter):
             if self.outputs_directory_name is not None:
                 base_output_directory = os.path.join(base_output_directory, self.outputs_directory_name)
             # set false_path to uuid, no harm even if object store uses id
-            false_path = os.path.join(base_output_directory, f"galaxy_dataset_{dataset.dataset.uuid}.dat")
+            false_path = os.path.join(base_output_directory, f"dataset_{dataset.dataset.uuid}.dat")
             return false_path
         else:
             return None
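A small sketch of the path shape this rewriter now produces (directory and uuid are made up); the point is that the filename follows the dataset_<uuid>.dat convention that dataset_path_to_extra_path and set_metadata.py expect.

import os

# Hypothetical inputs, for illustration only.
base_output_directory = "/jobs/42/outputs"
dataset_uuid = "0123456789abcdef"

false_path = os.path.join(base_output_directory, f"dataset_{dataset_uuid}.dat")
# e.g. "/jobs/42/outputs/dataset_0123456789abcdef.dat", whose extra-files
# directory would be ".../dataset_0123456789abcdef_files".
print(false_path)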
lib/galaxy/job_execution/output_collect.py  +8 −2

@@ -719,8 +719,14 @@ def default_exit_code_file(files_dir, id_tag):
 def collect_extra_files(object_store, dataset, job_working_directory):
     # TODO: should this use compute_environment to determine the extra files path ?
     file_name = dataset.dataset.extra_files_path_name_from(object_store)
-    temp_file_path = os.path.join(job_working_directory, "working", file_name)
+    output_location = "outputs"
+    temp_file_path = os.path.join(job_working_directory, output_location, file_name)
+    if not os.path.exists(temp_file_path):
+        # Fall back to working dir, remove in 23.2
+        output_location = "working"
+        temp_file_path = os.path.join(job_working_directory, output_location, file_name)
     extra_dir = None
     try:
         # This skips creation of directories - object store

@@ -728,7 +734,7 @@ def collect_extra_files(object_store, dataset, job_working_directory):
         # not be created in the object store at all, which might be a
         # problem.
         for root, _dirs, files in os.walk(temp_file_path):
-            extra_dir = root.replace(os.path.join(job_working_directory, "working"), "", 1).lstrip(os.path.sep)
+            extra_dir = root.replace(os.path.join(job_working_directory, output_location), "", 1).lstrip(os.path.sep)
             for f in files:
                 object_store.update_from_file(
                     dataset.dataset,
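The lookup order introduced here can be summarized by a small standalone sketch (the helper name is hypothetical, not part of the change): prefer the job's outputs directory and fall back to the working directory for jobs that were already running before the change.

import os


def resolve_extra_files_dir(job_working_directory: str, file_name: str) -> str:
    # Hypothetical helper mirroring the fallback above: check "outputs" first,
    # then "working"; if neither exists, keep the "working" path as before.
    for output_location in ("outputs", "working"):
        candidate = os.path.join(job_working_directory, output_location, file_name)
        if os.path.exists(candidate):
            return candidate
    return os.path.join(job_working_directory, "working", file_name)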
lib/galaxy/jobs/runners/pulsar.py  +6 −5

@@ -37,7 +37,10 @@ from pulsar.client import (
 from pulsar.client.staging import DEFAULT_DYNAMIC_COLLECTION_PATTERN
 
 from galaxy import model
-from galaxy.job_execution.compute_environment import ComputeEnvironment
+from galaxy.job_execution.compute_environment import (
+    ComputeEnvironment,
+    dataset_path_to_extra_path,
+)
 from galaxy.jobs import JobDestination
 from galaxy.jobs.command_factory import build_command
 from galaxy.jobs.runners import (

@@ -1104,15 +1107,13 @@ class PulsarComputeEnvironment(ComputeEnvironment):
     def input_extra_files_rewrite(self, dataset):
         input_path_rewrite = self.input_path_rewrite(dataset)
-        base_input_path = input_path_rewrite[0 : -len(".dat")]
-        remote_extra_files_path_rewrite = f"{base_input_path}_files"
+        remote_extra_files_path_rewrite = dataset_path_to_extra_path(input_path_rewrite)
         self.path_rewrites_input_extra[dataset.extra_files_path] = remote_extra_files_path_rewrite
         return remote_extra_files_path_rewrite
 
     def output_extra_files_rewrite(self, dataset):
         output_path_rewrite = self.output_path_rewrite(dataset)
-        base_output_path = output_path_rewrite[0 : -len(".dat")]
-        remote_extra_files_path_rewrite = f"{base_output_path}_files"
+        remote_extra_files_path_rewrite = dataset_path_to_extra_path(output_path_rewrite)
         return remote_extra_files_path_rewrite
 
     def input_metadata_rewrite(self, dataset, metadata_val):
lib/galaxy/metadata/set_metadata.py  +16 −16

@@ -29,6 +29,7 @@ import galaxy.datatypes.registry
 import galaxy.model.mapping
 from galaxy.datatypes import sniff
 from galaxy.datatypes.data import validate
+from galaxy.job_execution.compute_environment import dataset_path_to_extra_path
 from galaxy.job_execution.output_collect import (
     collect_dynamic_outputs,
     collect_extra_files,

@@ -391,26 +392,25 @@ def set_metadata_portable(
                 # Only set external filename if we're dealing with files in job working directory.
                 # Fixes link_data_only uploads
                 dataset.dataset.external_filename = external_filename
-                store_by = output_dict.get("object_store_store_by", "id")
-                extra_files_dir_name = f"dataset_{getattr(dataset.dataset, store_by)}_files"
+                # We derive extra_files_dir_name from external_filename, because OutputsToWorkingDirectoryPathRewriter
+                # always rewrites the path to include the uuid, even if store_by is set to id, and the extra files
+                # rewrite is derived from the dataset path (since https://github.com/galaxyproject/galaxy/pull/16541).
+                extra_files_dir_name = os.path.basename(dataset_path_to_extra_path(external_filename))
+                # TODO: all extra file outputs should be stored in outputs, but keep fallback for running jobs. Remove in 23.2.
+                for output_location in ("outputs", "working"):
                     files_path = os.path.abspath(
-                        os.path.join(tool_job_working_directory, "working", extra_files_dir_name)
+                        os.path.join(tool_job_working_directory, output_location, extra_files_dir_name)
                     )
                     if os.path.exists(files_path):
                         dataset.dataset.external_extra_files_path = files_path
+                        break
                 else:
-                    # could be pulsar, stores extra files in outputs directory
-                    pulsar_extra_files_path = os.path.join(
-                        tool_job_working_directory, "outputs", extra_files_dir_name
-                    )
-                    if os.path.exists(pulsar_extra_files_path):
-                        dataset.dataset.external_extra_files_path = pulsar_extra_files_path
-                    elif dataset_filename_override and not object_store:
-                        # pulsar, no remote metadata and no extended metadata
+                    # extra files dir didn't exist in working or outputs directory
+                    if dataset_filename_override and not object_store:
+                        # not extended metadata (so no object store) and outputs_to_working_directory off
                         dataset.dataset.external_extra_files_path = os.path.join(
                             os.path.dirname(dataset_filename_override), extra_files_dir_name
                         )
             file_dict = tool_provided_metadata.get_dataset_meta(output_name, dataset.dataset.id, dataset.dataset.uuid)
             if "ext" in file_dict:
                 dataset.extension = file_dict["ext"]
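To make the new derivation concrete, a short sketch (paths are hypothetical) of how extra_files_dir_name now comes out of the rewritten external_filename rather than out of object_store_store_by:

import os


def dataset_path_to_extra_path(path: str) -> str:
    base_path = path[0 : -len(".dat")]
    return f"{base_path}_files"


# Hypothetical path as rewritten by OutputsToWorkingDirectoryPathRewriter,
# which always embeds the uuid even when store_by is "id".
external_filename = "/job/working_dir/outputs/dataset_0123456789abcdef.dat"

extra_files_dir_name = os.path.basename(dataset_path_to_extra_path(external_filename))
assert extra_files_dir_name == "dataset_0123456789abcdef_files"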