Loading client/src/components/History/Content/model/states.js +1 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,7 @@ export const STATES = { }, deferred: { status: "info", text: "This dataset is remote, has not be ingested by Galaxy, and full metadata may not be available.", text: "This dataset is remote, has not been ingested by Galaxy, and full metadata may not be available.", icon: "cloud", }, /** has no data */ Loading lib/galaxy/metadata/set_metadata.py +2 −2 Original line number Diff line number Diff line Loading @@ -434,7 +434,7 @@ def set_metadata_portable( if not object_store or not export_store: # Can't happen, but type system doesn't know raise Exception("object_store not built") if not link_data_only and os.path.getsize(external_filename): if not is_deferred and not link_data_only and os.path.getsize(external_filename): # Here we might be updating a disk based objectstore when outputs_to_working_directory is used, # or a remote object store from its cache path. object_store.update_from_file(dataset.dataset, file_name=external_filename, create=True) Loading Loading @@ -469,7 +469,7 @@ def set_metadata_portable( context_value = context[context_key] setattr(dataset, context_key, context_value) # We only want to persist the external_filename if the dataset has been linked in. if not link_data_only: if not is_deferred and not link_data_only: dataset.dataset.external_filename = None dataset.dataset.extra_files_path = None export_store.add_dataset(dataset) Loading lib/galaxy/model/deferred.py +10 −6 Original line number Diff line number Diff line Loading @@ -40,6 +40,7 @@ log = logging.getLogger(__name__) class TransientDatasetPaths(NamedTuple): external_filename: str external_extra_files_path: str metadata_files_dir: str class TransientPathMapper: Loading @@ -61,7 +62,7 @@ class SimpleTransientPathMapper(TransientPathMapper): external_filename = os.path.join(self._staging_directory, external_filename_basename) external_extras_basename = "dataset_%s_files" % str(old_dataset.uuid) external_extras = os.path.join(self._staging_directory, external_extras_basename) return TransientDatasetPaths(external_filename, external_extras) return TransientDatasetPaths(external_filename, external_extras, self._staging_directory) class DatasetInstanceMaterializer: Loading Loading @@ -110,6 +111,7 @@ class DatasetInstanceMaterializer: materialized_dataset.hashes = [h.copy() for h in dataset.hashes] target_source = self._find_closest_dataset_source(dataset) transient_paths = None if attached: object_store_populator = self._object_store_populator assert object_store_populator Loading Loading @@ -157,12 +159,14 @@ class DatasetInstanceMaterializer: materialized_dataset_instance.has_metadata_files or materialized_dataset_instance.metadata_deferred ) if require_metadata_regeneration: if attached and self._sa_session: # as of mid April 2022, we now get JSON encoding errors if this # isn't bound to the session before metadata generation. self._sa_session.add(materialized_dataset_instance) materialized_dataset_instance.init_meta() materialized_dataset_instance.set_meta() if transient_paths: metadata_tmp_files_dir = transient_paths.metadata_files_dir else: # If metadata_tmp_files_dir is set we generate a MetdataTempFile, # which we don't want when we're generating an attached materialized dataset instance metadata_tmp_files_dir = None materialized_dataset_instance.set_meta(metadata_tmp_files_dir=metadata_tmp_files_dir) materialized_dataset_instance.metadata_deferred = False return materialized_dataset_instance Loading lib/galaxy_test/api/test_tools.py +48 −0 Original line number Diff line number Diff line Loading @@ -2729,6 +2729,54 @@ class ToolsTestCase(ApiTestCase, TestsTools): output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output) assert output_content.startswith("chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 -") @skip_without_tool("metadata_bam") @uses_test_history(require_new=False) def test_run_deferred_dataset_with_metadata_options_filter(self, history_id): details = self.dataset_populator.create_deferred_hda( history_id, "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam", ext="bam" ) inputs = {"input_bam": dataset_to_param(details), "ref_names": "chrM"} run_response = self.dataset_populator.run_tool(tool_id="metadata_bam", inputs=inputs, history_id=history_id) output = run_response["outputs"][0] output_details = self.dataset_populator.get_history_dataset_details( history_id, dataset=output, wait=True, assert_ok=True ) assert output_details["state"] == "ok" output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output) assert output_content.startswith("chrM") @skip_without_tool("pileup") @uses_test_history(require_new=False) def test_metadata_validator_on_deferred_input(self, history_id): deferred_bam_details = self.dataset_populator.create_deferred_hda( history_id, "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam", ext="bam" ) fasta1_contents = open(self.get_filename("1.fasta")).read() fasta = self.dataset_populator.new_dataset(history_id, content=fasta1_contents) inputs = {"input1": dataset_to_param(deferred_bam_details), "reference": dataset_to_param(fasta)} run_response = self.dataset_populator.run_tool(tool_id="pileup", inputs=inputs, history_id=history_id) self.dataset_populator.wait_for_job(run_response["jobs"][0]["id"], assert_ok=True) @pytest.mark.xfail @skip_without_tool("pileup") @uses_test_history(require_new=False) def test_metadata_validator_can_fail_on_deferred_input(self, history_id): # This test fails because we just skip the validator # Fixing this is a TODO deferred_bam_details = self.dataset_populator.create_deferred_hda( history_id, "https://github.com/galaxyproject/galaxy/blob/dev/test-data/3unsorted.bam?raw=true", ext="unsorted.bam", ) fasta1_contents = open(self.get_filename("1.fasta")).read() fasta = self.dataset_populator.new_dataset(history_id, content=fasta1_contents) inputs = {"input1": dataset_to_param(deferred_bam_details), "reference": dataset_to_param(fasta)} run_response = self.dataset_populator.run_tool(tool_id="pileup", inputs=inputs, history_id=history_id) self.dataset_populator.wait_for_job(run_response["jobs"][0]["id"], assert_ok=False) job_id = run_response["jobs"][0]["id"] job_details = self.dataset_populator.get_job_details(job_id=job_id).json() assert job_details["state"] == "failed" @skip_without_tool("cat1") @uses_test_history(require_new=False) def test_run_deferred_mapping(self, history_id: str): Loading test/functional/tools/for_workflows/pileup.xml +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ printf 'Summary' > '$out_file1' ]]></command> <inputs> <param name="input1" type="data" format="bam" multiple="true" min="1" label="BAM Inputs"> <param name="input1" type="data" format="unsorted.bam" multiple="true" min="1" label="BAM Inputs"> <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" /> </param> <param name="reference" type="data" format="fasta" label="Fasta Input"/> Loading Loading
client/src/components/History/Content/model/states.js +1 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,7 @@ export const STATES = { }, deferred: { status: "info", text: "This dataset is remote, has not be ingested by Galaxy, and full metadata may not be available.", text: "This dataset is remote, has not been ingested by Galaxy, and full metadata may not be available.", icon: "cloud", }, /** has no data */ Loading
lib/galaxy/metadata/set_metadata.py +2 −2 Original line number Diff line number Diff line Loading @@ -434,7 +434,7 @@ def set_metadata_portable( if not object_store or not export_store: # Can't happen, but type system doesn't know raise Exception("object_store not built") if not link_data_only and os.path.getsize(external_filename): if not is_deferred and not link_data_only and os.path.getsize(external_filename): # Here we might be updating a disk based objectstore when outputs_to_working_directory is used, # or a remote object store from its cache path. object_store.update_from_file(dataset.dataset, file_name=external_filename, create=True) Loading Loading @@ -469,7 +469,7 @@ def set_metadata_portable( context_value = context[context_key] setattr(dataset, context_key, context_value) # We only want to persist the external_filename if the dataset has been linked in. if not link_data_only: if not is_deferred and not link_data_only: dataset.dataset.external_filename = None dataset.dataset.extra_files_path = None export_store.add_dataset(dataset) Loading
lib/galaxy/model/deferred.py +10 −6 Original line number Diff line number Diff line Loading @@ -40,6 +40,7 @@ log = logging.getLogger(__name__) class TransientDatasetPaths(NamedTuple): external_filename: str external_extra_files_path: str metadata_files_dir: str class TransientPathMapper: Loading @@ -61,7 +62,7 @@ class SimpleTransientPathMapper(TransientPathMapper): external_filename = os.path.join(self._staging_directory, external_filename_basename) external_extras_basename = "dataset_%s_files" % str(old_dataset.uuid) external_extras = os.path.join(self._staging_directory, external_extras_basename) return TransientDatasetPaths(external_filename, external_extras) return TransientDatasetPaths(external_filename, external_extras, self._staging_directory) class DatasetInstanceMaterializer: Loading Loading @@ -110,6 +111,7 @@ class DatasetInstanceMaterializer: materialized_dataset.hashes = [h.copy() for h in dataset.hashes] target_source = self._find_closest_dataset_source(dataset) transient_paths = None if attached: object_store_populator = self._object_store_populator assert object_store_populator Loading Loading @@ -157,12 +159,14 @@ class DatasetInstanceMaterializer: materialized_dataset_instance.has_metadata_files or materialized_dataset_instance.metadata_deferred ) if require_metadata_regeneration: if attached and self._sa_session: # as of mid April 2022, we now get JSON encoding errors if this # isn't bound to the session before metadata generation. self._sa_session.add(materialized_dataset_instance) materialized_dataset_instance.init_meta() materialized_dataset_instance.set_meta() if transient_paths: metadata_tmp_files_dir = transient_paths.metadata_files_dir else: # If metadata_tmp_files_dir is set we generate a MetdataTempFile, # which we don't want when we're generating an attached materialized dataset instance metadata_tmp_files_dir = None materialized_dataset_instance.set_meta(metadata_tmp_files_dir=metadata_tmp_files_dir) materialized_dataset_instance.metadata_deferred = False return materialized_dataset_instance Loading
lib/galaxy_test/api/test_tools.py +48 −0 Original line number Diff line number Diff line Loading @@ -2729,6 +2729,54 @@ class ToolsTestCase(ApiTestCase, TestsTools): output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output) assert output_content.startswith("chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 -") @skip_without_tool("metadata_bam") @uses_test_history(require_new=False) def test_run_deferred_dataset_with_metadata_options_filter(self, history_id): details = self.dataset_populator.create_deferred_hda( history_id, "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam", ext="bam" ) inputs = {"input_bam": dataset_to_param(details), "ref_names": "chrM"} run_response = self.dataset_populator.run_tool(tool_id="metadata_bam", inputs=inputs, history_id=history_id) output = run_response["outputs"][0] output_details = self.dataset_populator.get_history_dataset_details( history_id, dataset=output, wait=True, assert_ok=True ) assert output_details["state"] == "ok" output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output) assert output_content.startswith("chrM") @skip_without_tool("pileup") @uses_test_history(require_new=False) def test_metadata_validator_on_deferred_input(self, history_id): deferred_bam_details = self.dataset_populator.create_deferred_hda( history_id, "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam", ext="bam" ) fasta1_contents = open(self.get_filename("1.fasta")).read() fasta = self.dataset_populator.new_dataset(history_id, content=fasta1_contents) inputs = {"input1": dataset_to_param(deferred_bam_details), "reference": dataset_to_param(fasta)} run_response = self.dataset_populator.run_tool(tool_id="pileup", inputs=inputs, history_id=history_id) self.dataset_populator.wait_for_job(run_response["jobs"][0]["id"], assert_ok=True) @pytest.mark.xfail @skip_without_tool("pileup") @uses_test_history(require_new=False) def test_metadata_validator_can_fail_on_deferred_input(self, history_id): # This test fails because we just skip the validator # Fixing this is a TODO deferred_bam_details = self.dataset_populator.create_deferred_hda( history_id, "https://github.com/galaxyproject/galaxy/blob/dev/test-data/3unsorted.bam?raw=true", ext="unsorted.bam", ) fasta1_contents = open(self.get_filename("1.fasta")).read() fasta = self.dataset_populator.new_dataset(history_id, content=fasta1_contents) inputs = {"input1": dataset_to_param(deferred_bam_details), "reference": dataset_to_param(fasta)} run_response = self.dataset_populator.run_tool(tool_id="pileup", inputs=inputs, history_id=history_id) self.dataset_populator.wait_for_job(run_response["jobs"][0]["id"], assert_ok=False) job_id = run_response["jobs"][0]["id"] job_details = self.dataset_populator.get_job_details(job_id=job_id).json() assert job_details["state"] == "failed" @skip_without_tool("cat1") @uses_test_history(require_new=False) def test_run_deferred_mapping(self, history_id: str): Loading
test/functional/tools/for_workflows/pileup.xml +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ printf 'Summary' > '$out_file1' ]]></command> <inputs> <param name="input1" type="data" format="bam" multiple="true" min="1" label="BAM Inputs"> <param name="input1" type="data" format="unsorted.bam" multiple="true" min="1" label="BAM Inputs"> <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" /> </param> <param name="reference" type="data" format="fasta" label="Fasta Input"/> Loading