Loading client/yarn.lock +2 −41 Original line number Diff line number Diff line Loading @@ -3550,14 +3550,6 @@ agent-base@^7.0.2: dependencies: debug "^4.3.4" aggregate-error@^3.0.0: version "3.1.0" resolved "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz" integrity sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA== dependencies: clean-stack "^2.0.0" indent-string "^4.0.0" aggregate-error@^4.0.0: version "4.0.1" resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-4.0.1.tgz#25091fe1573b9e0be892aeda15c7c66a545f758e" Loading Loading @@ -4378,11 +4370,6 @@ cjs-module-lexer@^1.0.0: resolved "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.2.2.tgz" integrity sha512-cOU9usZw8/dXIXKtwa8pM0OTJQuJkxMN6w30csNRUerHfeQ5R6U3kkU/FtJeIf3M202OHfY2U8ccInBG7/xogA== clean-stack@^2.0.0: version "2.2.0" resolved "https://registry.npmjs.org/clean-stack/-/clean-stack-2.2.0.tgz" integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A== clean-stack@^4.0.0: version "4.2.0" resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-4.2.0.tgz#c464e4cde4ac789f4e0735c5d75beb49d7b30b31" Loading Loading @@ -5312,20 +5299,6 @@ define-properties@^1.1.3, define-properties@^1.1.4, define-properties@^1.2.0: has-property-descriptors "^1.0.0" object-keys "^1.1.1" del@^6.0.0: version "6.1.1" resolved "https://registry.npmjs.org/del/-/del-6.1.1.tgz" integrity sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg== dependencies: globby "^11.0.1" graceful-fs "^4.2.4" is-glob "^4.0.1" is-path-cwd "^2.2.0" is-path-inside "^3.0.2" p-map "^4.0.0" rimraf "^3.0.2" slash "^3.0.0" delaunator@5: version "5.0.0" resolved "https://registry.yarnpkg.com/delaunator/-/delaunator-5.0.0.tgz#60f052b28bd91c9b4566850ebf7756efe821d81b" Loading Loading @@ -6708,7 +6681,7 @@ globalthis@^1.0.3: dependencies: define-properties "^1.1.3" globby@^11.0.1, globby@^11.1.0: globby@^11.1.0: version "11.1.0" resolved "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz" integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g== Loading Loading @@ -7370,12 +7343,7 @@ is-number@^7.0.0: resolved "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz" integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng== is-path-cwd@^2.2.0: version "2.2.0" resolved "https://registry.npmjs.org/is-path-cwd/-/is-path-cwd-2.2.0.tgz" integrity sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ== is-path-inside@^3.0.2, is-path-inside@^3.0.3: is-path-inside@^3.0.3: version "3.0.3" resolved "https://registry.npmjs.org/is-path-inside/-/is-path-inside-3.0.3.tgz" integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ== Loading Loading @@ -9211,13 +9179,6 @@ p-locate@^5.0.0: dependencies: p-limit "^3.0.2" p-map@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/p-map/-/p-map-4.0.0.tgz" integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ== dependencies: aggregate-error "^3.0.0" p-map@^5.1.0: version "5.5.0" resolved "https://registry.yarnpkg.com/p-map/-/p-map-5.5.0.tgz#054ca8ca778dfa4cf3f8db6638ccb5b937266715" Loading lib/galaxy/datatypes/binary.py +30 −13 Original line number Diff line number Diff line Loading @@ -549,22 +549,29 @@ class _BamOrSam: Helper class to set the metadata common to sam and bam files """ max_references = 100000 def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: try: bam_file = pysam.AlignmentFile(dataset.get_file_name(), mode="rb") with pysam.AlignmentFile(dataset.get_file_name(), mode="rb", check_sq=False) as bam_file: # TODO: Reference names, lengths, read_groups and headers can become very large, truncate when necessary if bam_file.nreferences <= self.max_references: dataset.metadata.reference_names = list(bam_file.references) dataset.metadata.reference_lengths = list(bam_file.lengths) dataset.metadata.bam_header = dict(bam_file.header.items()) # type: ignore [attr-defined] dataset.metadata.read_groups = [ read_group["ID"] for read_group in dataset.metadata.bam_header.get("RG", []) if "ID" in read_group read_group["ID"] for read_group in dataset.metadata.bam_header.get("RG", []) if "ID" in read_group ] dataset.metadata.sort_order = dataset.metadata.bam_header.get("HD", {}).get("SO", None) dataset.metadata.bam_version = dataset.metadata.bam_header.get("HD", {}).get("VN", None) else: dataset.metadata.metadata_incomplete = True dataset.metadata.sort_order = bam_file.header.get("HD", {}).get("SO", None) # type: ignore [attr-defined] dataset.metadata.bam_version = bam_file.header.get("HD", {}).get("VN", None) # type: ignore [attr-defined] except Exception: # Per Dan, don't log here because doing so will cause datasets that # fail metadata to end in the error state pass dataset.metadata.metadata_incomplete = True class BamNative(CompressedArchive, _BamOrSam): Loading Loading @@ -653,6 +660,16 @@ class BamNative(CompressedArchive, _BamOrSam): optional=True, no_value={}, ) MetadataElement( name="metadata_incomplete", default=False, desc="Indicates if metadata is incomplete", param=MetadataParameter, readonly=True, visible=False, optional=True, no_value=False, ) def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: _BamOrSam().set_meta(dataset, overwrite=overwrite, **kwd) Loading Loading @@ -1051,7 +1068,7 @@ class BamInputSorted(BamNative): """ # The best way to ensure that BAM files are coordinate-sorted and indexable # is to actually index them. with pysam.AlignmentFile(filename=file_name) as f: with pysam.AlignmentFile(filename=file_name, check_sq=False) as f: # The only sure thing we know here is that the sort order can't be coordinate return f.header.get("HD", {}).get("SO") == "coordinate" # type: ignore[attr-defined] Loading @@ -1071,7 +1088,7 @@ class BamQuerynameSorted(BamInputSorted): """ # The best way to ensure that BAM files are coordinate-sorted and indexable # is to actually index them. with pysam.AlignmentFile(filename=file_name) as f: with pysam.AlignmentFile(filename=file_name, check_sq=False) as f: return f.header.get("HD", {}).get("SO") != "queryname" # type: ignore[attr-defined] Loading lib/galaxy/workflow/modules.py +31 −15 Original line number Diff line number Diff line Loading @@ -1647,21 +1647,13 @@ class InputParameterModule(WorkflowModule): ] def execute( self, trans, progress: "WorkflowProgress", invocation_step, use_cached_job: bool = False self, trans, progress: "WorkflowProgress", invocation_step: "WorkflowInvocationStep", use_cached_job: bool = False, ) -> Optional[bool]: step = invocation_step.workflow_step if step.id in progress.inputs_by_step_id: input_value = progress.inputs_by_step_id[step.id] else: input_value = step.state.inputs["input"] if input_value is NO_REPLACEMENT: default_value = step.get_input_default_value(NO_REPLACEMENT) # TODO: look at parameter type and infer if value should be a dictionary # instead. Guessing only field parameter types in CWL branch would have # default as dictionary like this. if not isinstance(default_value, dict): default_value = {"value": default_value} input_value = default_value.get("value", NO_REPLACEMENT) input_value = self.get_input_value(progress, invocation_step) input_param = self.get_runtime_inputs(self)["input"] # TODO: raise DelayedWorkflowEvaluation if replacement not ready ? Need test try: Loading @@ -1681,13 +1673,37 @@ class InputParameterModule(WorkflowModule): except ValueError as e: raise FailWorkflowEvaluation( why=InvocationFailureWorkflowParameterInvalid( reason=FailureReason.workflow_parameter_invalid, workflow_step_id=step.id, details=str(e) reason=FailureReason.workflow_parameter_invalid, workflow_step_id=invocation_step.workflow_step_id, details=str(e), ) ) step_outputs = dict(output=input_value) progress.set_outputs_for_input(invocation_step, step_outputs) return None def get_input_value(self, progress: "WorkflowProgress", invocation_step: "WorkflowInvocationStep"): step = invocation_step.workflow_step if step.id in progress.inputs_by_step_id: input_value = progress.inputs_by_step_id[step.id] else: assert step.state input_value = step.state.inputs["input"] if input_value is NO_REPLACEMENT: default_value = step.get_input_default_value(NO_REPLACEMENT) # TODO: look at parameter type and infer if value should be a dictionary # instead. Guessing only field parameter types in CWL branch would have # default as dictionary like this. if not isinstance(default_value, dict): default_value = {"value": default_value} input_value = default_value.get("value", NO_REPLACEMENT) return input_value def recover_mapping(self, invocation_step: "WorkflowInvocationStep", progress: "WorkflowProgress"): input_value = self.get_input_value(progress, invocation_step) step_outputs = dict(output=input_value) progress.set_outputs_for_input(invocation_step, step_outputs, already_persisted=True) def step_state_to_tool_state(self, state): state = safe_loads(state) default_set, default_value = False, None Loading lib/galaxy/workflow/run.py +0 −8 Original line number Diff line number Diff line Loading @@ -608,17 +608,9 @@ class WorkflowProgress: outputs[invocation_step.output_value.workflow_output.output_name] = invocation_step.output_value.value self.outputs[step.id] = outputs if not already_persisted: workflow_outputs_by_name = {wo.output_name: wo for wo in step.workflow_outputs} for output_name, output_object in outputs.items(): if hasattr(output_object, "history_content_type"): invocation_step.add_output(output_name, output_object) else: # Add this non-data, non workflow-output output to the workflow outputs. # This is required for recovering the output in the next scheduling iteration, # and should be replaced with a WorkflowInvocationStepOutputValue ASAP. if not workflow_outputs_by_name.get(output_name) and output_object is not NO_REPLACEMENT: workflow_output = model.WorkflowOutput(step, output_name=output_name) step.workflow_outputs.append(workflow_output) for workflow_output in step.workflow_outputs: assert workflow_output.output_name output_name = workflow_output.output_name Loading Loading
client/yarn.lock +2 −41 Original line number Diff line number Diff line Loading @@ -3550,14 +3550,6 @@ agent-base@^7.0.2: dependencies: debug "^4.3.4" aggregate-error@^3.0.0: version "3.1.0" resolved "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz" integrity sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA== dependencies: clean-stack "^2.0.0" indent-string "^4.0.0" aggregate-error@^4.0.0: version "4.0.1" resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-4.0.1.tgz#25091fe1573b9e0be892aeda15c7c66a545f758e" Loading Loading @@ -4378,11 +4370,6 @@ cjs-module-lexer@^1.0.0: resolved "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.2.2.tgz" integrity sha512-cOU9usZw8/dXIXKtwa8pM0OTJQuJkxMN6w30csNRUerHfeQ5R6U3kkU/FtJeIf3M202OHfY2U8ccInBG7/xogA== clean-stack@^2.0.0: version "2.2.0" resolved "https://registry.npmjs.org/clean-stack/-/clean-stack-2.2.0.tgz" integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A== clean-stack@^4.0.0: version "4.2.0" resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-4.2.0.tgz#c464e4cde4ac789f4e0735c5d75beb49d7b30b31" Loading Loading @@ -5312,20 +5299,6 @@ define-properties@^1.1.3, define-properties@^1.1.4, define-properties@^1.2.0: has-property-descriptors "^1.0.0" object-keys "^1.1.1" del@^6.0.0: version "6.1.1" resolved "https://registry.npmjs.org/del/-/del-6.1.1.tgz" integrity sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg== dependencies: globby "^11.0.1" graceful-fs "^4.2.4" is-glob "^4.0.1" is-path-cwd "^2.2.0" is-path-inside "^3.0.2" p-map "^4.0.0" rimraf "^3.0.2" slash "^3.0.0" delaunator@5: version "5.0.0" resolved "https://registry.yarnpkg.com/delaunator/-/delaunator-5.0.0.tgz#60f052b28bd91c9b4566850ebf7756efe821d81b" Loading Loading @@ -6708,7 +6681,7 @@ globalthis@^1.0.3: dependencies: define-properties "^1.1.3" globby@^11.0.1, globby@^11.1.0: globby@^11.1.0: version "11.1.0" resolved "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz" integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g== Loading Loading @@ -7370,12 +7343,7 @@ is-number@^7.0.0: resolved "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz" integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng== is-path-cwd@^2.2.0: version "2.2.0" resolved "https://registry.npmjs.org/is-path-cwd/-/is-path-cwd-2.2.0.tgz" integrity sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ== is-path-inside@^3.0.2, is-path-inside@^3.0.3: is-path-inside@^3.0.3: version "3.0.3" resolved "https://registry.npmjs.org/is-path-inside/-/is-path-inside-3.0.3.tgz" integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ== Loading Loading @@ -9211,13 +9179,6 @@ p-locate@^5.0.0: dependencies: p-limit "^3.0.2" p-map@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/p-map/-/p-map-4.0.0.tgz" integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ== dependencies: aggregate-error "^3.0.0" p-map@^5.1.0: version "5.5.0" resolved "https://registry.yarnpkg.com/p-map/-/p-map-5.5.0.tgz#054ca8ca778dfa4cf3f8db6638ccb5b937266715" Loading
lib/galaxy/datatypes/binary.py +30 −13 Original line number Diff line number Diff line Loading @@ -549,22 +549,29 @@ class _BamOrSam: Helper class to set the metadata common to sam and bam files """ max_references = 100000 def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: try: bam_file = pysam.AlignmentFile(dataset.get_file_name(), mode="rb") with pysam.AlignmentFile(dataset.get_file_name(), mode="rb", check_sq=False) as bam_file: # TODO: Reference names, lengths, read_groups and headers can become very large, truncate when necessary if bam_file.nreferences <= self.max_references: dataset.metadata.reference_names = list(bam_file.references) dataset.metadata.reference_lengths = list(bam_file.lengths) dataset.metadata.bam_header = dict(bam_file.header.items()) # type: ignore [attr-defined] dataset.metadata.read_groups = [ read_group["ID"] for read_group in dataset.metadata.bam_header.get("RG", []) if "ID" in read_group read_group["ID"] for read_group in dataset.metadata.bam_header.get("RG", []) if "ID" in read_group ] dataset.metadata.sort_order = dataset.metadata.bam_header.get("HD", {}).get("SO", None) dataset.metadata.bam_version = dataset.metadata.bam_header.get("HD", {}).get("VN", None) else: dataset.metadata.metadata_incomplete = True dataset.metadata.sort_order = bam_file.header.get("HD", {}).get("SO", None) # type: ignore [attr-defined] dataset.metadata.bam_version = bam_file.header.get("HD", {}).get("VN", None) # type: ignore [attr-defined] except Exception: # Per Dan, don't log here because doing so will cause datasets that # fail metadata to end in the error state pass dataset.metadata.metadata_incomplete = True class BamNative(CompressedArchive, _BamOrSam): Loading Loading @@ -653,6 +660,16 @@ class BamNative(CompressedArchive, _BamOrSam): optional=True, no_value={}, ) MetadataElement( name="metadata_incomplete", default=False, desc="Indicates if metadata is incomplete", param=MetadataParameter, readonly=True, visible=False, optional=True, no_value=False, ) def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: _BamOrSam().set_meta(dataset, overwrite=overwrite, **kwd) Loading Loading @@ -1051,7 +1068,7 @@ class BamInputSorted(BamNative): """ # The best way to ensure that BAM files are coordinate-sorted and indexable # is to actually index them. with pysam.AlignmentFile(filename=file_name) as f: with pysam.AlignmentFile(filename=file_name, check_sq=False) as f: # The only sure thing we know here is that the sort order can't be coordinate return f.header.get("HD", {}).get("SO") == "coordinate" # type: ignore[attr-defined] Loading @@ -1071,7 +1088,7 @@ class BamQuerynameSorted(BamInputSorted): """ # The best way to ensure that BAM files are coordinate-sorted and indexable # is to actually index them. with pysam.AlignmentFile(filename=file_name) as f: with pysam.AlignmentFile(filename=file_name, check_sq=False) as f: return f.header.get("HD", {}).get("SO") != "queryname" # type: ignore[attr-defined] Loading
lib/galaxy/workflow/modules.py +31 −15 Original line number Diff line number Diff line Loading @@ -1647,21 +1647,13 @@ class InputParameterModule(WorkflowModule): ] def execute( self, trans, progress: "WorkflowProgress", invocation_step, use_cached_job: bool = False self, trans, progress: "WorkflowProgress", invocation_step: "WorkflowInvocationStep", use_cached_job: bool = False, ) -> Optional[bool]: step = invocation_step.workflow_step if step.id in progress.inputs_by_step_id: input_value = progress.inputs_by_step_id[step.id] else: input_value = step.state.inputs["input"] if input_value is NO_REPLACEMENT: default_value = step.get_input_default_value(NO_REPLACEMENT) # TODO: look at parameter type and infer if value should be a dictionary # instead. Guessing only field parameter types in CWL branch would have # default as dictionary like this. if not isinstance(default_value, dict): default_value = {"value": default_value} input_value = default_value.get("value", NO_REPLACEMENT) input_value = self.get_input_value(progress, invocation_step) input_param = self.get_runtime_inputs(self)["input"] # TODO: raise DelayedWorkflowEvaluation if replacement not ready ? Need test try: Loading @@ -1681,13 +1673,37 @@ class InputParameterModule(WorkflowModule): except ValueError as e: raise FailWorkflowEvaluation( why=InvocationFailureWorkflowParameterInvalid( reason=FailureReason.workflow_parameter_invalid, workflow_step_id=step.id, details=str(e) reason=FailureReason.workflow_parameter_invalid, workflow_step_id=invocation_step.workflow_step_id, details=str(e), ) ) step_outputs = dict(output=input_value) progress.set_outputs_for_input(invocation_step, step_outputs) return None def get_input_value(self, progress: "WorkflowProgress", invocation_step: "WorkflowInvocationStep"): step = invocation_step.workflow_step if step.id in progress.inputs_by_step_id: input_value = progress.inputs_by_step_id[step.id] else: assert step.state input_value = step.state.inputs["input"] if input_value is NO_REPLACEMENT: default_value = step.get_input_default_value(NO_REPLACEMENT) # TODO: look at parameter type and infer if value should be a dictionary # instead. Guessing only field parameter types in CWL branch would have # default as dictionary like this. if not isinstance(default_value, dict): default_value = {"value": default_value} input_value = default_value.get("value", NO_REPLACEMENT) return input_value def recover_mapping(self, invocation_step: "WorkflowInvocationStep", progress: "WorkflowProgress"): input_value = self.get_input_value(progress, invocation_step) step_outputs = dict(output=input_value) progress.set_outputs_for_input(invocation_step, step_outputs, already_persisted=True) def step_state_to_tool_state(self, state): state = safe_loads(state) default_set, default_value = False, None Loading
lib/galaxy/workflow/run.py +0 −8 Original line number Diff line number Diff line Loading @@ -608,17 +608,9 @@ class WorkflowProgress: outputs[invocation_step.output_value.workflow_output.output_name] = invocation_step.output_value.value self.outputs[step.id] = outputs if not already_persisted: workflow_outputs_by_name = {wo.output_name: wo for wo in step.workflow_outputs} for output_name, output_object in outputs.items(): if hasattr(output_object, "history_content_type"): invocation_step.add_output(output_name, output_object) else: # Add this non-data, non workflow-output output to the workflow outputs. # This is required for recovering the output in the next scheduling iteration, # and should be replaced with a WorkflowInvocationStepOutputValue ASAP. if not workflow_outputs_by_name.get(output_name) and output_object is not NO_REPLACEMENT: workflow_output = model.WorkflowOutput(step, output_name=output_name) step.workflow_outputs.append(workflow_output) for workflow_output in step.workflow_outputs: assert workflow_output.output_name output_name = workflow_output.output_name Loading