Unverified commit 7a0bb123 authored by mvdbeek

Merge branch 'release_25.0' into dev

parents e5ab06ff 0aa5616b
+2 −41
@@ -3550,14 +3550,6 @@ agent-base@^7.0.2:
   dependencies:
     debug "^4.3.4"
 
-aggregate-error@^3.0.0:
-  version "3.1.0"
-  resolved "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz"
-  integrity sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==
-  dependencies:
-    clean-stack "^2.0.0"
-    indent-string "^4.0.0"
-
 aggregate-error@^4.0.0:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-4.0.1.tgz#25091fe1573b9e0be892aeda15c7c66a545f758e"
@@ -4378,11 +4370,6 @@ cjs-module-lexer@^1.0.0:
   resolved "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.2.2.tgz"
   integrity sha512-cOU9usZw8/dXIXKtwa8pM0OTJQuJkxMN6w30csNRUerHfeQ5R6U3kkU/FtJeIf3M202OHfY2U8ccInBG7/xogA==
 
-clean-stack@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.npmjs.org/clean-stack/-/clean-stack-2.2.0.tgz"
-  integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==
-
 clean-stack@^4.0.0:
   version "4.2.0"
   resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-4.2.0.tgz#c464e4cde4ac789f4e0735c5d75beb49d7b30b31"
@@ -5312,20 +5299,6 @@ define-properties@^1.1.3, define-properties@^1.1.4, define-properties@^1.2.0:
     has-property-descriptors "^1.0.0"
     object-keys "^1.1.1"
 
-del@^6.0.0:
-  version "6.1.1"
-  resolved "https://registry.npmjs.org/del/-/del-6.1.1.tgz"
-  integrity sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg==
-  dependencies:
-    globby "^11.0.1"
-    graceful-fs "^4.2.4"
-    is-glob "^4.0.1"
-    is-path-cwd "^2.2.0"
-    is-path-inside "^3.0.2"
-    p-map "^4.0.0"
-    rimraf "^3.0.2"
-    slash "^3.0.0"
-
 delaunator@5:
   version "5.0.0"
   resolved "https://registry.yarnpkg.com/delaunator/-/delaunator-5.0.0.tgz#60f052b28bd91c9b4566850ebf7756efe821d81b"
@@ -6708,7 +6681,7 @@ globalthis@^1.0.3:
   dependencies:
     define-properties "^1.1.3"
 
-globby@^11.0.1, globby@^11.1.0:
+globby@^11.1.0:
   version "11.1.0"
   resolved "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz"
   integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==
@@ -7370,12 +7343,7 @@ is-number@^7.0.0:
   resolved "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz"
   integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==
 
-is-path-cwd@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.npmjs.org/is-path-cwd/-/is-path-cwd-2.2.0.tgz"
-  integrity sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ==
-
-is-path-inside@^3.0.2, is-path-inside@^3.0.3:
+is-path-inside@^3.0.3:
   version "3.0.3"
   resolved "https://registry.npmjs.org/is-path-inside/-/is-path-inside-3.0.3.tgz"
   integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ==
@@ -9211,13 +9179,6 @@ p-locate@^5.0.0:
   dependencies:
     p-limit "^3.0.2"
 
-p-map@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.npmjs.org/p-map/-/p-map-4.0.0.tgz"
-  integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==
-  dependencies:
-    aggregate-error "^3.0.0"
-
 p-map@^5.1.0:
   version "5.5.0"
   resolved "https://registry.yarnpkg.com/p-map/-/p-map-5.5.0.tgz#054ca8ca778dfa4cf3f8db6638ccb5b937266715"
+30 −13
@@ -549,22 +549,29 @@ class _BamOrSam:
     Helper class to set the metadata common to sam and bam files
     """
 
+    max_references = 100000
+
     def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
         try:
-            bam_file = pysam.AlignmentFile(dataset.get_file_name(), mode="rb")
-            # TODO: Reference names, lengths, read_groups and headers can become very large, truncate when necessary
-            dataset.metadata.reference_names = list(bam_file.references)
-            dataset.metadata.reference_lengths = list(bam_file.lengths)
-            dataset.metadata.bam_header = dict(bam_file.header.items())  # type: ignore [attr-defined]
-            dataset.metadata.read_groups = [
-                read_group["ID"] for read_group in dataset.metadata.bam_header.get("RG", []) if "ID" in read_group
-            ]
-            dataset.metadata.sort_order = dataset.metadata.bam_header.get("HD", {}).get("SO", None)
-            dataset.metadata.bam_version = dataset.metadata.bam_header.get("HD", {}).get("VN", None)
+            with pysam.AlignmentFile(dataset.get_file_name(), mode="rb", check_sq=False) as bam_file:
+                # TODO: Reference names, lengths, read_groups and headers can become very large, truncate when necessary
+                if bam_file.nreferences <= self.max_references:
+                    dataset.metadata.reference_names = list(bam_file.references)
+                    dataset.metadata.reference_lengths = list(bam_file.lengths)
+                    dataset.metadata.bam_header = dict(bam_file.header.items())  # type: ignore [attr-defined]
+                    dataset.metadata.read_groups = [
+                        read_group["ID"]
+                        for read_group in dataset.metadata.bam_header.get("RG", [])
+                        if "ID" in read_group
+                    ]
+                else:
+                    dataset.metadata.metadata_incomplete = True
+                dataset.metadata.sort_order = bam_file.header.get("HD", {}).get("SO", None)  # type: ignore [attr-defined]
+                dataset.metadata.bam_version = bam_file.header.get("HD", {}).get("VN", None)  # type: ignore [attr-defined]
         except Exception:
             # Per Dan, don't log here because doing so will cause datasets that
             # fail metadata to end in the error state
-            pass
+            dataset.metadata.metadata_incomplete = True
 
 
 class BamNative(CompressedArchive, _BamOrSam):
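Note: the behavioral crux of the hunk above is opening the BAM inside a context manager with check_sq=False. pysam otherwise refuses to open a BAM whose header declares no @SQ (reference sequence) records, so unaligned or header-only BAMs previously always fell into the silent except branch. A minimal sketch of the new pattern, assuming pysam is installed and mirroring only calls the diff itself makes:

import pysam

def read_bam_sort_order(path: str):
    # check_sq=False disables pysam's check that the header contains at
    # least one @SQ record, so unaligned BAMs open instead of raising.
    with pysam.AlignmentFile(path, mode="rb", check_sq=False) as bam_file:
        # Dict-style header access mirrors the diff; pysam's type stubs do
        # not declare it, hence the type: ignore comments above.
        return bam_file.header.get("HD", {}).get("SO")  # e.g. "coordinate"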
@@ -653,6 +660,16 @@ class BamNative(CompressedArchive, _BamOrSam):
         optional=True,
         no_value={},
     )
+    MetadataElement(
+        name="metadata_incomplete",
+        default=False,
+        desc="Indicates if metadata is incomplete",
+        param=MetadataParameter,
+        readonly=True,
+        visible=False,
+        optional=True,
+        no_value=False,
+    )
 
     def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
         _BamOrSam().set_meta(dataset, overwrite=overwrite, **kwd)
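Note: the metadata_incomplete element registered above gives both failure modes in set_meta() (reference-count overflow and header-parse errors) a visible marker in place of the old silent pass. A hypothetical consumer, not part of this commit, could use it to tell "no references" apart from "references unknown":

def reference_names_if_complete(dataset):
    # metadata_incomplete is True when the BAM exceeded max_references or
    # when header parsing raised; either way the cached lists are partial
    # or absent and should not be trusted.
    if getattr(dataset.metadata, "metadata_incomplete", False):
        return None
    return dataset.metadata.reference_names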
@@ -1051,7 +1068,7 @@ class BamInputSorted(BamNative):
         """
         # The best way to ensure that BAM files are coordinate-sorted and indexable
         # is to actually index them.
-        with pysam.AlignmentFile(filename=file_name) as f:
+        with pysam.AlignmentFile(filename=file_name, check_sq=False) as f:
             # The only sure thing we know here is that the sort order can't be coordinate
             return f.header.get("HD", {}).get("SO") == "coordinate"  # type: ignore[attr-defined]
@@ -1071,7 +1088,7 @@ class BamQuerynameSorted(BamInputSorted):
         """
         # The best way to ensure that BAM files are coordinate-sorted and indexable
         # is to actually index them.
-        with pysam.AlignmentFile(filename=file_name) as f:
+        with pysam.AlignmentFile(filename=file_name, check_sq=False) as f:
             return f.header.get("HD", {}).get("SO") != "queryname"  # type: ignore[attr-defined]


+31 −15
@@ -1647,21 +1647,13 @@ class InputParameterModule(WorkflowModule):
        ]

    def execute(
        self, trans, progress: "WorkflowProgress", invocation_step, use_cached_job: bool = False
        self,
        trans,
        progress: "WorkflowProgress",
        invocation_step: "WorkflowInvocationStep",
        use_cached_job: bool = False,
    ) -> Optional[bool]:
        step = invocation_step.workflow_step
        if step.id in progress.inputs_by_step_id:
            input_value = progress.inputs_by_step_id[step.id]
        else:
            input_value = step.state.inputs["input"]
        if input_value is NO_REPLACEMENT:
            default_value = step.get_input_default_value(NO_REPLACEMENT)
            # TODO: look at parameter type and infer if value should be a dictionary
            # instead. Guessing only field parameter types in CWL branch would have
            # default as dictionary like this.
            if not isinstance(default_value, dict):
                default_value = {"value": default_value}
            input_value = default_value.get("value", NO_REPLACEMENT)
        input_value = self.get_input_value(progress, invocation_step)
        input_param = self.get_runtime_inputs(self)["input"]
        # TODO: raise DelayedWorkflowEvaluation if replacement not ready ? Need test
        try:
@@ -1681,13 +1673,37 @@ class InputParameterModule(WorkflowModule):
        except ValueError as e:
            raise FailWorkflowEvaluation(
                why=InvocationFailureWorkflowParameterInvalid(
                    reason=FailureReason.workflow_parameter_invalid, workflow_step_id=step.id, details=str(e)
                    reason=FailureReason.workflow_parameter_invalid,
                    workflow_step_id=invocation_step.workflow_step_id,
                    details=str(e),
                )
            )
        step_outputs = dict(output=input_value)
        progress.set_outputs_for_input(invocation_step, step_outputs)
        return None

    def get_input_value(self, progress: "WorkflowProgress", invocation_step: "WorkflowInvocationStep"):
        step = invocation_step.workflow_step
        if step.id in progress.inputs_by_step_id:
            input_value = progress.inputs_by_step_id[step.id]
        else:
            assert step.state
            input_value = step.state.inputs["input"]
        if input_value is NO_REPLACEMENT:
            default_value = step.get_input_default_value(NO_REPLACEMENT)
            # TODO: look at parameter type and infer if value should be a dictionary
            # instead. Guessing only field parameter types in CWL branch would have
            # default as dictionary like this.
            if not isinstance(default_value, dict):
                default_value = {"value": default_value}
            input_value = default_value.get("value", NO_REPLACEMENT)
        return input_value

    def recover_mapping(self, invocation_step: "WorkflowInvocationStep", progress: "WorkflowProgress"):
        input_value = self.get_input_value(progress, invocation_step)
        step_outputs = dict(output=input_value)
        progress.set_outputs_for_input(invocation_step, step_outputs, already_persisted=True)

    def step_state_to_tool_state(self, state):
        state = safe_loads(state)
        default_set, default_value = False, None
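Note: pulling the lookup into get_input_value() lets execute() and the new recover_mapping() share one resolution order: a value supplied at invocation time wins, then the value saved in the step's tool state, then the step's declared default. A standalone sketch of that order, simplified from the hunk above (NO_REPLACEMENT here is a stand-in for Galaxy's sentinel of the same name):

NO_REPLACEMENT = object()  # stand-in for Galaxy's sentinel

def resolve_input_value(progress_inputs, step_id, state_inputs, default_value):
    # An invocation-supplied value wins over the step's saved tool state.
    if step_id in progress_inputs:
        value = progress_inputs[step_id]
    else:
        value = state_inputs["input"]
    # Only if neither produced a real value does the declared default apply;
    # defaults may be stored bare or wrapped in a {"value": ...} dict.
    if value is NO_REPLACEMENT:
        if not isinstance(default_value, dict):
            default_value = {"value": default_value}
        value = default_value.get("value", NO_REPLACEMENT)
    return value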
+0 −8
@@ -608,17 +608,9 @@ class WorkflowProgress:
            outputs[invocation_step.output_value.workflow_output.output_name] = invocation_step.output_value.value
        self.outputs[step.id] = outputs
        if not already_persisted:
            workflow_outputs_by_name = {wo.output_name: wo for wo in step.workflow_outputs}
            for output_name, output_object in outputs.items():
                if hasattr(output_object, "history_content_type"):
                    invocation_step.add_output(output_name, output_object)
                else:
                    # Add this non-data, non workflow-output output to the workflow outputs.
                    # This is required for recovering the output in the next scheduling iteration,
                    # and should be replaced with a WorkflowInvocationStepOutputValue ASAP.
                    if not workflow_outputs_by_name.get(output_name) and output_object is not NO_REPLACEMENT:
                        workflow_output = model.WorkflowOutput(step, output_name=output_name)
                        step.workflow_outputs.append(workflow_output)
            for workflow_output in step.workflow_outputs:
                assert workflow_output.output_name
                output_name = workflow_output.output_name
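Note: recover_mapping() in the previous file calls set_outputs_for_input() with already_persisted=True, so on a later scheduling iteration the outputs are only replayed into the in-memory progress state and the persistence branch above is skipped. A toy illustration of the two paths (hypothetical and heavily simplified, not Galaxy's actual API):

class ToyProgress:
    # Stand-in for WorkflowProgress: outputs are rebuilt in memory on every
    # scheduling iteration but must be written to the database exactly once.
    def __init__(self):
        self.outputs = {}         # in-memory view, repopulated each iteration
        self.persisted_rows = []  # stands in for database writes

    def set_outputs_for_input(self, step_id, outputs, already_persisted=False):
        self.outputs[step_id] = outputs
        if not already_persisted:
            self.persisted_rows.append((step_id, outputs))

progress = ToyProgress()
# First iteration: execute() persists the input step's output.
progress.set_outputs_for_input(1, {"output": "some-parameter"})
# Recovery iteration: recover_mapping() replays it into memory only.
progress.set_outputs_for_input(1, {"output": "some-parameter"}, already_persisted=True)
assert len(progress.persisted_rows) == 1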