Unverified Commit 802d8976 authored by Marius van den Beek's avatar Marius van den Beek Committed by GitHub
Browse files

Merge pull request #14149 from davelopez/avoid_celery_set_meta_on_deferred

[22.05] Fix: skip setting metadata on deferred datasets when changing datatype in bulk
parents 2464f33b c58e3f45
Loading
Loading
Loading
Loading
+19 −10
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ from galaxy.datatypes import sniff
from galaxy.datatypes.registry import Registry as DatatypesRegistry
from galaxy.jobs import MinimalJobWrapper
from galaxy.managers.collections import DatasetCollectionManager
from galaxy.managers.datasets import DatasetAssociationManager
from galaxy.managers.hdas import HDAManager
from galaxy.managers.lddas import LDDAManager
from galaxy.managers.markdown_util import generate_branded_pdf
@@ -109,7 +110,12 @@ def change_datatype(
    datatype: str,
    model_class: str = "HistoryDatasetAssociation",
):
    dataset_instance = _get_dataset_by_id(hda_manager, ldda_manager, dataset_id, model_class)
    manager = _get_dataset_manager(hda_manager, ldda_manager, model_class)
    dataset_instance = manager.by_id(dataset_id)
    can_change_datatype = manager.ensure_can_change_datatype(dataset_instance, raiseException=False)
    if not can_change_datatype:
        log.info(f"Changing datatype is not allowed for {model_class} {dataset_instance.id}")
        return
    if datatype == "auto":
        path = dataset_instance.dataset.file_name
        datatype = sniff.guess_ext(path, datatypes_registry.sniff_order)
@@ -127,7 +133,12 @@ def set_metadata(
    model_class: str = "HistoryDatasetAssociation",
    overwrite: bool = True,
):
    dataset_instance = _get_dataset_by_id(hda_manager, ldda_manager, dataset_id, model_class)
    manager = _get_dataset_manager(hda_manager, ldda_manager, model_class)
    dataset_instance = manager.by_id(dataset_id)
    can_set_metadata = manager.ensure_can_set_metadata(dataset_instance, raiseException=False)
    if not can_set_metadata:
        log.info(f"Setting metadata is not allowed for {model_class} {dataset_instance.id}")
        return
    try:
        if overwrite:
            hda_manager.overwrite_metadata(dataset_instance)
@@ -140,17 +151,15 @@ def set_metadata(
    sa_session.flush()


def _get_dataset_by_id(
    hda_manager: HDAManager, ldda_manager: LDDAManager, dataset_id: int, model_class: str = "HistoryDatasetAssociation"
) -> model.DatasetInstance:
    dataset: model.DatasetInstance
def _get_dataset_manager(
    hda_manager: HDAManager, ldda_manager: LDDAManager, model_class: str = "HistoryDatasetAssociation"
) -> DatasetAssociationManager:
    if model_class == "HistoryDatasetAssociation":
        dataset = hda_manager.by_id(dataset_id)
        return hda_manager
    elif model_class == "LibraryDatasetDatasetAssociation":
        dataset = ldda_manager.by_id(dataset_id)
        return ldda_manager
    else:
        raise NotImplementedError(f"Cannot set metadata for model_class {model_class}")
    return dataset
        raise NotImplementedError(f"Cannot find manager for model_class {model_class}")


@galaxy_task(bind=True)
+8 −2
Original line number Diff line number Diff line
@@ -382,17 +382,23 @@ class DatasetAssociationManager(
            rval["modify_item_roles"] = modify_item_role_list
        return rval

    def ensure_can_change_datatype(self, dataset: model.DatasetInstance):
    def ensure_can_change_datatype(self, dataset: model.DatasetInstance, raiseException: bool = True) -> bool:
        if not dataset.datatype.is_datatype_change_allowed():
            if not raiseException:
                return False
            raise exceptions.InsufficientPermissionsException(
                f'Changing datatype "{dataset.extension}" is not allowed.'
            )
        return True

    def ensure_can_set_metadata(self, dataset: model.DatasetInstance):
    def ensure_can_set_metadata(self, dataset: model.DatasetInstance, raiseException: bool = True) -> bool:
        if not dataset.ok_to_edit_metadata():
            if not raiseException:
                return False
            raise exceptions.ItemAccessibilityException(
                "This dataset is currently being used as input or output. You cannot change datatype until the jobs have completed or you have canceled them."
            )
        return True

    def detect_datatype(self, trans, dataset_assoc):
        """Sniff and assign the datatype to a given dataset association (ldda or hda)"""
+10 −1
Original line number Diff line number Diff line
@@ -1621,8 +1621,17 @@ class HistoryItemOperator:
        if isinstance(item, HistoryDatasetAssociation):
            self.hda_manager.ensure_can_change_datatype(item)
            self.hda_manager.ensure_can_set_metadata(item)
            is_deferred = item.has_deferred_data
            item.dataset.state = item.dataset.states.SETTING_METADATA
            trans.sa_session.flush()
            if is_deferred:
                if params.datatype == "auto":  # if `auto` just keep the original guessed datatype
                    item.update()  # TODO: remove this `update` when we can properly track the operation results to notify the history
                else:
                    trans.app.datatypes_registry.change_datatype(item, params.datatype)
                item.dataset.state = item.dataset.states.DEFERRED
                trans.sa_session.flush()
            else:
                change_datatype.delay(dataset_id=item.id, datatype=params.datatype)

    def _change_dbkey(self, item: HistoryItemModel, params: ChangeDbkeyOperationParams):
+33 −0
Original line number Diff line number Diff line
@@ -1266,6 +1266,39 @@ class HistoryContentsApiBulkOperationTestCase(ApiTestCase):
                assert item["data_type"] == "galaxy.datatypes.tabular.Tabular"
                assert "metadata_column_names" in item

    def test_bulk_datatype_change_should_skip_set_metadata_on_deferred_data(self):
        with self.dataset_populator.test_history() as history_id:
            details = self.dataset_populator.create_deferred_hda(
                history_id, "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bed", ext="bed"
            )
            assert details["state"] == "deferred"
            assert details["extension"] == "bed"
            assert details["data_type"] == "galaxy.datatypes.interval.Bed"
            assert "metadata_columns" in details
            assert "metadata_delimiter" in details
            assert "metadata_comment_lines" in details

            new_datatype = "txt"
            payload = {
                "operation": "change_datatype",
                "params": {
                    "type": "change_datatype",
                    "datatype": new_datatype,
                },
            }
            bulk_operation_result = self._apply_bulk_operation(history_id, payload)
            self._assert_bulk_success(bulk_operation_result, expected_success_count=1)

            history_contents = self._get_history_contents(history_id, query="?v=dev&view=detailed")
            for item in history_contents:
                assert item["state"] == "deferred"
                assert item["extension"] == "txt"
                assert item["data_type"] == "galaxy.datatypes.data.Text"
                # It should discard old metadata
                assert "metadata_columns" not in item
                assert "metadata_delimiter" not in item
                assert "metadata_comment_lines" not in item

    @skip_without_tool("cat_data_and_sleep")
    def test_bulk_datatype_change_errors(self):
        with self.dataset_populator.test_history() as history_id: