Unverified Commit e3e61fb7 authored by Marius van den Beek's avatar Marius van den Beek Committed by GitHub
Browse files

Merge pull request #15305 from davelopez/22.05_fix_bulk_dataset_purge

[22.05] Fix immediate dataset purge in bulk
parents ca77e184 543bb905
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -557,7 +557,7 @@ class DatasetsService(ServiceBase, UsesVisualizationMixin):
                if dataset.src == DatasetSourceType.hda:
                    self.hda_manager.error_if_uploading(dataset_instance)
                if payload.purge:
                    manager.purge(dataset_instance, flush=False)
                    manager.purge(dataset_instance, flush=True)
                else:
                    manager.delete(dataset_instance, flush=False)
                success_count += 1
+1 −1
Original line number Diff line number Diff line
@@ -1609,7 +1609,7 @@ class HistoryItemOperator:
            return
        if isinstance(item, HistoryDatasetCollectionAssociation):
            return self.dataset_collection_manager.delete(trans, "history", item.id, recursive=True, purge=True)
        self.hda_manager.purge(item, flush=self.flush)
        self.hda_manager.purge(item, flush=True)

    def _change_datatype(
        self, item: HistoryItemModel, params: ChangeDatatypeOperationParams, trans: ProvidesHistoryContext
+2 −0
Original line number Diff line number Diff line
@@ -446,6 +446,8 @@ class DatasetsApiTestCase(ApiTestCase):
            hda = self.dataset_populator.new_dataset(history_id)
            dataset_map[index] = hda["id"]

        self.dataset_populator.wait_for_history(history_id)

        expected_deleted_source_ids = [
            {"id": dataset_map[1], "src": "hda"},
            {"id": dataset_map[2], "src": "hda"},
+81 −0
Original line number Diff line number Diff line
import os
from typing import (
    Callable,
    Optional,
)

from galaxy_test.base.populators import DatasetPopulator
from galaxy_test.driver import integration_util


class PurgeDatasetsIntegrationTestCase(integration_util.IntegrationTestCase):
    """Integration tests asserting that purging an HDA — via either the
    datasets batch-delete API or the history-contents bulk API — removes
    the underlying dataset file from disk.
    """

    def setUp(self):
        super().setUp()
        populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_populator = populator
        self.history_id = populator.new_history()

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        """Enable user-initiated dataset purging on the test server."""
        super().handle_galaxy_config_kwds(config)
        config["allow_user_dataset_purge"] = True

    def test_purge_dataset_batch_removes_underlying_dataset_from_disk(self):
        self._expect_dataset_purged_on(self._purge_hda_using_batch)

    def test_purge_history_content_bulk_removes_underlying_dataset_from_disk(self):
        self._expect_dataset_purged_on(self._purge_hda_using_bulk)

    def _expect_dataset_purged_on(self, purge_operation: Callable):
        """Create an HDA, purge it via ``purge_operation``, and verify the
        backing file disappears from disk."""
        new_hda = self.dataset_populator.new_dataset(self.history_id, wait=True)
        dataset_id = new_hda["id"]

        # Verify the file exists before purging; otherwise the final
        # assertion would pass vacuously.
        path_on_disk = self._get_underlying_dataset_on_disk(dataset_id)
        assert self._file_exists_on_disk(path_on_disk)

        purge_operation(dataset_id)

        # Purging may complete asynchronously; block until it is reflected.
        self.dataset_populator.wait_for_purge(self.history_id, dataset_id)

        assert not self._file_exists_on_disk(path_on_disk)

    def _purge_hda_using_batch(self, hda_id):
        """Purge a single HDA through the datasets batch-delete endpoint."""
        source_ids = [{"id": hda_id, "src": "hda"}]
        response = self._delete("datasets", data={"purge": True, "datasets": source_ids}, json=True)
        self._assert_status_code_is_ok(response)
        assert response.json()["success_count"] == 1

    def _purge_hda_using_bulk(self, hda_id):
        """Purge a single HDA through the history-contents bulk endpoint."""
        bulk_item = {
            "id": hda_id,
            "history_content_type": "dataset",
        }
        response = self._put(
            f"histories/{self.history_id}/contents/bulk",
            data={"operation": "purge", "items": [bulk_item]},
            json=True,
        )
        self._assert_status_code_is_ok(response)
        assert response.json()["success_count"] == 1

    def _get_underlying_dataset_on_disk(self, hda_id: str) -> Optional[str]:
        """Return the HDA's on-disk file path from the admin dataset view, if any."""
        details = self._get(f"datasets/{hda_id}", admin=True).json()
        return details.get("file_name")

    def _file_exists_on_disk(self, filename: Optional[str]) -> bool:
        """True when ``filename`` is non-empty and names a regular file."""
        return bool(filename) and os.path.isfile(filename)