Unverified Commit 193affd2 authored by Ahmed Hamid Awan's avatar Ahmed Hamid Awan Committed by GitHub
Browse files

Merge pull request #20319 from mvdbeek/performance_fix_deleted_output_check

[25.0] Improve performance of job cache query
parents 38f322f2 32914f2e
Loading
Loading
Loading
Loading
+684 −112

Files changed.

Preview size limit exceeded, changes collapsed.

+0 −27
Original line number Diff line number Diff line
@@ -1614,9 +1614,6 @@ class Job(Base, JobLike, UsesCreateAndUpdateTime, Dictifiable, Serializable):
        back_populates="job", uselist=False
    )

    any_output_dataset_collection_instances_deleted = None
    any_output_dataset_deleted = None

    dict_collection_visible_keys = ["id", "state", "exit_code", "update_time", "create_time", "galaxy_version"]
    dict_element_visible_keys = [
        "id",
@@ -12082,30 +12079,6 @@ mapper_registry.map_imperatively(
# ----------------------------------------------------------------------------------------
# The following statements must not precede the mapped models defined above.

# Correlated EXISTS flag: true when any output dataset collection instance
# (HDCA) produced by this job has been deleted.  Wrapped in deferred() so the
# subquery is only emitted when the attribute is actually accessed, keeping
# ordinary Job queries cheap.  Must be defined after the mapped models above
# (see the comment preceding this section in the original file).
Job.any_output_dataset_collection_instances_deleted = deferred(
    column_property(  # type:ignore[assignment]
        exists(HistoryDatasetCollectionAssociation.id).where(
            and_(
                # Correlate back to the owning Job via the job-to-output link table.
                Job.id == JobToOutputDatasetCollectionAssociation.job_id,
                HistoryDatasetCollectionAssociation.id == JobToOutputDatasetCollectionAssociation.dataset_collection_id,
                HistoryDatasetCollectionAssociation.deleted == true(),
            )
        ),
    )
)

# Correlated EXISTS flag: true when any output dataset (HDA) produced by this
# job has been deleted.  Deferred for the same reason as the collection
# variant: the subquery only runs on attribute access.
# NOTE(review): this expression addresses columns via
# HistoryDatasetAssociation.table.c.* rather than mapped-class attributes —
# presumably because HDA is mapped imperatively over an explicit Table;
# confirm before normalizing the two styles.
Job.any_output_dataset_deleted = deferred(
    column_property(  # type:ignore[assignment]
        exists(HistoryDatasetAssociation.id).where(
            and_(
                # Correlate back to the owning Job via the job-to-output link table.
                Job.id == JobToOutputDatasetAssociation.job_id,
                HistoryDatasetAssociation.table.c.id == JobToOutputDatasetAssociation.dataset_id,
                HistoryDatasetAssociation.table.c.deleted == true(),
            )
        ),
    )
)

History.average_rating = column_property(  # type:ignore[assignment]
    select(func.avg(HistoryRatingAssociation.rating))
    .where(HistoryRatingAssociation.history_id == History.id)
+52 −0
Original line number Diff line number Diff line
@@ -1056,6 +1056,41 @@ class TestToolsApi(ApiTestCase, TestsTools):
            assert len(filenames) == 3, filenames
            assert len(set(filenames)) <= 2, filenames

    @skip_without_tool("cat_list")
    @skip_without_tool("__SORTLIST__")
    def test_run_cat_list_hdca_sort_order_respecrted_use_cached_job(self):
        """Cached-job matching must respect HDCA element order.

        Runs ``cat_list`` on a sorted copy of a collection, then on the
        original (differently ordered) collection with ``use_cached_job``;
        the second run must NOT reuse the first job, because the element
        order of the input collection differs.

        (The "respecrted" typo in the name is kept deliberately — renaming
        the method would break external references to this test.)
        """
        with self.dataset_populator.test_history_for(
            self.test_run_cat_list_hdca_sort_order_respecrted_use_cached_job
        ) as history_id:
            fetch_response = self.dataset_collection_populator.create_list_in_history(
                history_id, wait=True, contents=[("C", "3"), ("B", "2"), ("A", "1")]
            ).json()
            hdca_not_sorted_id = fetch_response["output_collections"][0]["id"]
            result = self._run(
                tool_id="__SORTLIST__",
                history_id=history_id,
                inputs={"input": {"src": "hdca", "id": hdca_not_sorted_id}},
                assert_ok=True,
            )
            hdca_sorted_id = result["output_collections"][0]["id"]
            # Fetch each collection once (a redundant duplicate fetch of the
            # sorted collection was removed) and sanity-check that sorting
            # actually changed the element order.
            hdca_sorted = self.dataset_populator.get_history_collection_details(history_id, content_id=hdca_sorted_id)
            hdca_not_sorted = self.dataset_populator.get_history_collection_details(
                history_id, content_id=hdca_not_sorted_id
            )
            assert hdca_sorted["elements"][0]["object"]["name"] == "A"
            assert hdca_not_sorted["elements"][0]["object"]["name"] == "C"
            self._run("cat_list", history_id, inputs={"input1": {"src": "hdca", "id": hdca_sorted_id}}, assert_ok=True)
            job = self._run(
                "cat_list",
                history_id,
                inputs={"input1": {"src": "hdca", "id": hdca_not_sorted_id}},
                assert_ok=True,
                use_cached_job=True,
            )
            job_details = self.dataset_populator.get_job_details(job["jobs"][0]["id"], full=True).json()
            # Different element order must defeat the job cache.
            assert not job_details["copied_from_job_id"]

    @skip_without_tool("cat1")
    @requires_new_history
    def test_run_cat1_use_cached_job_from_public_history(self):
@@ -1097,6 +1132,23 @@ class TestToolsApi(ApiTestCase, TestsTools):
            job_details = self.dataset_populator.get_job_details(copied_job_id, full=True).json()
            assert job_details["copied_from_job_id"] == outputs_one["jobs"][0]["id"]

    @skip_without_tool("cat_list")
    @requires_new_history
    def test_run_cat_list_use_cached_job_repeated_input(self):
        """A job whose data input is supplied twice must still hit the job cache."""
        with self.dataset_populator.test_history_for(
            self.test_run_cat_list_use_cached_job_repeated_input
        ) as history_id:
            # Feed the very same dataset twice as a repeated data parameter.
            dataset_param = dataset_to_param(self.dataset_populator.new_dataset(history_id=history_id))
            tool_inputs = {"input1": {"batch": False, "values": [dataset_param, dataset_param]}}
            first_run = self._run("cat_list", history_id, tool_inputs, assert_ok=True, wait_for_job=True)
            second_run = self._run(
                "cat_list", history_id, tool_inputs, assert_ok=True, wait_for_job=True, use_cached_job=True
            )
            cached_job_id = second_run["jobs"][0]["id"]
            details = self.dataset_populator.get_job_details(cached_job_id, full=True).json()
            # The second run must have been satisfied from the first job.
            assert details["copied_from_job_id"] == first_run["jobs"][0]["id"]

    @skip_without_tool("collection_creates_list")
    @requires_new_history
    def test_run_collection_creates_list_use_cached_job(self):