Unverified Commit 193affd2 authored by Ahmed Hamid Awan's avatar Ahmed Hamid Awan Committed by GitHub
Browse files

Merge pull request #20319 from mvdbeek/performance_fix_deleted_output_check

[25.0] Improve performance of job cache query
parents 38f322f2 32914f2e
Loading
Loading
Loading
Loading
+684 −112

Files changed.

Preview size limit exceeded, changes collapsed.

+0 −27
Original line number Diff line number Diff line
@@ -1614,9 +1614,6 @@ class Job(Base, JobLike, UsesCreateAndUpdateTime, Dictifiable, Serializable):
        back_populates="job", uselist=False
    )

    any_output_dataset_collection_instances_deleted = None
    any_output_dataset_deleted = None

    dict_collection_visible_keys = ["id", "state", "exit_code", "update_time", "create_time", "galaxy_version"]
    dict_element_visible_keys = [
        "id",
@@ -12082,30 +12079,6 @@ mapper_registry.map_imperatively(
# ----------------------------------------------------------------------------------------
# The following statements must not precede the mapped models defined above.

# Correlated EXISTS flag: true when any output dataset collection instance
# (HDCA) produced by this job has been deleted.  Wrapped in deferred() so the
# subquery is only emitted when the attribute is actually accessed, keeping
# ordinary Job queries cheap.  Must be defined after the mapped models above
# (see the comment preceding this section in the original file).
Job.any_output_dataset_collection_instances_deleted = deferred(
    column_property(  # type:ignore[assignment]
        exists(HistoryDatasetCollectionAssociation.id).where(
            and_(
                # Correlate back to the owning Job via the job-to-output link table.
                Job.id == JobToOutputDatasetCollectionAssociation.job_id,
                HistoryDatasetCollectionAssociation.id == JobToOutputDatasetCollectionAssociation.dataset_collection_id,
                HistoryDatasetCollectionAssociation.deleted == true(),
            )
        ),
    )
)

# Correlated EXISTS flag: true when any output dataset (HDA) produced by this
# job has been deleted.  Deferred for the same reason as the collection
# variant: the subquery only runs on attribute access.
# NOTE(review): this expression addresses columns via
# HistoryDatasetAssociation.table.c.* rather than mapped-class attributes —
# presumably because HDA is mapped imperatively over an explicit Table;
# confirm before normalizing the two styles.
Job.any_output_dataset_deleted = deferred(
    column_property(  # type:ignore[assignment]
        exists(HistoryDatasetAssociation.id).where(
            and_(
                # Correlate back to the owning Job via the job-to-output link table.
                Job.id == JobToOutputDatasetAssociation.job_id,
                HistoryDatasetAssociation.table.c.id == JobToOutputDatasetAssociation.dataset_id,
                HistoryDatasetAssociation.table.c.deleted == true(),
            )
        ),
    )
)

History.average_rating = column_property(  # type:ignore[assignment]
    select(func.avg(HistoryRatingAssociation.rating))
    .where(HistoryRatingAssociation.history_id == History.id)
+52 −0
Original line number Diff line number Diff line
@@ -1056,6 +1056,41 @@ class TestToolsApi(ApiTestCase, TestsTools):
            assert len(filenames) == 3, filenames
            assert len(set(filenames)) <= 2, filenames

    @skip_without_tool("cat_list")
    @skip_without_tool("__SORTLIST__")
    def test_run_cat_list_hdca_sort_order_respecrted_use_cached_job(self):
        """Cached-job matching must respect HDCA element order.

        Runs ``cat_list`` on a sorted copy of a collection, then on the
        original (differently ordered) collection with ``use_cached_job``;
        the second run must NOT reuse the first job, because the element
        order of the input collection differs.

        (The "respecrted" typo in the name is kept deliberately — renaming
        the method would break external references to this test.)
        """
        with self.dataset_populator.test_history_for(
            self.test_run_cat_list_hdca_sort_order_respecrted_use_cached_job
        ) as history_id:
            fetch_response = self.dataset_collection_populator.create_list_in_history(
                history_id, wait=True, contents=[("C", "3"), ("B", "2"), ("A", "1")]
            ).json()
            hdca_not_sorted_id = fetch_response["output_collections"][0]["id"]
            result = self._run(
                tool_id="__SORTLIST__",
                history_id=history_id,
                inputs={"input": {"src": "hdca", "id": hdca_not_sorted_id}},
                assert_ok=True,
            )
            hdca_sorted_id = result["output_collections"][0]["id"]
            # Fetch each collection once (a redundant duplicate fetch of the
            # sorted collection was removed) and sanity-check that sorting
            # actually changed the element order.
            hdca_sorted = self.dataset_populator.get_history_collection_details(history_id, content_id=hdca_sorted_id)
            hdca_not_sorted = self.dataset_populator.get_history_collection_details(
                history_id, content_id=hdca_not_sorted_id
            )
            assert hdca_sorted["elements"][0]["object"]["name"] == "A"
            assert hdca_not_sorted["elements"][0]["object"]["name"] == "C"
            self._run("cat_list", history_id, inputs={"input1": {"src": "hdca", "id": hdca_sorted_id}}, assert_ok=True)
            job = self._run(
                "cat_list",
                history_id,
                inputs={"input1": {"src": "hdca", "id": hdca_not_sorted_id}},
                assert_ok=True,
                use_cached_job=True,
            )
            job_details = self.dataset_populator.get_job_details(job["jobs"][0]["id"], full=True).json()
            # Different element order must defeat the job cache.
            assert not job_details["copied_from_job_id"]

    @skip_without_tool("cat1")
    @requires_new_history
    def test_run_cat1_use_cached_job_from_public_history(self):
@@ -1097,6 +1132,23 @@ class TestToolsApi(ApiTestCase, TestsTools):
            job_details = self.dataset_populator.get_job_details(copied_job_id, full=True).json()
            assert job_details["copied_from_job_id"] == outputs_one["jobs"][0]["id"]

    @skip_without_tool("cat_list")
    @requires_new_history
    def test_run_cat_list_use_cached_job_repeated_input(self):
        """A job whose data input is supplied twice must still hit the job cache."""
        with self.dataset_populator.test_history_for(
            self.test_run_cat_list_use_cached_job_repeated_input
        ) as history_id:
            # Feed the very same dataset twice as a repeated data parameter.
            dataset_param = dataset_to_param(self.dataset_populator.new_dataset(history_id=history_id))
            tool_inputs = {"input1": {"batch": False, "values": [dataset_param, dataset_param]}}
            first_run = self._run("cat_list", history_id, tool_inputs, assert_ok=True, wait_for_job=True)
            second_run = self._run(
                "cat_list", history_id, tool_inputs, assert_ok=True, wait_for_job=True, use_cached_job=True
            )
            cached_job_id = second_run["jobs"][0]["id"]
            details = self.dataset_populator.get_job_details(cached_job_id, full=True).json()
            # The second run must have been satisfied from the first job.
            assert details["copied_from_job_id"] == first_run["jobs"][0]["id"]

    @skip_without_tool("collection_creates_list")
    @requires_new_history
    def test_run_collection_creates_list_use_cached_job(self):