lib/galaxy/managers/jobs.py  (+69 −46)

@@ -466,9 +466,9 @@ class JobSearch:
         data_types = []
         used_ids: List = []
         for k, input_list in input_data.items():
-            # k will be matched against the JobParameter.name column. This can be prefixed depending on whethter
+            # k will be matched against the JobParameter.name column. This can be prefixed depending on whether
             # the input is in a repeat, or not (section and conditional)
-            for type_values in input_list:
+            for value_index, type_values in enumerate(input_list):
                 t = type_values["src"]
                 v = type_values["id"]
                 requested_ids.append(v)
@@ -476,14 +476,25 @@ class JobSearch:
                 identifier = type_values["identifier"]
                 if t == "hda":
                     stmt = self._build_stmt_for_hda(
-                        stmt, data_conditions, used_ids, k, v, identifier, require_name_match=require_name_match
+                        stmt,
+                        data_conditions,
+                        used_ids,
+                        k,
+                        v,
+                        identifier,
+                        require_name_match=require_name_match,
+                        value_index=value_index,
                     )
                 elif t == "ldda":
-                    stmt = self._build_stmt_for_ldda(stmt, data_conditions, used_ids, k, v)
+                    stmt = self._build_stmt_for_ldda(stmt, data_conditions, used_ids, k, v, value_index=value_index)
                 elif t == "hdca":
-                    stmt = self._build_stmt_for_hdca(stmt, data_conditions, used_ids, k, v, user.id)
+                    stmt = self._build_stmt_for_hdca(
+                        stmt, data_conditions, used_ids, k, v, user.id, value_index=value_index
+                    )
                 elif t == "dce":
-                    stmt = self._build_stmt_for_dce(stmt, data_conditions, used_ids, k, v, user.id)
+                    stmt = self._build_stmt_for_dce(
+                        stmt, data_conditions, used_ids, k, v, user.id, value_index=value_index
+                    )
                 else:
                     log.error("Unknown input data type %s", t)
                     return None
@@ -670,13 +681,15 @@ class JobSearch:
         )
         return final_ordered_stmt

-    def _build_stmt_for_hda(self, stmt, data_conditions, used_ids, k, v, identifier, require_name_match=True):
+    def _build_stmt_for_hda(
+        self, stmt, data_conditions, used_ids, k, v, identifier, value_index, require_name_match=True
+    ):
         a = aliased(model.JobToInputDatasetAssociation)
         b = aliased(model.HistoryDatasetAssociation)
         c = aliased(model.HistoryDatasetAssociation)
         d = aliased(model.JobParameter)
         e = aliased(model.HistoryDatasetAssociationHistory)
-        labeled_col = a.dataset_id.label(f"{k}_{v}")
+        labeled_col = a.dataset_id.label(f"{k}_{value_index}")
         stmt = stmt.add_columns(labeled_col)
         used_ids.append(labeled_col)
         stmt = stmt.join(a, a.job_id == model.Job.id)
@@ -725,16 +738,18 @@ class JobSearch:
         )
         return stmt

-    def _build_stmt_for_ldda(self, stmt, data_conditions, used_ids, k, v):
+    def _build_stmt_for_ldda(self, stmt, data_conditions, used_ids, k, v, value_index):
         a = aliased(model.JobToInputLibraryDatasetAssociation)
-        labeled_col = a.ldda_id.label(f"{k}_{v}")
+        labeled_col = a.ldda_id.label(f"{k}_{value_index}")
         stmt = stmt.add_columns(labeled_col)
         stmt = stmt.join(a, a.job_id == model.Job.id)
         data_conditions.append(and_(a.name == k, a.ldda_id == v))
         used_ids.append(labeled_col)
         return stmt

-    def _build_stmt_for_hdca(self, stmt, data_conditions, used_ids, k, v, user_id, require_name_match=True):
+    def _build_stmt_for_hdca(
+        self, stmt, data_conditions, used_ids, k, v, user_id, value_index, require_name_match=True
+    ):
         # Strategy for efficiently finding equivalent HDCAs:
         # 1. Determine the structural depth of the target HDCA by its collection_type.
         # 2. For the target HDCA (identified by 'v'):
@@ -763,9 +778,12 @@ class JobSearch:
         )
         depth = collection_type.count(":") if collection_type else 0

-        a = aliased(model.JobToInputDatasetCollectionAssociation, name=f"job_to_input_dataset_collection_1_{k}_{v}")
+        a = aliased(
+            model.JobToInputDatasetCollectionAssociation, name=f"job_to_input_dataset_collection_1_{k}_{value_index}"
+        )
         hdca_input = aliased(
-            model.HistoryDatasetCollectionAssociation, name=f"history_dataset_collection_association_1_{k}_{v}"
+            model.HistoryDatasetCollectionAssociation,
+            name=f"history_dataset_collection_association_1_{k}_{value_index}",
         )

         _hdca_target_cte_ref = aliased(model.HistoryDatasetCollectionAssociation, name="_hdca_target_cte_ref")
@@ -798,7 +816,7 @@ class JobSearch:
             .where(_hdca_target_cte_ref.id == v)
             .distinct()
         )
-        reference_all_dataset_ids_cte = reference_all_dataset_ids_select.cte(f"ref_all_ds_ids_{k}_{v}")
+        reference_all_dataset_ids_cte = reference_all_dataset_ids_select.cte(f"ref_all_ds_ids_{k}_{value_index}")
         # --- END NEW CTE ---

         # CTE 1: signature_elements_cte (for the reference HDCA)
@@ -829,7 +847,7 @@ class JobSearch:
             _hda_cte_ref, _hda_cte_ref.id == _leaf_cte_ref.hda_id
         )
         signature_elements_select = signature_elements_select.where(_hdca_target_cte_ref.id == v)
-        signature_elements_cte = signature_elements_select.cte(f"signature_elements_{k}_{v}")
+        signature_elements_cte = signature_elements_select.cte(f"signature_elements_{k}_{value_index}")

         # CTE 2: reference_full_signature_cte
         # This CTE aggregates the path signature strings of the reference HDCA into a
@@ -843,7 +861,7 @@ class JobSearch:
                 ).label("signature_array")
             )
             .select_from(signature_elements_cte)
-            .cte(f"reference_full_signature_{k}_{v}")
+            .cte(f"reference_full_signature_{k}_{value_index}")
         )

         candidate_hdca = aliased(model.HistoryDatasetCollectionAssociation, name="candidate_hdca")
@@ -883,7 +901,7 @@ class JobSearch:
             .where(candidate_hda.dataset_id.in_(select(reference_all_dataset_ids_cte.c.ref_dataset_id_for_overlap)))
         )
         candidate_hdca_pre_filter_ids_cte = candidate_hdca_pre_filter_ids_select.cte(
-            f"cand_hdca_pre_filter_ids_{k}_{v}"
+            f"cand_hdca_pre_filter_ids_{k}_{value_index}"
         )
         # --- END NEW CTE ---
@@ -919,7 +937,7 @@ class JobSearch:
             candidate_hda, candidate_hda.id == _leaf_candidate_dce.hda_id
         )
         candidate_signature_elements_cte = candidate_signature_elements_select.cte(
-            f"candidate_signature_elements_{k}_{v}"
+            f"candidate_signature_elements_{k}_{value_index}"
         )

         # CTE 4: candidate_full_signatures_cte
@@ -936,7 +954,7 @@ class JobSearch:
             )
             .select_from(candidate_signature_elements_cte)
             .group_by(candidate_signature_elements_cte.c.candidate_hdca_id)
-            .cte(f"candidate_full_signatures_{k}_{v}")
+            .cte(f"candidate_full_signatures_{k}_{value_index}")
         )

         # CTE 5: equivalent_hdca_ids_cte
@@ -948,13 +966,13 @@ class JobSearch:
                 candidate_full_signatures_cte.c.full_signature_array
                 == select(reference_full_signature_cte.c.signature_array).scalar_subquery()
             )
-            .cte(f"equivalent_hdca_ids_{k}_{v}")
+            .cte(f"equivalent_hdca_ids_{k}_{value_index}")
         )

         # Main query `stmt` construction
         # This section joins the base job statement with the associations and then filters
         # by the HDCAs identified as equivalent in the CTEs.
-        labeled_col = a.dataset_collection_id.label(f"{k}_{v}")
+        labeled_col = a.dataset_collection_id.label(f"{k}_{value_index}")
         stmt = stmt.add_columns(labeled_col)
         stmt = stmt.join(a, a.job_id == model.Job.id)
@@ -972,7 +990,7 @@ class JobSearch:
         data_conditions.append(a.name == k)
         return stmt

-    def _build_stmt_for_dce(self, stmt, data_conditions, used_ids, k, v, user_id):
+    def _build_stmt_for_dce(self, stmt, data_conditions, used_ids, k, v, user_id, value_index):
         dce_root_target = self.sa_session.get_one(model.DatasetCollectionElement, v)

         # Determine if the target DCE points to an HDA or a child collection
@@ -987,15 +1005,18 @@ class JobSearch:
         depth = collection_type.count(":") if collection_type else 0

         # Aliases for the target DCE's collection structure
-        _dce_target_root_ref = aliased(model.DatasetCollectionElement, name=f"_dce_target_root_ref_{k}_{v}")
+        _dce_target_root_ref = aliased(
+            model.DatasetCollectionElement, name=f"_dce_target_root_ref_{k}_{value_index}"
+        )
         _dce_target_child_collection_ref = aliased(
-            model.DatasetCollection, name=f"_dce_target_child_collection_ref_{k}_{v}"
+            model.DatasetCollection, name=f"_dce_target_child_collection_ref_{k}_{value_index}"
         )
         # List of aliases for each potential nested level of DatasetCollectionElements
         _dce_target_level_list = [
-            aliased(model.DatasetCollectionElement, name=f"_dce_target_level_{k}_{v}_{i}") for i in range(depth + 1)
+            aliased(model.DatasetCollectionElement, name=f"_dce_target_level_{k}_{value_index}_{i}")
+            for i in range(depth + 1)
         ]
-        _hda_target_ref = aliased(model.HistoryDatasetAssociation, name=f"_hda_target_ref_{k}_{v}")
+        _hda_target_ref = aliased(model.HistoryDatasetAssociation, name=f"_hda_target_ref_{k}_{value_index}")

         # --- CTE: reference_dce_all_dataset_ids_cte ---
         # This CTE (Common Table Expression) identifies all distinct dataset IDs
@@ -1028,7 +1049,7 @@ class JobSearch:
             .where(_dce_target_root_ref.id == v)
             .distinct()
         )
-        reference_all_dataset_ids_cte = reference_all_dataset_ids_select.cte(f"ref_all_ds_ids_{k}_{v}")
+        reference_all_dataset_ids_cte = reference_all_dataset_ids_select.cte(f"ref_all_ds_ids_{k}_{value_index}")

         # --- CTE: reference_dce_signature_elements_cte ---
         # This CTE generates a "path signature string" for each individual element
@@ -1069,7 +1090,7 @@ class JobSearch:
             _hda_target_ref, _hda_target_ref.id == _leaf_target_dce_ref.hda_id
         ).where(_dce_target_root_ref.id == v)
         reference_dce_signature_elements_cte = reference_dce_signature_elements_select.cte(
-            f"ref_dce_sig_els_{k}_{v}"
+            f"ref_dce_sig_els_{k}_{value_index}"
         )

         # --- CTE: reference_full_signature_cte ---
@@ -1093,7 +1114,7 @@ class JobSearch:
                 ),  # Count elements based on path_signature_string
             )
             .select_from(reference_dce_signature_elements_cte)
-            .cte(f"ref_dce_full_sig_{k}_{v}")
+            .cte(f"ref_dce_full_sig_{k}_{value_index}")
         )

         # --- Aliases for Candidate Dataset Collection Structure ---
@@ -1101,14 +1122,14 @@ class JobSearch:
         # in the database, which will be compared against the reference.
         candidate_dce_root = aliased(model.DatasetCollectionElement, name=f"candidate_dce_root_{k}_{v}")
         candidate_dce_child_collection = aliased(
-            model.DatasetCollection, name=f"candidate_dce_child_collection_{k}_{v}"
+            model.DatasetCollection, name=f"candidate_dce_child_collection_{k}_{value_index}"
         )
         candidate_dce_level_list = [
-            aliased(model.DatasetCollectionElement, name=f"candidate_dce_level_{k}_{v}_{i}")
+            aliased(model.DatasetCollectionElement, name=f"candidate_dce_level_{k}_{value_index}_{i}")
             for i in range(depth + 1)
         ]
-        candidate_hda = aliased(model.HistoryDatasetAssociation, name=f"candidate_hda_{k}_{v}")
-        candidate_history = aliased(model.History, name=f"candidate_history_{k}_{v}")
+        candidate_hda = aliased(model.HistoryDatasetAssociation, name=f"candidate_hda_{k}_{value_index}")
+        candidate_history = aliased(model.History, name=f"candidate_history_{k}_{value_index}")

         # --- CTE: candidate_dce_pre_filter_ids_cte (Initial Candidate Filtering) ---
         # This CTE performs a first pass to quickly narrow down potential candidate
@@ -1146,7 +1167,7 @@ class JobSearch:
             .where(candidate_hda.dataset_id.in_(select(reference_all_dataset_ids_cte.c.ref_dataset_id_for_overlap)))
         )
         candidate_dce_pre_filter_ids_cte = candidate_dce_pre_filter_ids_select.cte(
-            f"cand_dce_pre_filter_ids_{k}_{v}"
+            f"cand_dce_pre_filter_ids_{k}_{value_index}"
         )

         # --- CTE: candidate_dce_signature_elements_cte ---
@@ -1195,7 +1216,7 @@ class JobSearch:
             .where(or_(candidate_history.published == true(), candidate_history.user_id == user_id))
         )
         candidate_dce_signature_elements_cte = candidate_dce_signature_elements_select.cte(
-            f"cand_dce_sig_els_{k}_{v}"
+            f"cand_dce_sig_els_{k}_{value_index}"
         )

         # --- CTE: candidate_pre_signatures_cte (Candidate Aggregation for Comparison) ---
@@ -1218,7 +1239,7 @@ class JobSearch:
             )
             .select_from(candidate_dce_signature_elements_cte)
             .group_by(candidate_dce_signature_elements_cte.c.candidate_dce_id)
-            .cte(f"cand_dce_pre_sig_{k}_{v}")
+            .cte(f"cand_dce_pre_sig_{k}_{value_index}")
         )

         # --- CTE: filtered_cand_dce_by_dataset_ids_cte (Filtering by Element Count and Dataset ID Array) ---
@@ -1238,7 +1259,7 @@ class JobSearch:
                     == reference_full_signature_cte.c.ordered_dataset_id_array,
                 )
             )
-            .cte(f"filtered_cand_dce_{k}_{v}")
+            .cte(f"filtered_cand_dce_{k}_{value_index}")
         )

         # --- CTE: final_candidate_signatures_cte (Final Full Signature Calculation for Matched Candidates) ---
@@ -1261,16 +1282,17 @@ class JobSearch:
                 )
             )
             .group_by(candidate_dce_signature_elements_cte.c.candidate_dce_id)
-            .cte(f"final_cand_dce_full_sig_{k}_{v}")
+            .cte(f"final_cand_dce_full_sig_{k}_{value_index}")
         )

         # --- Main Query Construction for Dataset Collection Elements ---
         # This section joins the main `stmt` (representing jobs) with the CTEs
         # to filter jobs whose input DCE matches the reference DCE's full signature.
         a = aliased(
-            model.JobToInputDatasetCollectionElementAssociation, name=f"job_to_input_dce_association_{k}_{v}"
+            model.JobToInputDatasetCollectionElementAssociation,
+            name=f"job_to_input_dce_association_{k}_{value_index}",
         )
-        labeled_col = a.dataset_collection_element_id.label(f"{k}_{v}")
+        labeled_col = a.dataset_collection_element_id.label(f"{k}_{value_index}")
         stmt = stmt.add_columns(labeled_col)
         stmt = stmt.join(a, a.job_id == model.Job.id)
@@ -1302,17 +1324,18 @@ class JobSearch:
         # Aliases for the "left" side (job to input DCE path)
         a = aliased(
-            model.JobToInputDatasetCollectionElementAssociation, name=f"job_to_input_dce_association_{k}_{v}"
+            model.JobToInputDatasetCollectionElementAssociation,
+            name=f"job_to_input_dce_association_{k}_{value_index}",
         )
-        dce_left = aliased(model.DatasetCollectionElement, name=f"dce_left_{k}_{v}")
-        hda_left = aliased(model.HistoryDatasetAssociation, name=f"hda_left_{k}_{v}")
+        dce_left = aliased(model.DatasetCollectionElement, name=f"dce_left_{k}_{value_index}")
+        hda_left = aliased(model.HistoryDatasetAssociation, name=f"hda_left_{k}_{value_index}")

         # Aliases for the "right" side (target DCE path in the main query)
-        dce_right = aliased(model.DatasetCollectionElement, name=f"dce_right_{k}_{v}")
-        hda_right = aliased(model.HistoryDatasetAssociation, name=f"hda_right_{k}_{v}")
+        dce_right = aliased(model.DatasetCollectionElement, name=f"dce_right_{k}_{value_index}")
+        hda_right = aliased(model.HistoryDatasetAssociation, name=f"hda_right_{k}_{value_index}")

         # Start joins from job → input DCE association → first-level DCE (left side)
-        labeled_col = a.dataset_collection_element_id.label(f"{k}_{v}")
+        labeled_col = a.dataset_collection_element_id.label(f"{k}_{value_index}")
         stmt = stmt.add_columns(labeled_col)
         stmt = stmt.join(a, a.job_id == model.Job.id)
         stmt = stmt.join(dce_left, dce_left.id == a.dataset_collection_element_id)
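The common thread in the jobs.py hunks above is that the cached-job search now labels each input occurrence by its position in the parameter's value list (value_index from enumerate) instead of by the input's database id. With the old f"{k}_{v}" labels, supplying the same dataset twice for one parameter produced two identical column labels and alias names, so the two occurrences could not be told apart. A minimal standalone sketch of that naming change (illustrative only, not Galaxy code; the input_data shape mirrors the loop above):

    # Standalone sketch: why enumerate-based labels stay unique for repeated inputs.
    input_data = {"input1": [{"src": "hda", "id": 42}, {"src": "hda", "id": 42}]}

    old_labels = [f"{k}_{tv['id']}" for k, values in input_data.items() for tv in values]
    new_labels = [
        f"{k}_{value_index}"
        for k, values in input_data.items()
        for value_index, tv in enumerate(values)
    ]

    assert old_labels == ["input1_42", "input1_42"]  # same dataset twice -> duplicate labels
    assert new_labels == ["input1_0", "input1_1"]    # one distinct label per occurrence

The same index is folded into the CTE and alias names (ref_all_ds_ids_{k}_{value_index}, candidate_hda_{k}_{value_index}, and so on), so repeated occurrences of the same collection or element also get structurally distinct subqueries.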
lib/galaxy_test/api/test_tools.py  (+17 −0)

@@ -1097,6 +1097,23 @@ class TestToolsApi(ApiTestCase, TestsTools):
         job_details = self.dataset_populator.get_job_details(copied_job_id, full=True).json()
         assert job_details["copied_from_job_id"] == outputs_one["jobs"][0]["id"]

+    @skip_without_tool("cat_list")
+    @requires_new_history
+    def test_run_cat_list_use_cached_job_repeated_input(self):
+        with self.dataset_populator.test_history_for(
+            self.test_run_cat_list_use_cached_job_repeated_input
+        ) as history_id:
+            # Run simple non-upload tool with an input data parameter.
+            input_value = dataset_to_param(self.dataset_populator.new_dataset(history_id=history_id))
+            inputs = {"input1": {"batch": False, "values": [input_value, input_value]}}
+            outputs_one = self._run("cat_list", history_id, inputs, assert_ok=True, wait_for_job=True)
+            outputs_two = self._run(
+                "cat_list", history_id, inputs, assert_ok=True, wait_for_job=True, use_cached_job=True
+            )
+            copied_job_id = outputs_two["jobs"][0]["id"]
+            job_details = self.dataset_populator.get_job_details(copied_job_id, full=True).json()
+            assert job_details["copied_from_job_id"] == outputs_one["jobs"][0]["id"]
+
     @skip_without_tool("collection_creates_list")
     @requires_new_history
     def test_run_collection_creates_list_use_cached_job(self):
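For context on the labelling pattern the new test exercises end to end, here is a self-contained SQLAlchemy sketch (in-memory SQLite and a stand-in job_to_input_dataset table, not Galaxy's models) showing how one labelled column per occurrence keeps a twice-used dataset distinguishable in the result row; with the old id-based labels both columns would have collapsed onto the single name input1_42.

    from sqlalchemy import Column, Integer, MetaData, Table, create_engine, insert, select

    metadata = MetaData()
    # Stand-in for a job-to-input association table (illustrative only).
    job_input = Table(
        "job_to_input_dataset",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("job_id", Integer),
        Column("dataset_id", Integer),
    )

    engine = create_engine("sqlite://")
    metadata.create_all(engine)

    with engine.begin() as conn:
        # A candidate job that received dataset 42 twice for the same parameter.
        conn.execute(insert(job_input), [{"job_id": 1, "dataset_id": 42}, {"job_id": 1, "dataset_id": 42}])

        # One alias and one labelled column per occurrence, mirroring f"{k}_{value_index}".
        a0 = job_input.alias("input1_assoc_0")
        a1 = job_input.alias("input1_assoc_1")
        stmt = (
            select(
                a0.c.dataset_id.label("input1_0"),
                a1.c.dataset_id.label("input1_1"),
            )
            .where(a0.c.job_id == a1.c.job_id)
            .where(a0.c.dataset_id == 42)
            .where(a1.c.dataset_id == 42)
        )
        row = conn.execute(stmt).first()
        print(dict(row._mapping))  # {'input1_0': 42, 'input1_1': 42}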