Merge pull request #12223 from mvdbeek/release_21.01_performance_fix_job_script (252ec1aa) · Commits · NDIP / Galaxy

lib/galaxy/jobs/__init__.py

+2 −0

Original line number	Diff line number	Diff line
		@@ -1093,6 +1093,7 @@ class JobWrapper(HasResourceParameters):
		Prepare the job to run by creating the working directory and the
		config files.
		"""
		prepare_timer = util.ExecutionTimer()
		self.sa_session.expunge_all() # this prevents the metadata reverting that has been seen in conjunction with the PBS job runner

		if not os.path.exists(self.working_directory):
		@@ -1139,6 +1140,7 @@ class JobWrapper(HasResourceParameters):
		self.write_version_cmd = f"{version_string_cmd} > {compute_environment.version_path()} 2>&1"
		else:
		self.write_version_cmd = None
		log.debug(f"Job wrapper for Job [{job.id}] prepared {prepare_timer}")
		return self.extra_filenames

		def _setup_working_directory(self, job=None):

+5 −5

Original line number	Diff line number	Diff line
		@@ -272,17 +272,17 @@ class ToolEvaluator:
		# - Only necessary when self.check_values is False (==external dataset
		# tool?: can this be abstracted out as part of being a datasource tool?)
		# For now we try to not wrap unnecessarily, but this should be untangled at some point.
		matches = None
		for name, data in input_datasets.items():
		param_dict_value = param_dict.get(name, None)
		if data and param_dict_value is None:
		# We may have a nested parameter that is not fully prefixed.
		# We try recovering from param_dict, but tool authors should really use fully-qualified
		# variables
		wrappers = find_instance_nested(param_dict,
		instances=(DatasetFilenameWrapper, DatasetListWrapper),
		match_key=name)
		if len(wrappers) == 1:
		wrapper = wrappers[0]
		if matches is None:
		matches = find_instance_nested(param_dict, instances=(DatasetFilenameWrapper, DatasetListWrapper))
		wrapper = matches.get(name)
		if wrapper:
		param_dict[name] = wrapper
		continue
		if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)):

+7 −6

Original line number	Diff line number	Diff line
		@@ -621,20 +621,21 @@ def sanitize_for_filename(text, default=None):
		return out


		def find_instance_nested(item, instances, match_key=None):
		def find_instance_nested(item, instances):
		"""
		Recursively find instances from lists, dicts, tuples.

		`instances` should be a tuple of valid instances
		If match_key is given the key must match for an instance to be added to the list of found instances.
		`instances` should be a tuple of valid instances.
		Returns a dictionary, where keys are the deepest key at which an instance has been found,
		and the value is the matched instance.
		"""

		matches = []
		matches = {}

		def visit(path, key, value):
		if isinstance(value, instances):
		if match_key is None or match_key == key:
		matches.append(value)
		if key not in matches:
		matches[key] = value
		return key, value

		def enter(path, key, value):

+10 −3

Original line number	Diff line number	Diff line
		@@ -1673,7 +1673,8 @@ class ToolsTestCase(ApiTestCase, TestsTools):
		@skip_without_tool("identifier_in_conditional")
		@uses_test_history(require_new=False)
		def test_identifier_map_over_input_in_conditional(self, history_id):
		hdca_id = self._build_pair(history_id, ["123", "456"])
		# Run cat tool, so HDA names are different from element identifiers
		hdca_id = self._build_pair(history_id, ["123", "456"], run_cat=True)
		inputs = {
		"outer_cond|input1": {'batch': True, 'values': [{'src': 'hdca', 'id': hdca_id}]},
		"outer_cond|multi_input": False,
		@@ -2509,9 +2510,15 @@ class ToolsTestCase(ApiTestCase, TestsTools):
		hdca_list_id = response.json()["outputs"][0]["id"]
		return hdca_list_id

		def _build_pair(self, history_id, contents):
		def _build_pair(self, history_id, contents, run_cat=False):
		create_response = self.dataset_collection_populator.create_pair_in_history(history_id, contents=contents, direct_upload=True)
		hdca_id = create_response.json()["outputs"][0]["id"]
		hdca_id = create_response.json()["output_collections"][0]["id"]
		inputs = {
		"input1": {'batch': True, 'values': [dict(src="hdca", id=hdca_id)]},
		}
		if run_cat:
		outputs = self._run_cat(history_id, inputs=inputs, assert_ok=True)
		hdca_id = outputs['implicit_collections'][0]['id']
		return hdca_id

		def _assert_dataset_permission_denied_response(self, response):