TST: added more tests (88fed0ff) · Commits · NDIP / Galaxy

lib/galaxy/datatypes/qiime2.py

+24 −9

Original line number	Diff line number	Diff line
		@@ -12,7 +12,8 @@ from galaxy.datatypes.tabular import Tabular
		from galaxy.datatypes.sniff import build_sniff_from_prefix


		class _QIIME2Result(CompressedZipArchive):
		class _QIIME2ResultBase(CompressedZipArchive):
		"""Base class for QIIME2Artifact and QIIME2Visualization"""
		MetadataElement(name="semantic_type", readonly=True)
		MetadataElement(name="semantic_type_simple", readonly=True, visible=False)
		MetadataElement(name="uuid", readonly=True)
		@@ -52,12 +53,13 @@ class _QIIME2Result(CompressedZipArchive):
		('Type', dataset.metadata.semantic_type),
		('UUID', dataset.metadata.uuid)]
		if not simple:
		if dataset.metadata.format is not None:
		if dataset.metadata.semantic_type != 'Visualization':
		peek.append(('Format', dataset.metadata.format))
		peek.append(('Version', dataset.metadata.version))
		return peek

		def _sniff(self, filename):
		"""Helper method for use in inherited datatypes"""
		try:
		if not zipfile.is_zipfile(filename):
		raise Exception()
		@@ -66,7 +68,7 @@ class _QIIME2Result(CompressedZipArchive):
		return False


		class QIIME2Artifact(_QIIME2Result):
		class QIIME2Artifact(_QIIME2ResultBase):
		file_ext = "qza"

		def sniff(self, filename):
		@@ -74,7 +76,7 @@ class QIIME2Artifact(_QIIME2Result):
		return metadata and metadata['semantic_type'] != 'Visualization'


		class QIIME2Visualization(_QIIME2Result):
		class QIIME2Visualization(_QIIME2ResultBase):
		file_ext = "qzv"

		def sniff(self, filename):
		@@ -86,7 +88,7 @@ class QIIME2Visualization(_QIIME2Result):
		class QIIME2Metadata(Tabular):
		"""
		QIIME 2 supports overriding the type of a column to Categorical when
		a specific directive `#Q2:types` is present under the ID row.
		a specific directive `#q2:types` is present under the ID row.

		Galaxy already understands column types quite well, however we sometimes
		want to override its inferred type.
		@@ -96,16 +98,16 @@ class QIIME2Metadata(Tabular):
		and interacts best with the current implementation of Tabular.
		"""
		file_ext = "qiime2.tabular"
		_TYPES_DIRECTIVE = '#q2:types'
		is_subclass = False

		_TYPES_DIRECTIVE = '#q2:types'
		_search_lines = 2

		def get_column_names(self, first_line=None):
		if first_line is None:
		return None
		return first_line.strip().split('\t')


		def set_meta(self, dataset, **kwargs):
		"""
		Let Galaxy's Tabular format handle most of this. We will just jump
		@@ -116,7 +118,7 @@ class QIIME2Metadata(Tabular):
		if dataset.has_data():
		with open(dataset.file_name) as dataset_fh:
		line = None
		for line, _ in zip(dataset_fh, range(2)):
		for line, _ in zip(dataset_fh, range(self._search_lines)):
		if line.startswith(self._TYPES_DIRECTIVE):
		break
		if line is None:
		@@ -144,14 +146,27 @@ class QIIME2Metadata(Tabular):
		dataset.metadata.column_types[idx] = 'str'

		def sniff_prefix(self, file_prefix):
		for _, line in zip(range(4), file_prefix.line_iterator()):
		for _, line in zip(range(self._search_lines),
		file_prefix.line_iterator()):
		if line.startswith(self._TYPES_DIRECTIVE):
		return True

		return False


		##############################################################################
		# Helpers
		##############################################################################


		def _strip_properties(expression):
		# This is necessary because QIIME 2's semantic types include a limited
		# form of intersection type, which means that `A & B` is a subtype of `A`
		# as well as a subtype of `B`. This means it is not generally speaking
		# possible or practical to enumerate all valid subtypes and then do an
		# exact match using <options options_filter_attribute="Some[Type]">
		# So instead filter out 90% of the invalid inputs and let QIIME 2 raise an
		# error on the finer details such as these "properties".
		try:
		expression_tree = ast.parse(expression)
		reconstructer = _PredicateRemover()

lib/galaxy/datatypes/test/qiime2.qza

0 → 100644

+5.26 KiB

File added.

No diff preview for this file type.

View file

lib/galaxy/datatypes/test/qiime2.qzv

0 → 100644

+6.53 KiB

File added.

No diff preview for this file type.

View file

lib/galaxy/datatypes/test/qiime2.tsv

0 → 100644

+5 −0

Original line number	Diff line number	Diff line
		id col1 col2 col3
		#q2:types categorical categorical numeric
		id1 a 1 1
		id2 b 2 2
		id3 c 3 3

test/unit/data/datatypes/test_qiime2.py

+164 −52

Original line number	Diff line number	Diff line
		import unittest
		from galaxy.datatypes.qiime2 import (_strip_properties, QIIME2Artifact,
		QIIME2Visualization, QIIME2Metadata)
		from .util import MockDataset, get_input_files

		from galaxy.datatypes.qiime2 import strip_properties

		# Tests for QIIME2Artifact:

		def test_qza_sniff():
		# QIIME2Artifact.sniff() must return exactly True for the qiime2.qza fixture.
		qza = QIIME2Artifact()
		with get_input_files('qiime2.qza') as input_files:
		assert qza.sniff(input_files[0]) is True


		def test_qza_set_meta():
		# Run QIIME2Artifact.set_meta() on a MockDataset pointing at the qiime2.qza
		# fixture and check the uuid, version, format and semantic type metadata.
		qza = QIIME2Artifact()
		with get_input_files('qiime2.qza') as input_files:
		dataset = MockDataset(1)
		dataset.file_name = input_files[0]

		qza.set_meta(dataset)

		# NOTE(review): expected values presumably mirror what is recorded inside
		# the fixture archive -- regenerate them if the fixture is replaced.
		assert dataset.metadata.uuid == 'ba8c55e1-a2bc-47ea-beb2-b37b0b3b4032'
		assert dataset.metadata.version == '2022.2.1'
		assert dataset.metadata.format == 'SingleIntDirectoryFormat'
		assert dataset.metadata.semantic_type == 'SingleInt1'
		assert dataset.metadata.semantic_type_simple == 'SingleInt1'


		def test_qza_set_peek():
		# After set_meta() and set_peek(), the artifact's peek text must list
		# Type, UUID, Format and Version, one per line.
		qza = QIIME2Artifact()
		with get_input_files('qiime2.qza') as input_files:
		dataset = MockDataset(1)
		dataset.file_name = input_files[0]

		# set_meta() is called first; presumably set_peek() reads the metadata
		# it populates -- confirm against the datatype implementation.
		qza.set_meta(dataset)
		qza.set_peek(dataset)

		# The expected peek is compared as an exact multi-line string.
		assert dataset.peek == '''Type: SingleInt1
		UUID: ba8c55e1-a2bc-47ea-beb2-b37b0b3b4032
		Format: SingleIntDirectoryFormat
		Version: 2022.2.1'''


		# Tests for QIIME2Visualization:

		def test_qzv_sniff():
		# QIIME2Visualization.sniff() must return exactly True for the qiime2.qzv fixture.
		qzv = QIIME2Visualization()
		with get_input_files('qiime2.qzv') as input_files:
		assert qzv.sniff(input_files[0]) is True


		def test_qzv_set_meta():
		# Run QIIME2Visualization.set_meta() on a MockDataset pointing at the
		# qiime2.qzv fixture and check the uuid, version and semantic type metadata.
		qzv = QIIME2Visualization()
		with get_input_files('qiime2.qzv') as input_files:
		dataset = MockDataset(1)
		dataset.file_name = input_files[0]

		qzv.set_meta(dataset)

		# Unlike the artifact test, no `format` metadata is asserted here.
		assert dataset.metadata.uuid == '368ba1e7-3a7c-4dbc-98da-79f41aeece63'
		assert dataset.metadata.version == '2022.2.1'
		assert dataset.metadata.semantic_type == 'Visualization'
		assert dataset.metadata.semantic_type_simple == 'Visualization'


		def test_qzv_set_peek():
		# After set_meta() and set_peek(), the visualization's peek text must list
		# Type, UUID and Version only.
		qzv = QIIME2Visualization()
		with get_input_files('qiime2.qzv') as input_files:
		dataset = MockDataset(1)
		dataset.file_name = input_files[0]

		qzv.set_meta(dataset)
		qzv.set_peek(dataset)

		# The expected peek has no "Format" line, unlike the artifact peek.
		assert dataset.peek == '''Type: Visualization
		UUID: 368ba1e7-3a7c-4dbc-98da-79f41aeece63
		Version: 2022.2.1'''


		# Tests for QIIME2Metadata:

		def test_qiime2tabular_sniff():
		# QIIME2Metadata.sniff() must return exactly True for the qiime2.tsv fixture.
		q2md = QIIME2Metadata()
		with get_input_files('qiime2.tsv') as input_files:
		assert q2md.sniff(input_files[0]) is True


		def test_qiime2tabular_sniff_false():
		# Negative case: QIIME2Metadata.sniff() must return exactly False for the
		# test_tab1.tabular fixture.
		# NOTE(review): presumably that fixture is a plain tabular file without a
		# `#q2:types` line -- confirm against the fixture.
		q2md = QIIME2Metadata()
		with get_input_files('test_tab1.tabular') as input_files:
		assert q2md.sniff(input_files[0]) is False


		def test_qiime2tabular_set_meta():
		# Run QIIME2Metadata.set_meta() on a MockDataset pointing at the qiime2.tsv
		# fixture and check the resulting column_types metadata.
		q2md = QIIME2Metadata()
		with get_input_files('qiime2.tsv') as input_files:
		dataset = MockDataset(1)
		dataset.file_name = input_files[0]

		q2md.set_meta(dataset)

		# Show override of type inference on the second to last column:
		assert dataset.metadata.column_types == ['str', 'str', 'str', 'int']


		# Tests for _strip_properties, which is rather complicated so worth testing
		# on its own.

		# Note: Not all the expressions here are completely valid types; they are
		# just representative examples.
		class TestStripProperties(unittest.TestCase):
		def test_simple(self):

		def test_strip_properties_simple():
		simple_expression = 'Taxonomy % Properties("SILVIA")'
		stripped_expression = 'Taxonomy'

		reconstructed_expression = strip_properties(simple_expression)
		self.assertEqual(reconstructed_expression, stripped_expression)
		reconstructed_expression = _strip_properties(simple_expression)

		assert reconstructed_expression == stripped_expression


		def test_single(self):
		def test_strip_properties_single():
		single_expression = 'FeatureData[Taxonomy % Properties("SILVIA")]'
		stripped_expression = 'FeatureData[Taxonomy]'

		reconstructed_expression = strip_properties(single_expression)
		self.assertEqual(reconstructed_expression, stripped_expression)
		reconstructed_expression = _strip_properties(single_expression)

		def test_double(self):
		assert reconstructed_expression == stripped_expression


		def test_strip_properties_double():
		double_expression = ('FeatureData[Taxonomy % Properties("SILVIA"), '
		'DistanceMatrix % Axes("ASV", "ASV")]')
		stripped_expression = 'FeatureData[Taxonomy, DistanceMatrix]'

		reconstructed_expression = strip_properties(double_expression)
		self.assertEqual(reconstructed_expression, stripped_expression)
		reconstructed_expression = _strip_properties(double_expression)

		assert reconstructed_expression == stripped_expression

		def test_nested(self):

		def test_strip_properties_nested():
		nested_expression = ('Tuple[FeatureData[Taxonomy % '
		'Properties("SILVIA")] % Axes("ASV", "ASV")]')
		stripped_expression = 'Tuple[FeatureData[Taxonomy]]'

		reconstructed_expression = strip_properties(nested_expression)
		self.assertEqual(reconstructed_expression, stripped_expression)
		reconstructed_expression = _strip_properties(nested_expression)

		assert reconstructed_expression == stripped_expression


		def test_complex(self):
		def test_strip_properties_complex():
		complex_expression = \
		('Tuple[FeatureData[Taxonomy % Properties("SILVA")] % Axis("ASV")'
		', DistanceMatrix % Axes("ASV", "ASV")] % Unique')
		stripped_expression = 'Tuple[FeatureData[Taxonomy], DistanceMatrix]'

		reconstructed_expression = strip_properties(complex_expression)
		self.assertEqual(reconstructed_expression, stripped_expression)
		reconstructed_expression = _strip_properties(complex_expression)

		def test_keep_different_binop(self):
		assert reconstructed_expression == stripped_expression


		def test_strip_properties_keeps_different_binop():
		expression_with_different_binop = \
		('FeatureData[Taxonomy % Properties("SILVIA"), '
		'Taxonomy & Properties]')
		@@ -53,22 +166,21 @@ class TestStripProperties(unittest.TestCase):
		'FeatureData[Taxonomy, Taxonomy & Properties]'

		reconstructed_expression = \
		strip_properties(expression_with_different_binop)
		self.assertEqual(reconstructed_expression, stripped_expression)
		_strip_properties(expression_with_different_binop)

		assert reconstructed_expression == stripped_expression

		def test_multiple_strings(self):

		def test_strip_properties_multiple_strings():
		simple_expression = 'Taxonomy % Properties("SILVIA")'
		stripped_simple_expression = 'Taxonomy'

		reconstructed_simple_expression = strip_properties(simple_expression)
		reconstructed_simple_expression = _strip_properties(simple_expression)

		single_expression = 'FeatureData[Taxonomy % Properties("SILVIA")]'
		stripped_single_expression = 'FeatureData[Taxonomy]'

		reconstructed_single_expression = strip_properties(single_expression)

		self.assertEqual(reconstructed_simple_expression,
		stripped_simple_expression)
		self.assertEqual(reconstructed_single_expression,
		stripped_single_expression)
		reconstructed_single_expression = _strip_properties(single_expression)

		assert reconstructed_simple_expression == stripped_simple_expression
		assert reconstructed_single_expression == stripped_single_expression