Commit 88fed0ff authored by Evan Bolyen's avatar Evan Bolyen
Browse files

TST: added more tests

parent fb77a31a
Loading
Loading
Loading
Loading
+24 −9
Original line number Diff line number Diff line
@@ -12,7 +12,8 @@ from galaxy.datatypes.tabular import Tabular
from galaxy.datatypes.sniff import build_sniff_from_prefix


class _QIIME2Result(CompressedZipArchive):
class _QIIME2ResultBase(CompressedZipArchive):
    """Base class for QIIME2Artifact and QIIME2Visualization"""
    MetadataElement(name="semantic_type", readonly=True)
    MetadataElement(name="semantic_type_simple", readonly=True, visible=False)
    MetadataElement(name="uuid", readonly=True)
@@ -52,12 +53,13 @@ class _QIIME2Result(CompressedZipArchive):
            ('Type', dataset.metadata.semantic_type),
            ('UUID', dataset.metadata.uuid)]
        if not simple:
            if dataset.metadata.format is not None:
            if dataset.metadata.semantic_type != 'Visualization':
                peek.append(('Format', dataset.metadata.format))
            peek.append(('Version', dataset.metadata.version))
        return peek

    def _sniff(self, filename):
        """Helper method for use in inherited datatypes"""
        try:
            if not zipfile.is_zipfile(filename):
                raise Exception()
@@ -66,7 +68,7 @@ class _QIIME2Result(CompressedZipArchive):
            return False


class QIIME2Artifact(_QIIME2Result):
class QIIME2Artifact(_QIIME2ResultBase):
    file_ext = "qza"

    def sniff(self, filename):
@@ -74,7 +76,7 @@ class QIIME2Artifact(_QIIME2Result):
        return metadata and metadata['semantic_type'] != 'Visualization'


class QIIME2Visualization(_QIIME2Result):
class QIIME2Visualization(_QIIME2ResultBase):
    file_ext = "qzv"

    def sniff(self, filename):
@@ -86,7 +88,7 @@ class QIIME2Visualization(_QIIME2Result):
class QIIME2Metadata(Tabular):
    """
    QIIME 2 supports overriding the type of a column to Categorical when
    a specific directive `#Q2:types` is present under the ID row.
    a specific directive `#q2:types` is present under the ID row.

    Galaxy already understands column types quite well, however we sometimes
    want to override its inferred type.
@@ -96,16 +98,16 @@ class QIIME2Metadata(Tabular):
    and interacts best with the current implementation of Tabular.
    """
    file_ext = "qiime2.tabular"
    _TYPES_DIRECTIVE = '#q2:types'
    is_subclass = False

    _TYPES_DIRECTIVE = '#q2:types'
    _search_lines = 2

    def get_column_names(self, first_line=None):
        """Derive column names from the first line of the file.

        Returns None when no line is supplied. Otherwise the line has its
        surrounding whitespace stripped and is split on tab characters.
        """
        if first_line is not None:
            header = first_line.strip()
            return header.split('\t')
        return None


    def set_meta(self, dataset, **kwargs):
        """
        Let Galaxy's Tabular format handle most of this. We will just jump
@@ -116,7 +118,7 @@ class QIIME2Metadata(Tabular):
        if dataset.has_data():
            with open(dataset.file_name) as dataset_fh:
                line = None
                for line, _ in zip(dataset_fh, range(2)):
                for line, _ in zip(dataset_fh, range(self._search_lines)):
                    if line.startswith(self._TYPES_DIRECTIVE):
                        break
                if line is None:
@@ -144,14 +146,27 @@ class QIIME2Metadata(Tabular):
                        dataset.metadata.column_types[idx] = 'str'

    def sniff_prefix(self, file_prefix):
        for _, line in zip(range(4), file_prefix.line_iterator()):
        for _, line in zip(range(self._search_lines),
                           file_prefix.line_iterator()):
            if line.startswith(self._TYPES_DIRECTIVE):
                return True

        return False


##############################################################################
# Helpers
##############################################################################


def _strip_properties(expression):
    # This is necessary because QIIME 2's semantic types include a limited
    # form of intersection type, which means that `A & B` is a subtype of `A`
    # as well as a subtype of `B`. This means it is not generally speaking
    # possible or practical to enumerate all valid subtypes and then do an
    # exact match using <options options_filter_attribute="Some[Type]">
    # So instead filter out 90% of the invalid inputs and let QIIME 2 raise an
    # error on the finer details such as these "properties".
    try:
        expression_tree = ast.parse(expression)
        reconstructer = _PredicateRemover()
+5.26 KiB

File added.

No diff preview for this file type.

+6.53 KiB

File added.

No diff preview for this file type.

+5 −0
Original line number Diff line number Diff line
id	col1	col2	col3
#q2:types	categorical	categorical	numeric
id1	a	1	1
id2	b	2	2
id3	c	3	3
+164 −52
Original line number Diff line number Diff line
import unittest
from galaxy.datatypes.qiime2 import (_strip_properties, QIIME2Artifact,
                                     QIIME2Visualization, QIIME2Metadata)
from .util import MockDataset, get_input_files

from galaxy.datatypes.qiime2 import strip_properties

# Tests for QIIME2Artifact:

def test_qza_sniff():
    """The artifact datatype's sniff returns True for the qza fixture."""
    datatype = QIIME2Artifact()
    with get_input_files('qiime2.qza') as paths:
        sniffed = datatype.sniff(paths[0])
        assert sniffed is True


def test_qza_set_meta():
    """set_meta populates uuid, version, format and types for the qza."""
    datatype = QIIME2Artifact()
    with get_input_files('qiime2.qza') as paths:
        mock = MockDataset(1)
        mock.file_name = paths[0]

        datatype.set_meta(mock)

        # Check each metadata field individually so that a failure points
        # at the exact field that was not populated as expected.
        assert mock.metadata.uuid == 'ba8c55e1-a2bc-47ea-beb2-b37b0b3b4032'
        assert mock.metadata.version == '2022.2.1'
        assert mock.metadata.format == 'SingleIntDirectoryFormat'
        assert mock.metadata.semantic_type == 'SingleInt1'
        assert mock.metadata.semantic_type_simple == 'SingleInt1'


def test_qza_set_peek():
    """set_peek renders type, UUID, format and version for the qza."""
    datatype = QIIME2Artifact()
    with get_input_files('qiime2.qza') as paths:
        mock = MockDataset(1)
        mock.file_name = paths[0]

        # The test populates metadata first, then renders the peek.
        datatype.set_meta(mock)
        datatype.set_peek(mock)

        expected_peek = '''Type: SingleInt1
UUID: ba8c55e1-a2bc-47ea-beb2-b37b0b3b4032
Format: SingleIntDirectoryFormat
Version: 2022.2.1'''
        assert mock.peek == expected_peek


# Tests for QIIME2Visualization:

def test_qzv_sniff():
    """The visualization datatype's sniff returns True for the qzv fixture."""
    datatype = QIIME2Visualization()
    with get_input_files('qiime2.qzv') as paths:
        sniffed = datatype.sniff(paths[0])
        assert sniffed is True


def test_qzv_set_meta():
    """set_meta populates uuid, version and semantic types for the qzv."""
    datatype = QIIME2Visualization()
    with get_input_files('qiime2.qzv') as paths:
        mock = MockDataset(1)
        mock.file_name = paths[0]

        datatype.set_meta(mock)

        # Check each metadata field individually so that a failure points
        # at the exact field that was not populated as expected.
        assert mock.metadata.uuid == '368ba1e7-3a7c-4dbc-98da-79f41aeece63'
        assert mock.metadata.version == '2022.2.1'
        assert mock.metadata.semantic_type == 'Visualization'
        assert mock.metadata.semantic_type_simple == 'Visualization'


def test_qzv_set_peek():
    """set_peek renders type, UUID and version (no format) for the qzv."""
    datatype = QIIME2Visualization()
    with get_input_files('qiime2.qzv') as paths:
        mock = MockDataset(1)
        mock.file_name = paths[0]

        # The test populates metadata first, then renders the peek.
        datatype.set_meta(mock)
        datatype.set_peek(mock)

        expected_peek = '''Type: Visualization
UUID: 368ba1e7-3a7c-4dbc-98da-79f41aeece63
Version: 2022.2.1'''
        assert mock.peek == expected_peek


# Tests for QIIME2Metadata:

def test_qiime2tabular_sniff():
    """The metadata datatype's sniff returns True for the qiime2.tsv fixture."""
    datatype = QIIME2Metadata()
    with get_input_files('qiime2.tsv') as paths:
        sniffed = datatype.sniff(paths[0])
        assert sniffed is True


def test_qiime2tabular_sniff_false():
    """The metadata datatype's sniff returns False for test_tab1.tabular."""
    datatype = QIIME2Metadata()
    with get_input_files('test_tab1.tabular') as paths:
        sniffed = datatype.sniff(paths[0])
        assert sniffed is False


def test_qiime2tabular_set_meta():
    """set_meta yields the expected column types for the qiime2.tsv fixture."""
    datatype = QIIME2Metadata()
    with get_input_files('qiime2.tsv') as paths:
        mock = MockDataset(1)
        mock.file_name = paths[0]

        datatype.set_meta(mock)

        # Show override of type inference on the second to last column:
        expected_types = ['str', 'str', 'str', 'int']
        assert mock.metadata.column_types == expected_types


# Tests for _strip_properties, which is rather complicated so worth testing
# on its own.

# Note: Not all the expressions here are completely valid types; they are
# just representative examples.
class TestStripProperties(unittest.TestCase):
    def test_simple(self):

def test_strip_properties_simple():
    simple_expression = 'Taxonomy % Properties("SILVIA")'
    stripped_expression = 'Taxonomy'

        reconstructed_expression = strip_properties(simple_expression)
        self.assertEqual(reconstructed_expression, stripped_expression)
    reconstructed_expression = _strip_properties(simple_expression)

    assert reconstructed_expression == stripped_expression


    def test_single(self):
def test_strip_properties_single():
    single_expression = 'FeatureData[Taxonomy % Properties("SILVIA")]'
    stripped_expression = 'FeatureData[Taxonomy]'

        reconstructed_expression = strip_properties(single_expression)
        self.assertEqual(reconstructed_expression, stripped_expression)
    reconstructed_expression = _strip_properties(single_expression)

    def test_double(self):
    assert reconstructed_expression == stripped_expression


def test_strip_properties_double():
    double_expression = ('FeatureData[Taxonomy % Properties("SILVIA"), '
                         'DistanceMatrix % Axes("ASV", "ASV")]')
    stripped_expression = 'FeatureData[Taxonomy, DistanceMatrix]'

        reconstructed_expression = strip_properties(double_expression)
        self.assertEqual(reconstructed_expression, stripped_expression)
    reconstructed_expression = _strip_properties(double_expression)

    assert reconstructed_expression == stripped_expression

    def test_nested(self):

def test_strip_properties_nested():
    nested_expression = ('Tuple[FeatureData[Taxonomy % '
                         'Properties("SILVIA")] % Axes("ASV", "ASV")]')
    stripped_expression = 'Tuple[FeatureData[Taxonomy]]'

        reconstructed_expression = strip_properties(nested_expression)
        self.assertEqual(reconstructed_expression, stripped_expression)
    reconstructed_expression = _strip_properties(nested_expression)

    assert reconstructed_expression == stripped_expression


    def test_complex(self):
def test_strip_properties_complex():
    complex_expression = \
        ('Tuple[FeatureData[Taxonomy % Properties("SILVA")] % Axis("ASV")'
         ', DistanceMatrix % Axes("ASV", "ASV")] % Unique')
    stripped_expression = 'Tuple[FeatureData[Taxonomy], DistanceMatrix]'

        reconstructed_expression = strip_properties(complex_expression)
        self.assertEqual(reconstructed_expression, stripped_expression)
    reconstructed_expression = _strip_properties(complex_expression)

    def test_keep_different_binop(self):
    assert reconstructed_expression == stripped_expression


def test_strip_properties_keeps_different_binop():
    expression_with_different_binop = \
        ('FeatureData[Taxonomy % Properties("SILVIA"), '
         'Taxonomy & Properties]')
@@ -53,22 +166,21 @@ class TestStripProperties(unittest.TestCase):
        'FeatureData[Taxonomy, Taxonomy & Properties]'

    reconstructed_expression = \
            strip_properties(expression_with_different_binop)
        self.assertEqual(reconstructed_expression, stripped_expression)
        _strip_properties(expression_with_different_binop)

    assert reconstructed_expression == stripped_expression

    def test_multiple_strings(self):

def test_strip_properties_multiple_strings():
    simple_expression = 'Taxonomy % Properties("SILVIA")'
    stripped_simple_expression = 'Taxonomy'

        reconstructed_simple_expression = strip_properties(simple_expression)
    reconstructed_simple_expression = _strip_properties(simple_expression)

    single_expression = 'FeatureData[Taxonomy % Properties("SILVIA")]'
    stripped_single_expression = 'FeatureData[Taxonomy]'

        reconstructed_single_expression = strip_properties(single_expression)

        self.assertEqual(reconstructed_simple_expression,
                         stripped_simple_expression)
        self.assertEqual(reconstructed_single_expression,
                         stripped_single_expression)
    reconstructed_single_expression = _strip_properties(single_expression)

    assert reconstructed_simple_expression == stripped_simple_expression
    assert reconstructed_single_expression == stripped_single_expression