Commit 38b1bfc4 authored by Oddant1's avatar Oddant1 Committed by Evan Bolyen
Browse files

QIIME 2: Strip properties from semantic types (#2)

parent 2db3d504
Loading
Loading
Loading
Loading
+78 −2
Original line number Diff line number Diff line
import io
import zipfile
import ast
import uuid as _uuid
import zipfile

import yaml

@@ -8,6 +9,19 @@ from galaxy.datatypes.binary import CompressedZipArchive
from galaxy.datatypes.metadata import MetadataElement


def strip_properties(expression):
    """Return ``expression`` with its properties stripped.

    The text is parsed with ``ast.parse`` and rebuilt by walking the tree
    with a ``PredicateRemover``; the rebuilt text is returned.

    If anything at all goes wrong while parsing or rebuilding, the input is
    returned unchanged.
    """
    try:
        parsed = ast.parse(expression)
        remover = PredicateRemover()
        remover.visit(parsed)
        stripped = remover.expression
    except Exception:
        # Deliberately broad: any failure here falls back to the full
        # expression, which punts the error off to q2galaxy so that it
        # surfaces there and not here.
        stripped = expression

    return stripped


class QIIME2Result(CompressedZipArchive):
    MetadataElement(name="semantic_type", readonly=True)
    MetadataElement(name="semantic_type_simple", readonly=True, visible=False)
@@ -21,7 +35,8 @@ class QIIME2Result(CompressedZipArchive):
            if value:
                setattr(dataset.metadata, key, value)

        dataset.metadata.semantic_type_simple = 'TODO'
        dataset.metadata.semantic_type_simple = \
            strip_properties(dataset.metadata.semantic_type)

    def set_peek(self, dataset, is_multi_byte=False):
        if dataset.metadata.semantic_type == 'Visualization':
@@ -76,6 +91,67 @@ class QIIME2Visualization(QIIME2Result):
        return metadata and metadata['semantic_type'] == 'Visualization'


# Python 3.9 has a built in unparse. We can probably use this in the future
# when we are using 3.9
# https://docs.python.org/3.9/library/ast.html#ast.unparse
class PredicateRemover(ast.NodeVisitor):
    """Rebuild a parsed expression as text, dropping ``%`` right-hand sides.

    Usage: pass the tree returned by ``ast.parse`` to ``visit`` and then read
    the rebuilt text from the ``expression`` attribute.

    Only names, subscripts, tuples and binary operators are rendered
    explicitly. Any other node type falls through to ``generic_visit``, so
    only the names nested inside it end up in the output. Parentheses from
    the original text are not reproduced.
    """

    # Text emitted between the operands of each binary operator that is kept.
    # ``ast.Mod`` is intentionally absent: ``left % right`` is reduced to
    # ``left`` in ``visit_BinOp``.
    binops = {
        ast.Add: ' + ',
        ast.Sub: ' - ',
        ast.Mult: ' * ',
        ast.Div: ' / ',
        ast.FloorDiv: ' // ',
        ast.Pow: ' ** ',
        ast.LShift: ' << ',
        ast.RShift: ' >> ',
        ast.BitOr: ' | ',
        ast.BitXor: ' ^ ',
        ast.BitAnd: ' & ',
        ast.MatMult: ' @ '
    }

    def __init__(self):
        super().__init__()

        # Text of the expression rebuilt so far; appended to while visiting.
        self.expression = ''

    def visit_Name(self, node):
        """Append the identifier as written."""
        self.expression += node.id

    def visit_Subscript(self, node):
        """Render ``value[slice]``.

        The brackets are emitted here rather than in ``visit_Index`` because
        ``ast.parse`` no longer wraps subscript contents in ``ast.Index`` on
        Python 3.9 and later, so a visitor for that node alone never runs
        there.
        """
        self.visit(node.value)
        self.expression += '['
        self.visit(node.slice)
        self.expression += ']'

    def visit_Index(self, node):
        """Pass through the ``ast.Index`` wrapper used before Python 3.9."""
        self.generic_visit(node)

    def visit_Tuple(self, node):
        """Render the elements separated by ``', '``."""
        for position, element in enumerate(node.elts):
            # The separator goes between whole elements, so an element made
            # of several names (a binop, a nested subscript) stays intact.
            if position:
                self.expression += ', '
            self.visit(element)

    def visit_BinOp(self, node):
        """Render ``left op right``, or only ``left`` when ``op`` is ``%``."""
        self.visit(node.left)
        if not isinstance(node.op, ast.Mod):
            self.expression += self.binops[type(node.op)]
            self.visit(node.right)


def _get_metadata_from_archive(archive):
    uuid = _get_uuid(archive)
    archive_version, framework_version = _get_versions(archive, uuid)
+74 −0
Original line number Diff line number Diff line
import unittest

from galaxy.datatypes.qiime2 import strip_properties


# Note: Not all the expressions here are completely valid types; they are just
# representative examples.
class TestStripProperties(unittest.TestCase):
    """Checks the text returned by ``strip_properties`` for sample inputs."""

    def _check(self, expression, expected):
        # Shared assertion: strip the expression and compare the result.
        self.assertEqual(strip_properties(expression), expected)

    def test_simple(self):
        # A bare name followed by a predicate
        self._check('Taxonomy % Properties("SILVIA")', 'Taxonomy')

    def test_single(self):
        # One predicated field inside brackets
        self._check('FeatureData[Taxonomy % Properties("SILVIA")]',
                    'FeatureData[Taxonomy]')

    def test_double(self):
        # Two predicated fields inside the same brackets
        self._check(
            'FeatureData[Taxonomy % Properties("SILVIA"), '
            'DistanceMatrix % Axes("ASV", "ASV")]',
            'FeatureData[Taxonomy, DistanceMatrix]')

    def test_nested(self):
        # Predicates on both the inner field and the bracketed expression
        self._check(
            'Tuple[FeatureData[Taxonomy % Properties("SILVIA")] % '
            'Axes("ASV", "ASV")]',
            'Tuple[FeatureData[Taxonomy]]')

    def test_complex(self):
        # Nested brackets, several fields, and a trailing predicate
        self._check(
            'Tuple[FeatureData[Taxonomy % Properties("SILVA")] % '
            'Axis("ASV"), DistanceMatrix % Axes("ASV", "ASV")] % Unique',
            'Tuple[FeatureData[Taxonomy], DistanceMatrix]')

    def test_keep_different_binop(self):
        # Operators other than % are expected to be kept in the output
        self._check(
            'FeatureData[Taxonomy % Properties("SILVIA"), '
            'Taxonomy & Properties]',
            'FeatureData[Taxonomy, Taxonomy & Properties]')

    def test_multiple_strings(self):
        # Two calls in a row; both results are computed before either is
        # checked.
        from_simple = strip_properties('Taxonomy % Properties("SILVIA")')
        from_single = strip_properties(
            'FeatureData[Taxonomy % Properties("SILVIA")]')

        self.assertEqual(from_simple, 'Taxonomy')
        self.assertEqual(from_single, 'FeatureData[Taxonomy]')