Unverified Commit 8321b869 authored by Nicola Soranzo's avatar Nicola Soranzo
Browse files

Fix filtering by an extension list in dataset API

Fix the following traceback:

```
2022-07-13T20:32:11.5995637Z galaxy.web.framework.decorators ERROR 2022-07-13 20:20:39,564 [pN:main,p:2622,tN:worker 1] Uncaught exception in exposed API method:
2022-07-13T20:32:11.5995754Z Traceback (most recent call last):
2022-07-13T20:32:11.5996289Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/web/framework/decorators.py", line 320, in decorator
2022-07-13T20:32:11.5996434Z     rval = func(self, trans, *args, **kwargs)
2022-07-13T20:32:11.5996812Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/webapps/galaxy/api/datasets.py", line 387, in index
2022-07-13T20:32:11.5996977Z     trans, history_id, serialization_params, filter_parameters
2022-07-13T20:32:11.5997364Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/webapps/galaxy/services/datasets.py", line 214, in index
2022-07-13T20:32:11.5997749Z     filters = self.history_contents_filters.parse_query_filters(filter_query_params)
2022-07-13T20:32:11.5998133Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/managers/base.py", line 1031, in parse_query_filters
2022-07-13T20:32:11.5998269Z     return self.parse_filters(filter_params)
2022-07-13T20:32:11.5998634Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/managers/base.py", line 1040, in parse_filters
2022-07-13T20:32:11.5998770Z     filter_ = self.parse_filter(attr, op, val)
2022-07-13T20:32:11.5999135Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/managers/base.py", line 1060, in parse_filter
2022-07-13T20:32:11.5999286Z     orm_filter = self._parse_orm_filter(attr, op, val)
2022-07-13T20:32:11.5999683Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/managers/history_contents.py", line 528, in _parse_orm_filter
2022-07-13T20:32:11.5999828Z     return super()._parse_orm_filter(attr, op, val)
2022-07-13T20:32:11.6000195Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/lib/galaxy/managers/base.py", line 1136, in _parse_orm_filter
2022-07-13T20:32:11.6000293Z     orm_filter = op(val)
2022-07-13T20:32:11.6000703Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/operators.py", line 604, in in_
2022-07-13T20:32:11.6000834Z     return self.operate(in_op, other)
2022-07-13T20:32:11.6001252Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/elements.py", line 861, in operate
2022-07-13T20:32:11.6001467Z     return op(self.comparator, *other, **kwargs)
2022-07-13T20:32:11.6001884Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/operators.py", line 1386, in in_op
2022-07-13T20:32:11.6001975Z     return a.in_(b)
2022-07-13T20:32:11.6002370Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/operators.py", line 604, in in_
2022-07-13T20:32:11.6002496Z     return self.operate(in_op, other)
2022-07-13T20:32:11.6002903Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/type_api.py", line 1343, in operate
2022-07-13T20:32:11.6003001Z     op, *other, **kwargs
2022-07-13T20:32:11.6003401Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/type_api.py", line 76, in operate
2022-07-13T20:32:11.6003546Z     return o[0](self.expr, op, *(other + o[1:]), **kwargs)
2022-07-13T20:32:11.6003982Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/default_comparator.py", line 160, in _in_impl
2022-07-13T20:32:11.6004157Z     roles.InElementRole, seq_or_selectable, expr=expr, operator=op
2022-07-13T20:32:11.6004557Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/coercions.py", line 189, in expect
2022-07-13T20:32:11.6004673Z     element, argname=argname, **kw
2022-07-13T20:32:11.6005102Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/coercions.py", line 587, in _literal_coercion
2022-07-13T20:32:11.6005234Z     self._raise_for_expected(element, **kw)
2022-07-13T20:32:11.6005662Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/sql/coercions.py", line 283, in _raise_for_expected
2022-07-13T20:32:11.6005841Z     util.raise_(exc.ArgumentError(msg, code=code), replace_context=err)
2022-07-13T20:32:11.6006258Z   File "/home/runner/work/bioblend/bioblend/galaxy-release_22.01/.venv/lib/python3.7/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
2022-07-13T20:32:11.6006348Z     raise exception
2022-07-13T20:32:11.6006989Z sqlalchemy.exc.ArgumentError: IN expression list, SELECT construct, or bound parameter object expected, got 'bam,txt'.
2022-07-13T20:32:11.6007456Z 127.0.0.1 - - [13/Jul/2022:20:20:39 +0000] "GET /api/datasets?limit=500&offset=0&order=create_time-dsc&history_id=4b187121143038ff&q=extension-in&qv=bam%2Ctxt HTTP/1.1" 500 - "-" "python-requests/2.28.1"
```
parent 80b726eb
Loading
Loading
Loading
Loading
+23 −17
Original line number Diff line number Diff line
@@ -977,15 +977,17 @@ class ModelFilterParser(HasAModelManager):
        Set up, extend, or alter `orm_filter_parsers` and `fn_filter_parsers`.
        """
        # note: these are the default filters for all models
        self.orm_filter_parsers.update({
        self.orm_filter_parsers.update(
            {
                # (prob.) applicable to all models
            'id': {'op': ('in')},
            'encoded_id': {'column': 'id', 'op': ('in'), 'val': self.parse_id_list},
                "id": {"op": ("in")},
                "encoded_id": {"column": "id", "op": ("in"), "val": self.parse_id_list},
                # dates can be directly passed through the orm into a filter (no need to parse into datetime object)
            'extension': {'op': ('eq', 'like', 'in')},
            'create_time': {'op': ('le', 'ge', 'lt', 'gt'), 'val': self.parse_date},
            'update_time': {'op': ('le', 'ge', 'lt', 'gt'), 'val': self.parse_date},
        })
                "extension": {"op": ("eq", "like", "in"), "val": {"in": lambda v: v.split(",")}},
                "create_time": {"op": ("le", "ge", "lt", "gt"), "val": self.parse_date},
                "update_time": {"op": ("le", "ge", "lt", "gt"), "val": self.parse_date},
            }
        )

    def build_filter_params(
        self,
@@ -1075,7 +1077,7 @@ class ModelFilterParser(HasAModelManager):
        """
        Attempt to parse a non-ORM filter function.
        """
        # fn_filter_list is a dict: fn_filter_list[ attr ] = { 'opname1' : opfn1, 'opname2' : opfn2, etc. }
        # fn_filter_parsers is a dict: fn_filter_parsers[attr] = {"opname1": opfn1, "opname2": opfn2, etc. }

        # attr, op is a nested dictionary pointing to the filter fn
        attr_map = self.fn_filter_parsers.get(attr, None)
@@ -1095,13 +1097,13 @@ class ModelFilterParser(HasAModelManager):
        return self.parsed_filter(filter_type="function", filter=lambda i: filter_fn(i, val))

    # ---- ORM filters
    def _parse_orm_filter(self, attr, op, val):
    def _parse_orm_filter(self, attr, op, val) -> Optional[ParsedFilter]:
        """
        Attempt to parse a ORM-based filter.

        Using SQLAlchemy, this would yield a sql.elements.BinaryExpression.
        """
        # orm_filter_list is a dict: orm_filter_list[ attr ] = <list of allowed ops>
        # orm_filter_parsers is a dict: orm_filter_parsers[attr] = <column map>
        column_map = self.orm_filter_parsers.get(attr, None)
        if not column_map:
            # no column mapping (not allowlisted)
@@ -1124,16 +1126,20 @@ class ModelFilterParser(HasAModelManager):
        allowed_ops = column_map['op']
        if op not in allowed_ops:
            return None
        op = self._convert_op_string_to_fn(column, op)
        if not op:
        converted_op = self._convert_op_string_to_fn(column, op)
        if not converted_op:
            return None

        # parse the val from string using the 'val' parser if present (otherwise, leave as string)
        val_parser = column_map.get('val', None)
        val_parser = column_map.get("val")
        # val_parser can be a dictionary indexed by the operations, in case different functions
        # need to be called depending on the operation
        if isinstance(val_parser, dict):
            val_parser = val_parser.get(op)
        if val_parser:
            val = val_parser(val)

        orm_filter = op(val)
        orm_filter = converted_op(val)
        return self.parsed_filter(filter_type="orm", filter=orm_filter)

    #: these are the easier/shorter string equivalents to the python operator fn names that need '__' around them
@@ -1171,7 +1177,7 @@ class ModelFilterParser(HasAModelManager):
    # TODO: These should go somewhere central - we've got ~6 parser modules/sections now
    def parse_id_list(self, id_list_string, sep=','):
        """
        Split `id_list_string` at `sep`.
        Split `id_list_string` at `sep` and decode as ids.
        """
        # TODO: move id decoding out
        id_list = [self.app.security.decode_id(id_) for id_ in id_list_string.split(sep)]
+33 −0
Original line number Diff line number Diff line
@@ -86,6 +86,39 @@ class DatasetsApiTestCase(ApiTestCase):
        result = self._get("datasets", payload).json()
        assert len(result) == 0

    def test_search_by_extension(self):
        self.dataset_populator.new_dataset(self.history_id, wait=True)
        payload = {
            "q": ["extension"],
            "qv": ["txt"],
            "history_id": self.history_id,
        }
        assert len(self._get("datasets", payload).json()) == 1
        payload = {
            "q": ["extension"],
            "qv": ["bam"],
            "history_id": self.history_id,
        }
        assert len(self._get("datasets", payload).json()) == 0
        payload = {
            "q": ["extension-in"],
            "qv": ["bam,txt"],
            "history_id": self.history_id,
        }
        assert len(self._get("datasets", payload).json()) == 1
        payload = {
            "q": ["extension-like"],
            "qv": ["t%t"],
            "history_id": self.history_id,
        }
        assert len(self._get("datasets", payload).json()) == 1
        payload = {
            "q": ["extension-like"],
            "qv": ["b%m"],
            "history_id": self.history_id,
        }
        assert len(self._get("datasets", payload).json()) == 0

    def test_invalid_search(self):
        payload = {'limit': 10, 'offset': 0, 'q': ['history_content_type', 'tag-invalid_op'], 'qv': ['dataset', 'notag']}
        index_response = self._get("datasets", payload)