Unverified Commit 649804e1 authored by mvdbeek's avatar mvdbeek
Browse files

Optimize active_contents query

On a small history:
```json
contents_active: {
deleted: 22,
hidden: 19,
active: 29
},
```

For this history, the change reduces the request time for
history/current_history_json from ~46 ms to ~27 ms.
I expect the speedup to grow as the history gets larger.

Replaces this query:

```
SELECT deleted, visible, count(:count_2) AS count_1
FROM (SELECT anon_2.history_id AS history_id, anon_2.history_content_type AS history_content_type, anon_2.id AS id, anon_2.type_id AS type_id, anon_2.hid AS hid, anon_2.extension AS extension, anon_2.dataset_id AS dataset_id, anon_2.collection_id AS collection_id, anon_2.name AS name, anon_2.state AS state, anon_2.size AS size, anon_2.deleted AS deleted, anon_2.purged AS purged, anon_2.visible AS visible, anon_2.create_time AS create_time, anon_2.update_time AS update_time
FROM (SELECT history_dataset_association.history_id AS history_id, :param_1 AS history_content_type, history_dataset_association.id AS id, :param_2 || :param_3 || history_dataset_association.id AS type_id, history_dataset_association.hid AS hid, history_dataset_association.extension AS extension, history_dataset_association.dataset_id AS dataset_id, :param_4 AS collection_id, history_dataset_association.name AS name, dataset.state AS state, dataset.file_size AS size, history_dataset_association.deleted AS deleted, history_dataset_association.purged AS purged, history_dataset_association.visible AS visible, history_dataset_association.create_time AS create_time, history_dataset_association.update_time AS update_time
FROM history_dataset_association JOIN dataset ON dataset.id = history_dataset_association.dataset_id
WHERE history_dataset_association.history_id = :history_id_1 UNION ALL SELECT history_dataset_collection_association.history_id AS history_id, :param_5 AS history_content_type, history_dataset_collection_association.id AS id, :param_6 || :param_7 || history_dataset_collection_association.id AS type_id, history_dataset_collection_association.hid AS hid, :param_8 AS extension, :param_9 AS dataset_id, history_dataset_collection_association.collection_id AS collection_id, history_dataset_collection_association.name AS name, dataset_collection.populated_state AS state, :param_10 AS size, history_dataset_collection_association.deleted AS deleted, :param_11 AS purged, history_dataset_collection_association.visible AS visible, history_dataset_collection_association.create_time AS create_time, history_dataset_collection_association.update_time AS update_time
FROM history_dataset_collection_association JOIN dataset_collection ON dataset_collection.id = history_dataset_collection_association.collection_id
WHERE history_dataset_collection_association.history_id = :history_id_2) AS anon_2 ORDER BY anon_2.hid) AS anon_1 GROUP BY deleted, visible
```

with

```
SELECT CAST(sum(anon_1.deleted) AS INTEGER) AS deleted, CAST(sum(anon_1.hidden) AS INTEGER) AS hidden, CAST(sum(anon_1.active) AS INTEGER) AS active
FROM (SELECT sum(CAST(history_dataset_association.deleted AS INTEGER)) AS deleted, sum(CAST(history_dataset_association.visible = false AS INTEGER)) AS hidden, sum(abs(CAST(history_dataset_association.visible AS INTEGER) * (CAST(history_dataset_association.deleted AS INTEGER) - :param_1))) AS active
FROM history_dataset_association
WHERE history_dataset_association.history_id = :history_id_1 UNION ALL SELECT sum(CAST(history_dataset_collection_association.deleted AS INTEGER)) AS deleted, sum(CAST(history_dataset_collection_association.visible = false AS INTEGER)) AS hidden, sum(abs(CAST(history_dataset_collection_association.visible AS INTEGER) * (CAST(history_dataset_collection_association.deleted AS INTEGER) - :param_2))) AS active
FROM history_dataset_collection_association
WHERE history_dataset_collection_association.history_id = :history_id_2) AS anon_1
```
parent 0f417def
Loading
Loading
Loading
Loading
+26 −14
Original line number Diff line number Diff line
@@ -11,12 +11,15 @@ from typing import (

from sqlalchemy import (
    asc,
    cast,
    desc,
    false,
    func,
    Integer,
    literal,
    nullsfirst,
    nullslast,
    select,
    sql,
    true,
)
@@ -186,21 +189,30 @@ class HistoryContentsManager(base.SortableManager):
        Note: counts for deleted and hidden overlap; In other words, a dataset that's
        both deleted and hidden will be added to both totals.
        """
        returned = dict(deleted=0, hidden=0, active=0)
        contents_subquery = self._union_of_contents_query(history).subquery()
        columns = [sql.column("deleted"), sql.column("visible"), func.count("*")]
        statement = (
            sql.select(columns).select_from(contents_subquery).group_by(sql.column("deleted"), sql.column("visible"))
        hda_select = self._active_counts_statement(model.HistoryDatasetAssociation, history.id)
        hdca_select = self._active_counts_statement(model.HistoryDatasetCollectionAssociation, history.id)
        subquery = hda_select.union_all(hdca_select).subquery()
        statement = select(
            cast(func.sum(subquery.c.deleted), Integer).label("deleted"),
            cast(func.sum(subquery.c.hidden), Integer).label("hidden"),
            cast(func.sum(subquery.c.active), Integer).label("active"),
        )
        returned = self.app.model.context.execute(statement).one()
        return dict(returned)

    def _active_counts_statement(self, model_class, history_id):
        """
        Build a single-row SELECT that counts one kind of history content.

        :param model_class: mapped class to count; this method reads its
            ``deleted``, ``visible`` and ``table`` attributes and filters its
            rows on ``history_id``.
        :param history_id: id of the history whose rows are counted.
        :returns: a select statement with the columns ``deleted``, ``hidden``
            and ``active``.

        ``deleted`` and ``hidden`` overlap: a row that is both deleted and not
        visible is counted in both. ``active`` only counts rows that are
        visible and not deleted.
        """
        deleted_as_int = cast(model_class.deleted, Integer)
        visible_as_int = cast(model_class.visible, Integer)
        hidden_as_int = cast(model_class.visible == false(), Integer)
        # visible * (deleted - 1) is -1 for a visible, non-deleted row and 0
        # otherwise; abs() turns that into a 0/1 flag that can be summed.
        active_as_int = func.abs(visible_as_int * (deleted_as_int - 1))
        return (
            select(
                # SUM() over zero matching rows is NULL in SQL. Coalesce to 0 so
                # that a history without any rows reports 0 instead of None.
                func.coalesce(func.sum(deleted_as_int), 0).label("deleted"),
                func.coalesce(func.sum(hidden_as_int), 0).label("hidden"),
                func.coalesce(func.sum(active_as_int), 0).label("active"),
            )
            .select_from(model_class.table)
            .filter_by(history_id=history_id)
        )
        groups = self.app.model.context.execute(statement).fetchall()
        for deleted, visible, count in groups:
            if deleted:
                returned["deleted"] += count
            if not visible:
                returned["hidden"] += count
            if not deleted and visible:
                returned["active"] += count
        return returned

    def map_datasets(self, history, fn, **kwargs):
        """