Unverified commit d4a4562a, authored by Marius van den Beek and committed by GitHub
Browse files

Merge pull request #20872 from mvdbeek/perf_fix_anton_invocation_2

[25.0] Use ``populated_optimized`` when serializing collection
parents 81f9e6da c6e52c07
Loading
Loading
Loading
Loading
+9 −23
Original line number Diff line number Diff line
@@ -17,7 +17,6 @@ import pwd
import random
import string
from collections import defaultdict
from collections.abc import Callable
from dataclasses import dataclass
from datetime import (
    datetime,
@@ -133,7 +132,6 @@ from sqlalchemy.orm import (
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.orm.collections import attribute_keyed_dict
from sqlalchemy.orm.session import Session
from sqlalchemy.sql import exists
from sqlalchemy.sql.expression import FromClause
from typing_extensions import (
    Literal,
@@ -6666,15 +6664,6 @@ class ImplicitlyConvertedDatasetAssociation(Base, Serializable):
DEFAULT_COLLECTION_NAME = "Unnamed Collection"


class InnerCollectionFilter(NamedTuple):
    """A single comparison to apply against a named attribute of a table-like object.

    Fields:
        column: name of the attribute to look up on the object given to
            ``produce_filter``.
        operator_function: two-argument callable invoked as
            ``operator_function(<attribute>, expected_value)``.
        expected_value: right-hand operand handed to ``operator_function``.
    """

    column: str
    operator_function: Callable
    expected_value: Union[str, int, float, bool]

    def produce_filter(self, table):
        """Apply ``operator_function`` to ``table.<column>`` and ``expected_value``.

        ``table`` may be any object exposing an attribute named ``self.column``;
        whatever ``operator_function`` returns is passed back unchanged.
        """
        target_attribute = getattr(table, self.column)
        return self.operator_function(target_attribute, self.expected_value)


class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
    __tablename__ = "dataset_collection"

@@ -6732,7 +6721,6 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
                ]
            ]
        ] = None,
        inner_filter: Optional[InnerCollectionFilter] = None,
    ):
        collection_attributes = collection_attributes or ()
        element_attributes = element_attributes or ()
@@ -6768,7 +6756,7 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
            order_by_columns.append(inner_dce.c.element_index)
            q = q.outerjoin(inner_dce, inner_dce.c.dataset_collection_id == dce.c.child_collection_id)
            if collection_attributes:
                q = q.join(inner_dc, inner_dc.c.id == dce.c.child_collection_id)
                q = q.outerjoin(inner_dc, inner_dc.c.id == dce.c.child_collection_id)
                q = q.add_columns(
                    *attribute_columns(inner_dc.c, collection_attributes, nesting_level),
                )
@@ -6776,8 +6764,6 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
            dce = inner_dce
            dc = inner_dc
            depth_collection_type = depth_collection_type.split(":", 1)[1]
        if inner_filter:
            q = q.filter(inner_filter.produce_filter(dc.c))

        if (
            hda_attributes
@@ -6874,15 +6860,12 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
            else:
                stmt = self._build_nested_collection_attributes_stmt(
                    collection_attributes=("populated_state",),
                    inner_filter=InnerCollectionFilter(
                        "populated_state", operator.__ne__, DatasetCollection.populated_states.OK
                    ),
                )
                stmt = stmt.subquery()
                stmt = select(~exists(stmt))
                session = required_object_session(self)
                _populated_optimized = session.scalar(stmt)

                for row in session.execute(stmt):
                    if any(state not in (DatasetCollection.populated_states.OK, None) for state in row):
                        _populated_optimized = False
                        break
            self._populated_optimized = _populated_optimized

        return self._populated_optimized
@@ -6895,6 +6878,9 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
    def populated(self):
        """Report whether this collection and any nested subcollections are populated.

        The collection's own ``populated_state`` must equal
        ``DatasetCollection.populated_states.OK``. When it does and the
        collection has subcollections, the nested level is checked as well:
        via ``populated_optimized`` if ``self.id`` is truthy, otherwise by
        walking ``self.elements`` and requiring every element to have a
        child collection that is itself populated.
        """
        state_ok = self.populated_state == DatasetCollection.populated_states.OK
        if not (state_ok and self.has_subcollections):
            # Nothing nested to inspect (or the top level already failed):
            # the top-level comparison is the whole answer.
            return state_ok
        if self.id:
            # NOTE(review): a truthy id presumably means the collection is
            # persisted, so the query-backed check can be used -- confirm.
            return self.populated_optimized
        # No id available: fall back to checking each element in Python.
        return all(element.child_collection and element.child_collection.populated for element in self.elements)