Unverified Commit be3db670 authored by mvdbeek's avatar mvdbeek
Browse files

Skip data_manager_json files that aren't bundles

The file_size hack turns out not to work: people report related
bugs that they should not have been able to encounter without
generating bundles.
It's probably still true that bundles should produce a distinct
datatype.
parent 172ef05f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -138,6 +138,7 @@ class DataManagerJson(Json):
    MetadataElement(
        name="data_tables", default=None, desc="Data tables represented by this dataset", readonly=True, visible=True
    )
    MetadataElement(name="is_bundle", default=False, desc="Dataset represents bundle", readonly=True, visible=True)

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd):
        super().set_meta(dataset=dataset, overwrite=overwrite, **kwd)
+3 −17
Original line number Diff line number Diff line
@@ -31,8 +31,6 @@ from pydantic import (
)
from sqlalchemy import (
    and_,
    Cast,
    ColumnElement,
    desc,
    false,
    func,
@@ -40,7 +38,6 @@ from sqlalchemy import (
    select,
    true,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import (
    aliased,
    joinedload,
@@ -72,6 +69,7 @@ from galaxy.model import (
    StoredWorkflow,
    StoredWorkflowTagAssociation,
    StoredWorkflowUserShareAssociation,
    to_json,
    User,
    Workflow,
    WorkflowInvocation,
@@ -2067,26 +2065,14 @@ class WorkflowContentsManager(UsesAnnotations):
    ) -> Optional[model.StoredWorkflow]:
        sa_session = self.app.model.session

        def to_json(column, keys: List[str]):
            assert sa_session.bind
            if sa_session.bind.dialect.name == "postgresql":
                cast: Union[ColumnElement[Any], Cast[Any]] = func.cast(func.convert_from(column, "UTF8"), JSONB)
                for key in keys:
                    cast = cast.__getitem__(key)
                return cast.astext
            else:
                for key in keys:
                    column = func.json_extract(column, f"$.{key}")
                return column

        stmnt = (
            select(model.StoredWorkflow)
            .join(model.Workflow, model.Workflow.id == model.StoredWorkflow.latest_workflow_id)
            .filter(
                and_(
                    model.StoredWorkflow.deleted == false(),
                    to_json(model.Workflow.source_metadata, ["trs_tool_id"]) == trs_id,
                    to_json(model.Workflow.source_metadata, ["trs_version_id"]) == trs_version,
                    to_json(sa_session, model.Workflow.source_metadata, ["trs_tool_id"]) == trs_id,
                    to_json(sa_session, model.Workflow.source_metadata, ["trs_version_id"]) == trs_version,
                )
            )
        )
+17 −1
Original line number Diff line number Diff line
@@ -68,8 +68,10 @@ from sqlalchemy import (
    bindparam,
    Boolean,
    case,
    Cast,
    Column,
    column,
    ColumnElement,
    DateTime,
    delete,
    desc,
@@ -100,6 +102,7 @@ from sqlalchemy import (
    update,
    VARCHAR,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.exc import (
    CompileError,
    OperationalError,
@@ -318,6 +321,19 @@ def get_uuid(uuid: Optional[Union[UUID, str]] = None) -> UUID:
    return UUID(str(uuid))


def to_json(sa_session, column, keys: List[str]):
    """Build a dialect-appropriate SQL expression that extracts a nested JSON value.

    Walks *keys* into the JSON stored in *column* and returns an expression
    yielding the value as text. On PostgreSQL the (bytea) column is decoded
    from UTF-8 and cast to JSONB; on other dialects (e.g. SQLite) the
    ``json_extract`` function is applied with a ``$.key`` path per level.

    :param sa_session: active SQLAlchemy session; its bind's dialect selects
        the expression strategy (must be bound).
    :param column: column (or SQL expression) containing the JSON payload.
    :param keys: successive JSON keys to descend into.
    :return: a SQLAlchemy column expression producing the extracted text value.
    """
    assert sa_session.bind
    if sa_session.bind.dialect.name == "postgresql":
        expr: Union[ColumnElement[Any], Cast[Any]] = func.cast(func.convert_from(column, "UTF8"), JSONB)
        for key in keys:
            expr = expr[key]
        # .astext renders the JSONB value as text so it compares against strings.
        return expr.astext
    result = column
    for key in keys:
        result = func.json_extract(result, f"$.{key}")
    return result


class Base(DeclarativeBase, _HasTable):
    __abstract__ = True
    metadata = MetaData(naming_convention=NAMING_CONVENTION)
@@ -904,8 +920,8 @@ class User(Base, Dictifiable, RepresentById):
                Dataset.state == "ok",
                # excludes data manager runs that actually populated tables.
                # maybe track this formally by creating a different datatype for bundles ?
                Dataset.total_size != Dataset.file_size,
                HistoryDatasetAssociation._metadata.contains(data_table),
                to_json(session, HistoryDatasetAssociation._metadata, ["is_bundle"]) == "true",
            )
            .order_by(HistoryDatasetAssociation.id)
        )
+1 −0
Original line number Diff line number Diff line
@@ -3261,6 +3261,7 @@ class DataManagerTool(OutputParameterJSONTool):
                    create=True,
                    preserve_symlinks=True,
                )
                hda.metadata.is_bundle = True

        else:
            raise Exception("Unknown data manager mode encountered type...")