Unverified Commit 76bf9bfc authored by Marius van den Beek's avatar Marius van den Beek Committed by GitHub
Browse files

Merge pull request #13764 from natefoo/distributed-no-search

[22.01] Add an option to disable searching for distributed object store datasets that don't have an object_store_id
parents 5aad0131 5dc725c7
Loading
Loading
Loading
Loading
+7 −1
Original line number Diff line number Diff line
@@ -59,10 +59,16 @@

    In distributed and hierarchical object stores, you can have some backends
    automatically go unused whenever they become too full. Setting the maxpctfull
    attribute (on top level object_store it behaves as a global default) enables
    attribute (on top level backends tag it behaves as a global default) enables
    this, or it can be applied to individual backends to override a global
    setting. This only applies to disk based backends and not remote object
    stores.

    By default, if a dataset should exist but its object_store_id is null, all
    backends will be searched until it is found. This is to aid in Galaxy
    servers moving from non-distributed to distributed object stores, but this
    behavior can be disabled by setting search_for_missing="false" on the top
    level backends tag.
-->
<!--
<object_store type="distributed">
+10 −2
Original line number Diff line number Diff line
@@ -3238,12 +3238,20 @@ class Dataset(StorableObject, Serializable, _HasTable):
        return self.state in self.ready_states

    def get_file_name(self):
        if self.purged:
            log.warning(f"Attempt to get file name of purged dataset {self.id}")
            return ''
        if not self.external_filename:
            assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}"
            if self.object_store.exists(self):
                return self.object_store.get_filename(self)
                file_name = self.object_store.get_filename(self)
            else:
                return ''
                file_name = ''
            if not file_name and self.state not in (self.states.NEW, self.states.QUEUED):
                # Queued datasets can be assigned an object store and have a filename, but they aren't guaranteed to.
                # Anything after queued should have a file name.
                log.warning(f"Failed to determine file name for dataset {self.id}")
            return file_name
        else:
            filename = self.external_filename
        # Make filename absolute
+13 −9
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ import yaml

from galaxy.exceptions import ObjectInvalid, ObjectNotFound
from galaxy.util import (
    asbool,
    directory_hash_id,
    force_symlink,
    parse_xml,
@@ -775,6 +776,7 @@ class DistributedObjectStore(NestedObjectStore):
        self.original_weighted_backend_ids = []
        self.max_percent_full = {}
        self.global_max_percent_full = config_dict.get("global_max_percent_full", 0)
        self.search_for_missing = config_dict.get("search_for_missing", True)
        random.seed()

        for backend_def in config_dict["backends"]:
@@ -812,6 +814,7 @@ class DistributedObjectStore(NestedObjectStore):

        backends: List[Dict[str, Any]] = []
        config_dict = {
            'search_for_missing': asbool(backends_root.get('search_for_missing', True)),
            'global_max_percent_full': float(backends_root.get('maxpctfull', 0)),
            'backends': backends,
        }
@@ -857,6 +860,7 @@ class DistributedObjectStore(NestedObjectStore):
    def to_dict(self) -> Dict[str, Any]:
        as_dict = super().to_dict()
        as_dict["global_max_percent_full"] = self.global_max_percent_full
        as_dict["search_for_missing"] = self.search_for_missing
        backends: List[Dict[str, Any]] = []
        for backend_id, backend in self.backends.items():
            backend_as_dict = backend.to_dict()
@@ -918,13 +922,13 @@ class DistributedObjectStore(NestedObjectStore):
            else:
                log.warning('The backend object store ID (%s) for %s object with ID %s is invalid'
                            % (obj.object_store_id, obj.__class__.__name__, obj.id))
        elif self.search_for_missing:
            # if this instance has been switched from a non-distributed to a
            # distributed object store, the object may not have a store id yet;
            # search each backend to try to locate the object
            for id, store in self.backends.items():
                if store.exists(obj, **kwargs):
                log.warning('%s object with ID %s found in backend object store with ID %s'
                            % (obj.__class__.__name__, obj.id, id))
                    log.warning(f"{obj.__class__.__name__} object with ID {obj.id} found in backend object store with ID {id}")
                    obj.object_store_id = id
                    return id
        return None