Commit c796dec0 authored by John Chilton
Browse files

Rework galaxy.model.store.discover to not require unique paths for incoming files.

parent e394b184
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -199,14 +199,14 @@ class BaseJobContext(ModelPersistenceContext):
    def add_dataset_collection(self, collection):
        """Do nothing with ``collection``; this base context ignores it."""

    def find_files(self, output_name, collection, dataset_collectors) -> list:
        """Discover output files for ``output_name`` and return them as a list.

        Returns a list of discovered-file objects rather than a dict keyed by
        path, so multiple discovered files are allowed to share the same
        incoming path.
        """
        discovered_files = []
        for discovered_file in discover_files(
            output_name, self.tool_provided_metadata, dataset_collectors, self.job_working_directory, collection
        ):
            # Keep a running count of every file discovered in this context.
            self.increment_discovered_file_count()
            discovered_files.append(discovered_file)
        return discovered_files

    def get_job_id(self):
        """Return the associated job id (always ``None`` here).

        Subclasses override this method to supply a real job id.
        """
        return None
+14 −7
Original line number Diff line number Diff line
@@ -253,7 +253,13 @@ class ModelPersistenceContext(metaclass=abc.ABCMeta):
                log.exception("Exception occured while setting dataset peek")

    def populate_collection_elements(
        self, collection, root_collection_builder, filenames, name=None, metadata_source_name=None, final_job_state="ok"
        self,
        collection,
        root_collection_builder,
        discovered_files,
        name=None,
        metadata_source_name=None,
        final_job_state="ok",
    ):
        # TODO: allow configurable sorting.
        #    <sort by="lexical" /> <!-- default -->
@@ -263,7 +269,7 @@ class ModelPersistenceContext(metaclass=abc.ABCMeta):
        if name is None:
            name = "unnamed output"
        if self.flush_per_n_datasets and self.flush_per_n_datasets > 0:
            for chunk in chunk_iterable(filenames.items(), size=self.flush_per_n_datasets):
            for chunk in chunk_iterable(discovered_files, size=self.flush_per_n_datasets):
                self._populate_elements(
                    chunk=chunk,
                    name=name,
@@ -278,7 +284,7 @@ class ModelPersistenceContext(metaclass=abc.ABCMeta):
                    self.flush()
        else:
            self._populate_elements(
                chunk=filenames.items(),
                chunk=discovered_files,
                name=name,
                root_collection_builder=root_collection_builder,
                metadata_source_name=metadata_source_name,
@@ -293,7 +299,8 @@ class ModelPersistenceContext(metaclass=abc.ABCMeta):
            "paths": [],
            "extra_files": [],
        }
        for filename, discovered_file in chunk:
        for discovered_file in chunk:
            filename = discovered_file.path
            create_dataset_timer = ExecutionTimer()
            fields_match = discovered_file.match
            if not fields_match:
@@ -688,7 +695,7 @@ def persist_elements_to_hdca(
    hdca,
    collector=None,
):
    filenames = {}
    discovered_files = []

    def add_to_discovered_files(elements, parent_identifiers=None):
        parent_identifiers = parent_identifiers or []
@@ -699,7 +706,7 @@ def persist_elements_to_hdca(
                discovered_file = discovered_file_for_element(
                    element, model_persistence_context, parent_identifiers, collector=collector
                )
                filenames[discovered_file.path] = discovered_file
                discovered_files.append(discovered_file)

    add_to_discovered_files(elements)

@@ -708,7 +715,7 @@ def persist_elements_to_hdca(
    model_persistence_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
        discovered_files,
    )
    collection_builder.populate()