Unverified Commit a7af3526 authored by Marius van den Beek's avatar Marius van den Beek Committed by GitHub
Browse files

Merge pull request #18083 from davelopez/23.2_backport_18052

[23.2] Fix history export with missing dataset hids 
parents ecef3ef5 419e4954
Loading
Loading
Loading
Loading
+8 −10
Original line number | Diff line number | Diff line
@@ -1961,8 +1961,6 @@ class DirectoryModelExportStore(ModelExportStore):

        dir_name = "datasets"
        dir_path = os.path.join(export_directory, dir_name)
        dataset_hid = as_dict["hid"]
        assert dataset_hid, as_dict

        if dataset.dataset.id in self.dataset_id_to_path:
            file_name, extra_files_path = self.dataset_id_to_path[dataset.dataset.id]
@@ -1981,7 +1979,7 @@ class DirectoryModelExportStore(ModelExportStore):
                self.serialization_options.get_identifier(self.security, conversion) if conversion else None
            )
            target_filename = get_export_dataset_filename(
                as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key
                as_dict["name"], as_dict["extension"], as_dict["encoded_id"], conversion_key=conversion_key
            )
            arcname = os.path.join(dir_name, target_filename)
            src = file_name
@@ -1997,7 +1995,7 @@ class DirectoryModelExportStore(ModelExportStore):

            if len(file_list):
                extra_files_target_filename = get_export_dataset_extra_files_dir_name(
                    as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key
                    as_dict["encoded_id"], conversion_key=conversion_key
                )
                arcname = os.path.join(dir_name, extra_files_target_filename)
                add(extra_files_path, os.path.join(export_directory, arcname))
@@ -2967,22 +2965,22 @@ def tar_export_directory(export_directory: StrPath, out_file: StrPath, gzip: boo
            store_archive.add(os.path.join(export_directory, export_path), arcname=export_path)


def get_export_dataset_filename(name: str, ext: str, encoded_id: str, conversion_key: Optional[str]) -> str:
    """
    Builds a filename for a dataset using its name and extension.

    The dataset's encoded id (not its hid, which can be missing) is appended to
    keep filenames unique within the export; when ``conversion_key`` is given,
    it is included so converted variants of the same dataset do not collide.
    """
    # Replace any character not in the filename whitelist with an underscore.
    base = "".join(c if c in FILENAME_VALID_CHARS else "_" for c in name)
    if not conversion_key:
        return f"{base}_{encoded_id}.{ext}"
    return f"{base}_{encoded_id}_conversion_{conversion_key}.{ext}"


def get_export_dataset_extra_files_dir_name(encoded_id: str, conversion_key: Optional[str]) -> str:
    """
    Builds the extra-files directory name for a dataset in an export.

    Uses the dataset's encoded id (not its hid, which can be missing) to keep
    the directory unique; ``conversion_key`` is appended for converted
    variants so they do not collide with the primary dataset's directory.
    """
    if not conversion_key:
        return f"extra_files_path_{encoded_id}"
    return f"extra_files_path_{encoded_id}_conversion_{conversion_key}"


def imported_store_for_metadata(
+15 −0
Original line number | Diff line number | Diff line
@@ -544,6 +544,21 @@ def validate_invocation_collection_crate_directory(crate_directory):
        assert dataset in root["hasPart"]


def test_export_history_with_missing_hid():
    # Export filenames used to be composed from the dataset's hid, which can
    # sometimes be absent; the encoded id is used now, so exporting a history
    # containing a hid-less dataset must not raise.
    app = _mock_app()
    u, history, d1, d2, j = _setup_simple_cat_job(app)

    # Simulate a dataset that never received a hid.
    d1.hid = None
    app.commit()

    export_dir = mkdtemp()
    with store.DirectoryModelExportStore(export_dir, app=app, export_files="copy") as export_store:
        export_store.export_history(history)


def test_export_history_to_ro_crate(tmp_path):
    app = _mock_app()
    u, history, d1, d2, j = _setup_simple_cat_job(app)