Unverified Commit 2b4772d2 authored by David López's avatar David López Committed by GitHub
Browse files

Merge pull request #19367 from KaiOnGitHub/dataverse-integration

Add Dataverse RDM repository integration
parents 95b74bd9 c257b7d9
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -130,6 +130,7 @@ tool_test_output.json
client/**/jsconfig.json
vetur.config.js
.pre-commit-config.yaml
galaxy.code-workspace

# Chrom len files
*.len
+1 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ export const URI_PREFIXES = [
    "drs://",
    "invenio://",
    "zenodo://",
    "dataverse://",
    "elabftw://",
];

+18 −0
Original line number Diff line number Diff line
@@ -229,6 +229,24 @@
  public_name: ${user.preferences['zenodo_sandbox|public_name']}
  writable: true

- type: dataverse
  id: dataverse
  doc: Dataverse is an open-source data repository platform designed for sharing, preserving, and managing research data, offering tools for data citation, exploration, and collaboration.
  label: Dataverse
  url: https://dataverse.org
  token: ${user.user_vault.read_secret('preferences/dataverse/token')}
  public_name: ${user.preferences['dataverse|public_name']}
  writable: true

- type: dataverse
  id: dataverse_sandbox
  doc: This is the sandbox instance of Dataverse. It is used for testing purposes only, content is NOT preserved. DOIs created in this instance are not real and will not resolve.
  label: Dataverse Sandbox (use only for testing purposes)
  url: https://demo.dataverse.org
  token: ${user.user_vault.read_secret('preferences/dataverse_sandbox/token')}
  public_name: ${user.preferences['dataverse_sandbox|public_name']}
  writable: true

# Note for developers: you can easily set up a minimal, dockerized Onedata environment
# using the so-called "demo-mode": https://onedata.org/#/home/documentation/topic/stable/demo-mode
- type: onedata
+26 −0
Original line number Diff line number Diff line
@@ -136,6 +136,32 @@ preferences:
              type: text
              required: False
    
    dataverse:
        description: Your Dataverse Integration Settings
        inputs:
            - name: token
              label: API Token used to create draft records and to upload files. You can manage your tokens at https://YOUR_INSTANCE/dataverseuser.xhtml?selectTab=apiTokenTab (Replace YOUR_INSTANCE with your Dataverse instance URL)
              type: secret
              # store: vault # Requires setting up vault_config_file in your galaxy.yml
              required: False
            - name: public_name
              label: Creator name to associate with new datasets (formatted as "Last name, First name"). If left blank "Anonymous Galaxy User" will be used. You can always change this by editing your dataset directly.
              type: text
              required: False

    dataverse_sandbox:
        description: Your Dataverse Integration Settings (TESTING ONLY)
        inputs:
            - name: token
              label: API Token used to create draft records and to upload files. You can manage your tokens at https://demo.dataverse.org/dataverseuser.xhtml?selectTab=apiTokenTab (Replace demo.dataverse.org with your Dataverse instance URL)
              type: secret
              # store: vault # Requires setting up vault_config_file in your galaxy.yml
              required: False
            - name: public_name
              label: Creator name to associate with new datasets (formatted as "Last name, First name"). If left blank "Anonymous Galaxy User" will be used. You can always change this by editing your dataset directly.
              type: text
              required: False

    # Used in file_sources_conf.yml
    onedata:
        description: Your Onedata account
+46 −58
Original line number Diff line number Diff line
@@ -25,9 +25,11 @@ class RDMFilesSourceProperties(FilesSourceProperties):
    public_name: str


class RecordFilename(NamedTuple):
    record_id: str
    filename: str
class ContainerAndFileIdentifier(NamedTuple):
    """The file_identifier could be a filename or a file_id."""

    container_id: str
    file_identifier: str


class RDMRepositoryInteractor:
@@ -35,6 +37,10 @@ class RDMRepositoryInteractor:

    This class is not intended to be used directly, but rather to be subclassed
    by file sources that interact with RDM repositories.

    Different RDM repositories use different terminology. Also they use the same term for different things.
    To prevent confusion, we use the term "container" in the base repository.
    This is an abstract term for the entity that contains multiple files.
    """

    def __init__(self, repository_url: str, plugin: "RDMFilesSource"):
@@ -54,13 +60,13 @@ class RDMRepositoryInteractor:
        """
        return self._repository_url

    def to_plugin_uri(self, record_id: str, filename: Optional[str] = None) -> str:
        """Creates a valid plugin URI to reference the given record_id.
    def to_plugin_uri(self, container_id: str, filename: Optional[str] = None) -> str:
        """Creates a valid plugin URI to reference the given container_id.

        If a filename is provided, the URI will reference the specific file in the record."""
        If a filename is provided, the URI will reference the specific file in the container."""
        raise NotImplementedError()

    def get_records(
    def get_file_containers(
        self,
        writeable: bool,
        user_context: OptionalUserContext = None,
@@ -69,54 +75,57 @@ class RDMRepositoryInteractor:
        query: Optional[str] = None,
        sort_by: Optional[str] = None,
    ) -> Tuple[List[RemoteDirectory], int]:
        """Returns the list of records in the repository and the total count of records.
        """Returns the list of file containers in the repository and the total count containers.

        If writeable is True, only records that the user can write to will be returned.
        If writeable is True, only containers that the user can write to will be returned.
        The user_context might be required to authenticate the user in the repository.
        """
        raise NotImplementedError()

    def get_files_in_record(
        self, record_id: str, writeable: bool, user_context: OptionalUserContext = None
    def get_files_in_container(
        self,
        container_id: str,
        writeable: bool,
        user_context: OptionalUserContext = None,
        query: Optional[str] = None,
    ) -> List[RemoteFile]:
        """Returns the list of files contained in the given record.
        """Returns the list of files of a file container.

        If writeable is True, we are signaling that the user intends to write to the record.
        If writeable is True, we are signaling that the user intends to write to the container.
        """
        raise NotImplementedError()

    def create_draft_record(
        self, title: str, public_name: Optional[str] = None, user_context: OptionalUserContext = None
    ):
        """Creates a draft record (directory) in the repository with basic metadata.
    def create_draft_file_container(self, title: str, public_name: str, user_context: OptionalUserContext = None):
        """Creates a draft file container in the repository with basic metadata.

        The metadata is usually just the title of the record and the user that created it.
        The metadata is usually just the title of the container and the user that created it.
        Some plugins might also provide additional metadata defaults in the user settings."""
        raise NotImplementedError()

    def upload_file_to_draft_record(
    def upload_file_to_draft_container(
        self,
        record_id: str,
        container_id: str,
        filename: str,
        file_path: str,
        user_context: OptionalUserContext = None,
    ) -> None:
        """Uploads a file with the provided filename (from file_path) to a draft record with the given record_id.
        """Uploads a file with the provided filename (from file_path) to a draft container with the given container_id.

        The draft container must have been created in advance with the `create_draft_file_container` method.

        The draft record must have been created in advance with the `create_draft_record` method.
        The file must exist in the file system at the given file_path.
        The user_context might be required to authenticate the user in the repository.
        """
        raise NotImplementedError()

    def download_file_from_record(
    def download_file_from_container(
        self,
        record_id: str,
        filename: str,
        container_id: str,
        file_identifier: str,
        file_path: str,
        user_context: OptionalUserContext = None,
    ) -> None:
        """Downloads a file with the provided filename from the record with the given record_id.
        """Downloads a file with the provided filename from the container with the given container_id.

        The file will be downloaded to the file system at the given file_path.
        The user_context might be required to authenticate the user in the repository if the
@@ -132,13 +141,11 @@ class RDMFilesSource(BaseFilesSource):
    by file sources that interact with RDM repositories.

    A RDM file source is similar to a regular file source, but instead of tree of
    files and directories, it provides a (one level) list of records (representing directories)
    files and directories, it provides a (one level) list of containers
    that can contain only files (no subdirectories).

    In addition, RDM file sources might need to create a new record (directory) in advance in the
    repository, and then upload a file to it. This is done by calling the `create_entry`
    method.

    In addition, RDM file sources might need to create a new container in advance in the
    repository, and then upload a file to it. This is done by calling the `_create_entry` method.
    """

    plugin_kind = PluginKind.rdm
@@ -164,35 +171,16 @@ class RDMFilesSource(BaseFilesSource):
        This must be implemented by subclasses."""
        raise NotImplementedError()

    def parse_path(self, source_path: str, record_id_only: bool = False) -> RecordFilename:
        """Parses the given source path and returns the record_id and filename.
    def parse_path(self, source_path: str, container_id_only: bool = False) -> ContainerAndFileIdentifier:
        """Parses the given source path and returns the container_id and filename.

        The source path must have the format '/<record_id>/<file_name>'.
        If record_id_only is True, the source path must have the format '/<record_id>' and an
        empty filename will be returned.
        """
        If container_id_only is True, an empty filename will be returned.

        def get_error_msg(details: str) -> str:
            return f"Invalid source path: '{source_path}'. Expected format: '{expected_format}'. {details}"

        expected_format = "/<record_id>"
        if not source_path.startswith("/"):
            raise ValueError(get_error_msg("Must start with '/'."))
        parts = source_path[1:].split("/", 2)
        if record_id_only:
            if len(parts) != 1:
                raise ValueError(get_error_msg("Please provide the record_id only."))
            return RecordFilename(record_id=parts[0], filename="")
        expected_format = "/<record_id>/<file_name>"
        if len(parts) < 2:
            raise ValueError(get_error_msg("Please provide both the record_id and file_name."))
        if len(parts) > 2:
            raise ValueError(get_error_msg("Too many parts. Please provide the record_id and file_name only."))
        record_id, file_name = parts
        return RecordFilename(record_id=record_id, filename=file_name)

    def get_record_id_from_path(self, source_path: str) -> str:
        return self.parse_path(source_path, record_id_only=True).record_id
        This must be implemented by subclasses."""
        raise NotImplementedError()

    def get_container_id_from_path(self, source_path: str) -> str:
        raise NotImplementedError()

    def _serialization_props(self, user_context: OptionalUserContext = None):
        effective_props = {}
Loading