Commit cb32611e authored by Yakubov, Sergey's avatar Yakubov, Sergey
Browse files

implement display function for all datatypes

display the first 100K of a binary file or a file whose type is unknown to Galaxy
as ASCII text instead of downloading this file.
parent 6f90feeb
Loading
Loading
Loading
Loading
Loading
+100 −49
Original line number Diff line number Diff line
@@ -157,6 +157,36 @@ class DataMeta(type):
        metadata.Statement.process(cls)


def _is_binary_file(data):
    from galaxy.datatypes import binary

    return isinstance(data.datatype, binary.Binary) or type(data.datatype) is Data


def _get_max_peak_size(data):
    from galaxy.datatypes import (
        binary,
        text,
    )

    max_peek_size = DEFAULT_MAX_PEEK_SIZE  # 1 MB
    if isinstance(data.datatype, text.Html):
        max_peek_size = 10000000  # 10 MB for html
    elif isinstance(data.datatype, binary.Binary):
        max_peek_size = 100000  # 100 KB for binary
    return max_peek_size


def _get_file_size(data):
    file_size = int(data.dataset.file_size or 0)
    if file_size == 0:
        if data.dataset.object_store:
            file_size = data.dataset.object_store.size(data.dataset)
        else:
            file_size = os.stat(data.file_name).st_size
    return file_size


@p_dataproviders.decorators.has_dataproviders
class Data(metaclass=DataMeta):
    """
@@ -435,6 +465,57 @@ class Data(metaclass=DataMeta):
            file_paths.append(dataset.file_name)
        return zip(file_paths, rel_paths)

    def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
        composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
        composite_extensions.append("html")  # for archiving composite datatypes
        composite_extensions.append("data_manager_json")  # for downloading bundles if bundled.

        if data.extension in composite_extensions:
            return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get("do_action", "zip"))
        else:
            headers["Content-Length"] = str(file_size)
            filename = self._download_filename(
                data,
                to_ext,
                hdca=kwd.get("hdca"),
                element_identifier=kwd.get("element_identifier"),
                filename_pattern=kwd.get("filename_pattern"),
            )
            headers[
                "content-type"
            ] = "application/octet-stream"  # force octet-stream so Safari doesn't append mime extensions to filename
            headers["Content-Disposition"] = f'attachment; filename="{filename}"'
            return open(data.file_name, "rb"), headers

    def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
        headers["content-type"] = "text/html"
        return (
            trans.fill_template_mako(
                "/dataset/binary_file.mako",
                data=data,
                file_contents=open(data.file_name, "rb").read(max_peek_size),
                file_size=util.nice_size(file_size),
                truncated=file_size > max_peek_size,
            ),
            headers,
        )

    def _serve_file_contents(self, trans, data, headers, preview, file_size, max_peek_size):
        from galaxy.datatypes import images

        preview = util.string_as_bool(preview)
        if not preview or isinstance(data.datatype, images.Image) or file_size < max_peek_size:
            return self._yield_user_file_content(trans, data, data.file_name, headers), headers

        # preview large text file
        headers["content-type"] = "text/html"
        return (
            trans.fill_template_mako(
                "/dataset/large_file.mako", truncated_data=open(data.file_name, "rb").read(max_peek_size), data=data
            ),
            headers,
        )

    def display_data(
        self,
        trans,
@@ -454,13 +535,10 @@ class Data(metaclass=DataMeta):
        providers?).
        """
        headers = kwd.get("headers", {})
        # Relocate all composite datatype display to a common location.
        composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
        composite_extensions.append("html")  # for archiving composite datatypes
        composite_extensions.append("data_manager_json")  # for downloading bundles if bundled.
        # Prevent IE8 from sniffing content type since we're explicit about it.  This prevents intentionally text/plain
        # content from being rendered in the browser
        headers["X-Content-Type-Options"] = "nosniff"

        if filename and filename != "index":
            # For files in extra_files_path
            extra_dir = dataset.dataset.extra_files_path_name
@@ -509,55 +587,28 @@ class Data(metaclass=DataMeta):
                raise ObjectNotFound(f"Could not find '{filename}' on the extra files path {file_path}.")
        self._clean_and_set_mime_type(trans, dataset.get_mime(), headers)

        trans.log_event(f"Display dataset id: {str(dataset.id)}")
        from galaxy.datatypes import (  # DBTODO REMOVE THIS AT REFACTOR
            binary,
            images,
            text,
        )

        if dataset.dataset.object_store:
            dataset.dataset.object_store.update_cache(dataset.dataset, trans=trans)

        if to_ext or isinstance(dataset.datatype, binary.Binary):  # Saving the file, or binary file
            if dataset.extension in composite_extensions:
                return self._archive_composite_dataset(trans, dataset, headers, do_action=kwd.get("do_action", "zip"))
            else:
                headers["Content-Length"] = str(os.stat(dataset.file_name).st_size)
                filename = self._download_filename(
                    dataset,
                    to_ext,
                    hdca=kwd.get("hdca"),
                    element_identifier=kwd.get("element_identifier"),
                    filename_pattern=kwd.get("filename_pattern"),
                )
                headers[
                    "content-type"
                ] = "application/octet-stream"  # force octet-stream so Safari doesn't append mime extensions to filename
                headers["Content-Disposition"] = f'attachment; filename="{filename}"'
                return open(dataset.file_name, "rb"), headers

        downloading = to_ext is not None
        file_size = _get_file_size(dataset)

        if not os.path.exists(dataset.file_name):
            raise ObjectNotFound(f"File Not Found ({dataset.file_name}).")
        max_peek_size = DEFAULT_MAX_PEEK_SIZE  # 1 MB
        if isinstance(dataset.datatype, text.Html):
            max_peek_size = 10000000  # 10 MB for html
        preview = util.string_as_bool(preview)

        if downloading:
            trans.log_event(f"Download dataset id: {str(dataset.id)}")
            return self._serve_file_download(headers, dataset, trans, to_ext, file_size, **kwd)
        else:  # displaying
            trans.log_event(f"Display dataset id: {str(dataset.id)}")
            max_peek_size = _get_max_peak_size(dataset)
            if (
            not preview
            or isinstance(dataset.datatype, images.Image)
            or os.stat(dataset.file_name).st_size < max_peek_size
        ):
            return self._yield_user_file_content(trans, dataset, dataset.file_name, headers), headers
        else:
            headers["content-type"] = "text/html"
            return (
                trans.fill_template_mako(
                    "/dataset/large_file.mako",
                    truncated_data=open(dataset.file_name, "rb").read(max_peek_size),
                    data=dataset,
                ),
                headers,
            )
                _is_binary_file(dataset) and preview and hasattr(trans, "fill_template_mako")
            ):  # preview file which format is unknown (to Galaxy), we still try to display this as text
                return self._serve_binary_file_contents_as_text(trans, dataset, headers, file_size, max_peek_size)
            else:  # text/html, or image, or display was called without preview flag
                return self._serve_file_contents(trans, dataset, headers, preview, file_size, max_peek_size)

    def display_as_markdown(self, dataset_instance: DatasetProtocol) -> str:
        """Prepare for embedding dataset into a basic Markdown document.
@@ -566,7 +617,7 @@ class Data(metaclass=DataMeta):
        on datatypes not tightly tied to a Galaxy version (e.g. datatypes in the
        Tool Shed).

        Speaking very losely - the datatype should should load a bounded amount
        Speaking very losely - the datatype should load a bounded amount
        of data from the supplied dataset instance and prepare for embedding it
        into Markdown. This should be relatively vanilla Markdown - the result of
        this is bleached and it should not contain nested Galaxy Markdown
+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@
${ render_deleted_data_message( data ) }

<div class="warningmessagelarge">
    This is a binary dataset of size ${ file_size }. Preview is not implemented for this filetype. Displaying
    This is a binary (or unknown to Galaxy) dataset of size ${ file_size }. Preview is not implemented for this filetype. Displaying
    %if truncated:
first 100KB
    %endif