Unverified Commit 82c428d3 authored by Marius van den Beek's avatar Marius van den Beek Committed by GitHub
Browse files

Merge pull request #19563 from mvdbeek/replace_sanitize_html

[24.2] Set content-type to text/plain if dataset not safe
parents 816e4b78 12dc5cd1
Loading
Loading
Loading
Loading
+93 −40
Original line number Diff line number Diff line
<script setup lang="ts">
import { storeToRefs } from "pinia";
import { computed, ref, watch } from "vue";

import { useUserStore } from "@/stores/userStore";
import { withPrefix } from "@/utils/redirect";

import Alert from "@/components/Alert.vue";
import LoadingSpan from "@/components/LoadingSpan.vue";

const emit = defineEmits(["load"]);
const props = withDefaults(
    defineProps<{
        id?: string;
        src?: string;
        isPreview?: boolean;
    }>(),
    {
        id: "frame",
        src: "",
        isPreview: false,
    }
);

const { isAdmin } = storeToRefs(useUserStore());
const srcWithRoot = computed(() => withPrefix(props.src));
const sanitizedImport = ref(false);
const sanitizedToolId = ref<String | false>(false);
const isLoading = ref(true);
watch(
    () => srcWithRoot.value,
    async () => {
        sanitizedImport.value = false;
        sanitizedToolId.value = false;
        if (props.isPreview) {
            try {
                const response = await fetch(srcWithRoot.value, { method: "HEAD" });
                const isImported = response.headers.get("x-sanitized-job-imported");
                const toolId = response.headers.get("x-sanitized-tool-id");
                if (isImported !== null) {
                    sanitizedImport.value = true;
                } else if (toolId !== null) {
                    sanitizedToolId.value = toolId;
                }
            } catch (e) {
                // I guess that's fine and the center panel will show something
                console.error(e);
            }
        }
    },
    { immediate: true }
);

const plainText = "Contents are shown as plain text.";
const sanitizedMessage = computed(() => {
    if (sanitizedImport.value) {
        return `Dataset has been imported. ${plainText}`;
    } else if (sanitizedToolId.value) {
        return `Dataset created by a tool that is not known to create safe HTML. ${plainText}`;
    }
    return undefined;
});

function onLoad(ev: Event) {
    isLoading.value = false;
    const iframe = ev.currentTarget as HTMLIFrameElement;
    const location = iframe?.contentWindow && iframe.contentWindow.location;
    try {
        if (location && location.host && location.pathname != "/") {
            emit("load");
        }
    } catch (err) {
        console.warn("CenterFrame - onLoad location access forbidden.", ev, location);
    }
}
</script>
<template>
    <div class="h-100">
        <Alert v-if="sanitizedMessage" :dismissible="true" variant="warning" data-description="sanitization warning">
            {{ sanitizedMessage }}
            <span v-if="isAdmin && sanitizedToolId">
                <br />
                <router-link data-description="allowlist link" to="/admin/sanitize_allow">Review Allowlist</router-link>
                if outputs of {{ sanitizedToolId }} are trusted and should be shown as HTML.
            </span>
        </Alert>
        <LoadingSpan v-if="isLoading">Loading ...</LoadingSpan>
        <iframe
            :id="id"
            :name="id"
@@ -9,38 +95,5 @@
            width="100%"
            height="100%"
            @load="onLoad" />
    </div>
</template>
<script>
import { withPrefix } from "utils/redirect";

export default {
    props: {
        id: {
            type: String,
            default: "frame",
        },
        src: {
            type: String,
            default: "",
        },
    },
    computed: {
        srcWithRoot() {
            return withPrefix(this.src);
        },
    },
    methods: {
        onLoad(ev) {
            const iframe = ev.currentTarget;
            const location = iframe.contentWindow && iframe.contentWindow.location;
            try {
                if (location && location.host && location.pathname != "/") {
                    this.$emit("load");
                }
            } catch (err) {
                console.warn("CenterFrame - onLoad location access forbidden.", ev, location);
            }
        },
    },
};
</script>
+1 −0
Original line number Diff line number Diff line
@@ -238,6 +238,7 @@ export function getRouter(Galaxy) {
                        component: CenterFrame,
                        props: (route) => ({
                            src: `/datasets/${route.params.datasetId}/display/?preview=True`,
                            isPreview: true,
                        }),
                    },
                    {
+9 −13
Original line number Diff line number Diff line
@@ -60,7 +60,6 @@ from galaxy.util.markdown import (
    indicate_data_truncated,
    literal_via_fence,
)
from galaxy.util.sanitize_html import sanitize_html
from galaxy.util.zipstream import ZipstreamWrapper
from . import (
    dataproviders as p_dataproviders,
@@ -635,21 +634,18 @@ class Data(metaclass=DataMeta):
        return result

    def _yield_user_file_content(self, trans, from_dataset: HasCreatingJob, filename: str, headers: Headers) -> IO:
        """This method is responsible for sanitizing the HTML if needed."""
        """This method sets the content type header to text/plain if we don't trust html content."""
        if trans.app.config.sanitize_all_html and headers.get("content-type", None) == "text/html":
            # Sanitize anytime we respond with plain text/html content.
            # Check to see if this dataset's parent job is allowlisted
            # We cannot currently trust imported datasets for rendering.
            if not from_dataset.creating_job.imported and from_dataset.creating_job.tool_id.startswith(
                tuple(trans.app.config.sanitize_allowlist)
            ):
                return open(filename, mode="rb")

            # This is returning to the browser, it needs to be encoded.
            # TODO Ideally this happens a layer higher, but this is a bad
            # issue affecting many tools
            with open(filename) as f:
                return sanitize_html(f.read()).encode("utf-8")
            content_type = "text/html"
            if from_dataset.creating_job.imported:
                content_type = "text/plain"
                headers["x-sanitized-job-imported"] = True
            if not from_dataset.creating_job.tool_id.startswith(tuple(trans.app.config.sanitize_allowlist)):
                content_type = "text/plain"
                headers["x-sanitized-tool-id"] = from_dataset.creating_job.tool_id
            headers["content-type"] = content_type

        return open(filename, mode="rb")

+3 −0
Original line number Diff line number Diff line
@@ -116,6 +116,9 @@ class HasDriver:
    def element_absent(self, selector_template: Target) -> bool:
        return len(self.find_elements(selector_template)) == 0

    def switch_to_frame(self, name: str = "frame"):
        return self._wait_on(ec.frame_to_be_available_and_switch_to_it((By.NAME, name)))

    def wait_for_xpath(self, xpath: str, **kwds) -> WebElement:
        element = self._wait_on(
            ec.presence_of_element_located((By.XPATH, xpath)), f"XPATH selector [{xpath}] to become present", **kwds
+25 −0
Original line number Diff line number Diff line
<tool id="html_output" name="html_output" version="1.0.0">
    <command><![CDATA[
cp '$html_file' '$output'
    ]]></command>
    <configfiles>
        <configfile name="html_file"><![CDATA[
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title data-description="hello-world">Hello World</title>
</head>
<body>
    <main>
       <h1>Hello, World!</h1>
    </main>
</body>
</html>
        ]]></configfile>
    </configfiles>
    <outputs>
        <data name="output" format="html" />
    </outputs>
</tool>
Loading