Unverified commit 59b5b6be, authored by Marius van den Beek, committed by GitHub
Browse files

Merge pull request #20874 from abretaud/hic_datatype

[25.0] Add HiC datatype
parents cfb2f752 4045f569
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -239,6 +239,7 @@
    <datatype extension="mbi" type="galaxy.datatypes.binary:H5" subclass="true" mimetype="application/octet-stream" display_in_upload="true" description="MBI is MOBILion's proprietary HDF5-based format for its ion mobility mass spectrometry data." />
    <datatype extension="hyphy_results.json" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="false"/>
    <datatype extension="hivtrace" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="false"/>
    <datatype extension="hic" type="galaxy.datatypes.binary:Hic" mimetype="application/octet-stream" display_in_upload="true"/>
    <datatype extension="cool" type="galaxy.datatypes.binary:Cool" mimetype="application/octet-stream" display_in_upload="true"/>
    <datatype extension="mcool" type="galaxy.datatypes.binary:MCool" mimetype="application/octet-stream" display_in_upload="true"/>
    <datatype extension="h5mlm" type="galaxy.datatypes.binary:H5MLM" mimetype="application/octet-stream" display_in_upload="true"/>
@@ -1223,6 +1224,7 @@
    <sniffer type="galaxy.datatypes.binary:NcbiTaxonomySQlite"/>
    <sniffer type="galaxy.datatypes.binary:SQlite"/>
    <sniffer type="galaxy.datatypes.binary:H5MLM"/>
    <sniffer type="galaxy.datatypes.binary:Hic"/>
    <sniffer type="galaxy.datatypes.binary:Cool"/>
    <sniffer type="galaxy.datatypes.binary:MCool"/>
    <sniffer type="galaxy.datatypes.binary:Loom"/>
+60 −0
Original line number Diff line number Diff line
@@ -4770,3 +4770,63 @@ class Numpy(Binary):
            return dataset.peek
        except Exception:
            return f"Binary numpy file ({nice_size(dataset.get_size())})"


@build_sniff_from_prefix
class Hic(Binary):
    """
    Hic: highly compressed binary file that stores contact matrices
    from multiple resolutions in a clever way, allowing random access.
    https://github.com/aidenlab/hic-format

    Files are recognised by the leading ``HIC`` magic bytes; the format
    version is read from the header and stored as metadata.

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('merlin.hic')
    >>> Hic().sniff(fname)
    True
    >>> fname = get_test_fname('test.mz5')
    >>> Hic().sniff(fname)
    False
    """

    file_ext = "hic"

    # Format version as decoded from the file header by ``set_meta``.
    MetadataElement(
        name="version",
        default="",
        param=MetadataParameter,
        desc="Version of the HiC file format",
        readonly=True,
        visible=True,
        no_value=0,
        optional=True,
    )

    def __init__(self, **kwd):
        """Initialise the datatype and record the magic bytes used for sniffing."""
        super().__init__(**kwd)
        self._magic = b"HIC"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """Return True when the file content starts with the ``HIC`` magic bytes."""
        return file_prefix.startswith_bytes(self._magic)

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """
        Set the peek and blurb shown for the dataset.

        For a dataset that still exists the blurb holds the file size and the
        format version from the metadata; for a purged dataset both fields
        say that the file is gone.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary HiC file"
            dataset.blurb = f"{nice_size(dataset.get_size())}"
            dataset.blurb += f"\nHiC Format v{dataset.metadata.version}"
        else:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"

    def display_peek(self, dataset: DatasetProtocol) -> str:
        """Return the stored peek, or a generic size description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            return f"Binary HiC file ({nice_size(dataset.get_size())})"

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
        """
        Set metadata for HiC file.

        Reads the first 8 bytes of the file and decodes bytes 4-7 as a
        little-endian signed 32-bit integer, which is stored as the
        ``version`` metadata element. If the file is shorter than 8 bytes
        the version is left untouched instead of raising ``struct.error``.
        """
        header_size = 8
        # NOTE(review): the version presumably follows a 4-byte NUL-terminated
        # magic string, per the hic-format specification - confirm.
        version_offset = 4

        with open(dataset.get_file_name(), "rb") as handle:
            header_bytes = handle.read(header_size)

        # Sniffing only checks the first three bytes, so a truncated file can
        # reach this point; there is no version to decode in that case.
        if len(header_bytes) < header_size:
            return

        dataset.metadata.version = struct.unpack_from("<i", header_bytes, version_offset)[0]
+931 B

File added.

No diff preview for this file type.