Unverified Commit 4768d3d1 authored by Nicola Soranzo's avatar Nicola Soranzo
Browse files

Drop Python helper script from sort1 tool

Also:
- Incidentally address the deprecation of the `argparse.FileType` type
  converter in Python 3.14, see
  https://docs.python.org/3.14/whatsnew/3.14.html#deprecated
  https://github.com/galaxyproject/galaxy/issues/16854
- Drop the now unnecessary Python requirement.
- Use CDATA.
parent 2fa7b43a
Loading
Loading
Loading
Loading

tools/filters/sorter.py

deleted100644 → 0
+0 −71
Original line number Diff line number Diff line
"""
Sorts tabular data on one or more columns. All comments of the file are collected
and placed at the beginning of the sorted output file.
"""

# 03/05/2013 guerler

import argparse
import subprocess
import sys


def stop_err(msg):
    """Abort the program, handing ``msg`` to the interpreter's exit machinery.

    Raising ``SystemExit`` with a string prints that string to stderr and
    yields exit status 1; this is exactly what ``sys.exit(msg)`` does.
    """
    raise SystemExit(msg)


def main():
    """Sort a tabular file on one or more columns with sed, grep and sort.

    The output file is assembled in three steps:

    1. the first ``--header_lines`` lines of the input are copied verbatim;
    2. every line starting with ``#`` is copied;
    3. the remaining lines (header removed, empty lines and ``#`` lines
       dropped by ``grep '^[^#]'``) are sorted case-insensitively on the
       tab-separated ``--key`` definitions and appended.

    ``--header_lines`` defaults to 0 and ``--key`` to no keys when omitted.
    Any failure is reported through ``stop_err``; on success the process
    exits with status 0.
    """
    # define options
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-i", "--input", type=argparse.FileType("r"), help="Tabular file to be sorted")
    parser.add_argument("-o", "--output", type=argparse.FileType("w"), help="Sorted output file")
    parser.add_argument("-k", "--key", action="append", help="Key (see manual for bash/sort)")
    parser.add_argument("-H", "--header_lines", type=int, help="Number of header lines to ignore")

    # parse
    args = parser.parse_args()

    try:
        # retrieve options
        input_fh = args.input
        output_fh = args.output
        # argparse leaves omitted options as None; treat that as "no header"
        # and "no keys" instead of failing later with a TypeError.
        header_lines = args.header_lines or 0
        key_args = []
        for k in args.key or []:
            key_args.extend(["-k", k])

        # sed header
        if header_lines > 0:
            sed_header = ["sed", "-n", f"1,{header_lines:d}p"]
            subprocess.check_call(sed_header, stdin=input_fh, stdout=output_fh)
            # the child consumed the shared file descriptor; rewind it
            input_fh.seek(0)

        # grep comments
        grep_comments = ["grep", "^#"]
        exit_code = subprocess.call(grep_comments, stdin=input_fh, stdout=output_fh)
        input_fh.seek(0)
        # grep exits with 1 when nothing matched, which is not an error here
        if exit_code not in [0, 1]:
            stop_err("Searching for comment lines failed")

        # grep and sort columns
        sed_proc = None
        if header_lines > 0:
            sed_cmd = ["sed", f"1,{header_lines:d}d"]
            sed_proc = subprocess.Popen(sed_cmd, stdin=input_fh, stdout=subprocess.PIPE)
            pipe_stdin = sed_proc.stdout
        else:
            pipe_stdin = input_fh
        grep = subprocess.Popen(["grep", "^[^#]"], stdin=pipe_stdin, stdout=subprocess.PIPE)
        if sed_proc is not None:
            # drop the parent's copy of the read end so sed receives SIGPIPE
            # if grep exits early
            sed_proc.stdout.close()
        sort = subprocess.Popen(["sort", "-f", "-t", "\t"] + key_args, stdin=grep.stdout, stdout=output_fh)
        # same reasoning as above, for the grep -> sort pipe
        grep.stdout.close()

        # wait for every stage so no child is left unreaped
        sort_rc = sort.wait()
        grep_rc = grep.wait()
        sed_rc = sed_proc.wait() if sed_proc is not None else 0

        # explicit checks instead of ``assert``, which is removed under -O
        if sort_rc != 0:
            raise RuntimeError(f"sort pipeline exited with non-zero exit code: {sort_rc:d}")
        if grep_rc not in (0, 1):
            raise RuntimeError(f"grep in sort pipeline exited with non-zero exit code: {grep_rc:d}")
        if sed_rc != 0:
            raise RuntimeError(f"sed in sort pipeline exited with non-zero exit code: {sed_rc:d}")

    except Exception as ex:
        stop_err("Error running sorter.py\n" + str(ex))

    # exit
    sys.exit(0)


# Run the sorter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
+17 −11
Original line number Diff line number Diff line
<tool id="sort1" name="Sort" version="1.2.0">
    <description>data in ascending or descending order</description>
    <requirements>
        <requirement type="package" version="3.8">python</requirement>
        <requirement type="package" version="2.14">grep</requirement>
        <requirement type="package" version="4.4">sed</requirement>
        <requirement type="package" version="8.31">coreutils</requirement>
    </requirements>
    <command detect_errors="exit_code">
python '$__tool_directory__/sorter.py'

--input='$input'
--output='$out_file1'
    <command detect_errors="exit_code"><![CDATA[
## Sorts tabular data on one or more columns. All comments of the file are
## collected and placed at the beginning of the sorted output file.
#if $header_lines > 0:
    sed -n 1,${header_lines}p '$input' > '$out_file1' &&
#end if
grep '^#' '$input' >> '$out_file1';
#if $header_lines > 0:
    sed 1,${header_lines}d '$input' |
#else:
    < '$input'
#end if
grep '^[^#]' | sort -f -t \$'\t'

#if (str($style) == 'num'):
    #set $style = 'n'
@@ -22,8 +29,7 @@ python '$__tool_directory__/sorter.py'

#set $order = '' if (str($order) == 'ASC') else 'r'

--key=${column},${column}${style}${order}

-k ${column},${column}${style}${order}

#for $col in $column_set:
    #set $other_column = str($col.other_column)
@@ -37,10 +43,10 @@ python '$__tool_directory__/sorter.py'
    #end if

    #set $other_order = '' if (str($col.other_order) == "ASC") else 'r'
    --key=${other_column},${other_column}${other_style}${other_order}
    -k ${other_column},${other_column}${other_style}${other_order}
#end for
--header $header_lines
    </command>
>> '$out_file1'
    ]]></command>
    <inputs>
        <param format="tabular" name="input" type="data" label="Sort Dataset" />
        <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true"/>