Unverified Commit 8a8112e6 authored by Marius van den Beek's avatar Marius van den Beek Committed by GitHub
Browse files

Merge pull request #20876 from nsoranzo/sort1_drop_Python_script

Drop Python helper script from sort1 tool
parents 2fa7b43a 4768d3d1
Loading
Loading
Loading
Loading

tools/filters/sorter.py

deleted100644 → 0
+0 −71
Original line number Diff line number Diff line
"""
Sorts tabular data on one or more columns. All comments of the file are collected
and placed at the beginning of the sorted output file.
"""

# 03/05/2013 guerler

import argparse
import subprocess
import sys


def stop_err(msg):
    """Abort the script by raising ``SystemExit`` carrying *msg*.

    When the exception reaches the interpreter, a string *msg* is written to
    stderr and the process ends with exit status 1.
    """
    raise SystemExit(msg)


def main():
    """Sort a tabular file with the system ``sort`` command.

    Command-line options (read from ``sys.argv``):
        -i/--input         tabular file to be sorted
        -o/--output        file that receives the result
        -k/--key           key specification handed to ``sort -k``; repeatable
        -H/--header_lines  number of leading lines copied unsorted (default 0)

    The output is written in three stages:
        1. the first ``header_lines`` lines of the input, unchanged;
        2. every line of the input that starts with ``#``;
        3. the remaining lines (after skipping the header) that are non-empty
           and do not start with ``#``, sorted case-insensitively on the
           requested keys using TAB as the field separator.

    Note that stage 2 scans the whole input, so a header line that itself
    starts with ``#`` is emitted by both stage 1 and stage 2.

    Exits with status 0 on success; any failure is reported through
    ``stop_err``.
    """
    # define options
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-i", "--input", type=argparse.FileType("r"), help="Tabular file to be sorted")
    parser.add_argument("-o", "--output", type=argparse.FileType("w"), help="Sorted output file")
    parser.add_argument("-k", "--key", action="append", help="Key (see manual for bash/sort)")
    # default=0 so that omitting -H means "no header" instead of comparing None with an int
    parser.add_argument("-H", "--header_lines", type=int, default=0, help="Number of header lines to ignore")

    # parse
    args = parser.parse_args()

    try:
        # retrieve options
        input_fh = args.input
        output_fh = args.output
        header_lines = args.header_lines
        key_args = []
        # args.key is None when no -k was given; sort then compares whole lines
        for k in args.key or []:
            key_args.extend(["-k", k])

        # The child processes read and write through the file descriptors of
        # input_fh / output_fh directly, so the input must be rewound after
        # each stage while the output offset simply keeps advancing.

        # sed header
        if header_lines > 0:
            sed_header = ["sed", "-n", f"1,{header_lines:d}p"]
            subprocess.check_call(sed_header, stdin=input_fh, stdout=output_fh)
            input_fh.seek(0)

        # grep comments
        grep_comments = ["grep", "^#"]
        exit_code = subprocess.call(grep_comments, stdin=input_fh, stdout=output_fh)
        input_fh.seek(0)
        # grep exits with 1 when nothing matched, which is not an error here
        if exit_code not in [0, 1]:
            stop_err("Searching for comment lines failed")

        # grep and sort columns
        skip_header = None
        if header_lines > 0:
            sed_cmd = ["sed", f"1,{header_lines:d}d"]
            skip_header = subprocess.Popen(sed_cmd, stdin=input_fh, stdout=subprocess.PIPE)
            pipe_stdin = skip_header.stdout
        else:
            pipe_stdin = input_fh
        grep = subprocess.Popen(["grep", "^[^#]"], stdin=pipe_stdin, stdout=subprocess.PIPE)
        if skip_header is not None:
            # drop the parent's copy so sed gets SIGPIPE if grep exits early
            skip_header.stdout.close()
        sort = subprocess.Popen(["sort", "-f", "-t", "\t"] + key_args, stdin=grep.stdout, stdout=output_fh)
        # drop the parent's copy so grep gets SIGPIPE if sort exits early
        grep.stdout.close()

        # wait for every stage so no child is left unreaped
        sort.wait()
        grep.wait()
        if skip_header is not None:
            skip_header.wait()

        # explicit checks instead of assert, which is removed under ``python -O``
        if sort.returncode != 0:
            raise RuntimeError(f"sort pipeline exited with non-zero exit code: {sort.returncode:d}")
        if grep.returncode not in (0, 1):
            raise RuntimeError(f"grep in sort pipeline exited with unexpected exit code: {grep.returncode:d}")

    except Exception as ex:
        stop_err("Error running sorter.py\n" + str(ex))

    # exit
    sys.exit(0)


# Run the sorter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
+17 −11
Original line number Diff line number Diff line
<tool id="sort1" name="Sort" version="1.2.0">
    <description>data in ascending or descending order</description>
    <requirements>
        <requirement type="package" version="3.8">python</requirement>
        <requirement type="package" version="2.14">grep</requirement>
        <requirement type="package" version="4.4">sed</requirement>
        <requirement type="package" version="8.31">coreutils</requirement>
    </requirements>
    <command detect_errors="exit_code">
python '$__tool_directory__/sorter.py'

--input='$input'
--output='$out_file1'
    <command detect_errors="exit_code"><![CDATA[
## Sorts tabular data on one or more columns. All comments of the file are
## collected and placed at the beginning of the sorted output file.
#if $header_lines > 0:
    sed -n 1,${header_lines}p '$input' > '$out_file1' &&
#end if
grep '^#' '$input' >> '$out_file1';
#if $header_lines > 0:
    sed 1,${header_lines}d '$input' |
#else:
    < '$input'
#end if
grep '^[^#]' | sort -f -t \$'\t'

#if (str($style) == 'num'):
    #set $style = 'n'
@@ -22,8 +29,7 @@ python '$__tool_directory__/sorter.py'

#set $order = '' if (str($order) == 'ASC') else 'r'

--key=${column},${column}${style}${order}

-k ${column},${column}${style}${order}

#for $col in $column_set:
    #set $other_column = str($col.other_column)
@@ -37,10 +43,10 @@ python '$__tool_directory__/sorter.py'
    #end if

    #set $other_order = '' if (str($col.other_order) == "ASC") else 'r'
    --key=${other_column},${other_column}${other_style}${other_order}
    -k ${other_column},${other_column}${other_style}${other_order}
#end for
--header $header_lines
    </command>
>> '$out_file1'
    ]]></command>
    <inputs>
        <param format="tabular" name="input" type="data" label="Sort Dataset" />
        <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true"/>