Commit b2bb36e7 authored by Crow, Michael C.
Browse files

Added an argument to process a single file and a verbose argument

parent 37285b93
Loading
Loading
Loading
Loading

ess-dive-meta.py

100644 → 100755
+53 −15
Original line number Diff line number Diff line
@@ -14,6 +14,9 @@ from hashlib import md5
from io import StringIO
from datetime import datetime as dt

verbose = False
debug = False
trace = False

demo_configuration = {
    'columns': {
@@ -155,16 +158,35 @@ def text_find_header(input_file: str):
    with open(input_file, "r") as f:

        # Get the lines.
        try:
            lns = f.readlines()
        except:
            print('failed')
            if verbose:
                try:
                    lns = []
                    for l in f:
                        print('l:',l)
                        lns.append(l)
                    print('----------------------')
                    print(lns)
                    print('----------------------')
                except:
                    sys.exit(1)
        if verbose:
            n = len([l for l in lns if l.startswith('#')])
            print('lines ignored:', n)

    # Get the number of lines.
    ln_count = len(lns)

    # Get one quarter tail of the file; make pseudo file.
    tail_io = StringIO("\n".join(lns[-1*int(ln_count/4):]))
    if trace: print(tail_io.readlines())

    # Read with pandas and count the number of columns.
    tail_cols = len(list(pd.read_csv(tail_io)))
    #, skiprows=[0])))

    # Loop until we find the header row.
    for i, ln in enumerate(lns):
@@ -196,7 +218,6 @@ def text_find_header(input_file: str):
    return None



def text_parser(file: str, metadata: bool=False, dropna: bool=True):
    """
    A table parser for any input file that can be read as text.
@@ -212,7 +233,6 @@ def text_parser(file: str, metadata: bool=False, dropna: bool=True):
    an integer indicating the header row number (if hdrline is True).

    """
    
    # Find the header row.
    header_row = text_find_header(file)

@@ -412,16 +432,23 @@ def buildConfiguration(fields):

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', dest='dir', type=str, help='directory containing csv files', required=True)
    parser.add_argument('-v', '--verbose', action='store_true', help='turn on additional printing')
    parser.add_argument('-b', '--debug', action='store_true', help='turn on additional printing for debugging')
    parser.add_argument('-t', '--trace', action='store_true', help='turn on much more printing for debugging')
    parser.add_argument('-g', '--config', dest='config', type=argparse.FileType('r'), default=demo_configuration,
                        help='csv file with a single row containing items to retrieve from the files - Ex: site,plot,latitude,soil_depth')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-d', '--directory', dest='dir', type=str, help='directory containing csv files')#, required=True)
    group.add_argument('-f', '--file', dest='file', type=argparse.FileType('r'), help='csv file')#, required=True)
    args = parser.parse_args()

    if args.config != demo_configuration:
        fields = pd.read_csv(args.config)
        cfg = buildConfiguration(fields)
    else:
        cfg = args.config
        args.config = buildConfiguration(fields)

    verbose = args.verbose
    debug = args.debug
    trace = args.trace

    # if args.config != demo_configuration:
    #     items = list(pd.read_csv(args.config).keys())
@@ -429,20 +456,31 @@ if __name__ == "__main__":
    #     items = None
    #     config = args.config

    if os.path.isdir(args.dir):
    if args.dir:
        if not os.path.isdir(args.dir):
            print('Must be a valid directory')
            parser.print_usage()
            sys.exit(1)

        for (root, dirs, files) in os.walk(args.dir):

            paths = [ os.path.join(root, file) for file in files if file.endswith('csv') ]

            for path in paths:

                # Call main.
                output = main(path, config=cfg)
                output = main(path, config=args.config)

                # Dump the dict as a json to stdout.
                print(json.dumps(output, indent=2, cls=NumpyEncoder))
    else:
        print('Must be a valid directory')
        parser.print_usage()

    elif args.file:
        output = main(args.file.name, config=args.config)
        print(json.dumps(output, indent=2, cls=NumpyEncoder))

    else:
        print('something went wrong')
        sys.exit(1)

    # Exit demo.
    sys.exit()