Commit b2bb36e7 authored by Crow, Michael C's avatar Crow, Michael C
Browse files

added argument to process a single file and verbose argument

parent 37285b93
......@@ -14,6 +14,9 @@ from hashlib import md5
from io import StringIO
from datetime import datetime as dt
verbose = False
debug = False
trace = False
demo_configuration = {
'columns': {
......@@ -155,16 +158,35 @@ def text_find_header(input_file: str):
with open(input_file, "r") as f:
# Get the lines.
lns = f.readlines()
lns = f.readlines()
if verbose:
lns = []
for l in f:
if verbose:
n = len([l for l in lns if l.startswith('#')])
print('lines ignored:', n)
# Get the number of lines.
ln_count = len(lns)
# Get one quarter tail of the file; make pseudo file.
tail_io = StringIO("\n".join(lns[-1*int(ln_count/4):]))
if trace: print(tail_io.readlines())
# Read with pandas and count the number of columns.
tail_cols = len(list(pd.read_csv(tail_io)))
#, skiprows=[0])))
# Loop until we find the header row.
for i, ln in enumerate(lns):
......@@ -196,7 +218,6 @@ def text_find_header(input_file: str):
return None
def text_parser(file: str, metadata: bool=False, dropna: bool=True):
A table parser for any input file that can be read as text.
......@@ -212,10 +233,9 @@ def text_parser(file: str, metadata: bool=False, dropna: bool=True):
an integer indicating the header row number (if hdrline is True).
# Find the header row.
header_row = text_find_header(file)
# Read the table.
df = pd.read_csv(file, header=header_row)
......@@ -327,10 +347,10 @@ def coord_summary(data_frame, config: dict):
# Grab the column.
column = data_frame[match]
# Get the min and max of the matched columns.
coords[coord] = {'min': column.min(), 'max': column.max()}
except Exception as e:
raise e
# Ignore exceptions (for now).
......@@ -412,16 +432,23 @@ def buildConfiguration(fields):
if __name__ == "__main__":
    # Command-line entry point: parse either a single CSV file or a
    # directory of CSV files and dump the extracted summary as JSON.
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true', help='turn on additional printing')
    parser.add_argument('-b', '--debug', action='store_true', help='turn on additional printing for debugging')
    parser.add_argument('-t', '--trace', action='store_true', help='turn on much more printing for debugging')
    parser.add_argument('-g', '--config', dest='config', type=argparse.FileType('r'), default=demo_configuration,
                        help='csv file with a single row containing items to retrieve from the files - Ex: site,plot,latitude,soil_depth')
    # -d and -f are alternatives: exactly one input source must be given.
    # (The option strings must not also be registered on the parser itself,
    # or argparse raises a conflict error at startup.)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-d', '--directory', dest='dir', type=str, help='directory containing csv files')
    group.add_argument('-f', '--file', dest='file', type=argparse.FileType('r'), help='csv file')
    args = parser.parse_args()

    # A user-supplied config file replaces the built-in demo configuration.
    if args.config != demo_configuration:
        fields = pd.read_csv(args.config)
        args.config = buildConfiguration(fields)

    # Publish the flag values as module-level globals so the helper
    # functions (which read `verbose` / `debug` / `trace`) see them.
    verbose = args.verbose
    debug = args.debug
    trace = args.trace

    if args.dir:
        if not os.path.isdir(args.dir):
            # Report and stop: do not walk a path that is not a directory.
            print('Must be a valid directory')
        else:
            for (root, dirs, files) in os.walk(args.dir):
                paths = [os.path.join(root, file) for file in files if file.endswith('csv')]
                for path in paths:
                    # Call main on each discovered CSV and emit JSON.
                    output = main(path, config=args.config)
                    print(json.dumps(output, indent=2, cls=NumpyEncoder))
    elif args.file:
        # FileType('r') already opened the file; main() takes a path
        # elsewhere in this script, so pass the underlying name.
        # NOTE(review): confirm main() accepts a path rather than a
        # file object — the original line was a syntax error
        # (`main(, config=...)`) with the argument missing.
        output = main(args.file.name, config=args.config)
        print(json.dumps(output, indent=2, cls=NumpyEncoder))
    else:
        # Unreachable while the group is required=True; kept as a guard.
        print('something went wrong')
    # Exit demo.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment