Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ngee-arctic
ess-dive-meta
Commits
b2bb36e7
Commit
b2bb36e7
authored
Nov 30, 2020
by
Crow, Michael C
Browse files
added argument to process a single file and verbose argument
parent
37285b93
Changes
1
Hide whitespace changes
Inline
Side-by-side
ess-dive-meta.py
100644 → 100755
View file @
b2bb36e7
...
...
@@ -14,6 +14,9 @@ from hashlib import md5
from
io
import
StringIO
from
datetime
import
datetime
as
dt
verbose
=
False
debug
=
False
trace
=
False
demo_configuration
=
{
'columns'
:
{
...
...
@@ -155,16 +158,35 @@ def text_find_header(input_file: str):
with
open
(
input_file
,
"r"
)
as
f
:
# Get the lines.
lns
=
f
.
readlines
()
try
:
lns
=
f
.
readlines
()
except
:
print
(
'failed'
)
if
verbose
:
try
:
lns
=
[]
for
l
in
f
:
print
(
'l:'
,
l
)
lns
.
append
(
l
)
print
(
'----------------------'
)
print
(
lns
)
print
(
'----------------------'
)
except
:
sys
.
exit
(
1
)
if
verbose
:
n
=
len
([
l
for
l
in
lns
if
l
.
startswith
(
'#'
)])
print
(
'lines ignored:'
,
n
)
# Get the number of lines.
ln_count
=
len
(
lns
)
# Get one quarter tail of the file; make pseudo file.
tail_io
=
StringIO
(
"
\n
"
.
join
(
lns
[
-
1
*
int
(
ln_count
/
4
):]))
if
trace
:
print
(
tail_io
.
readlines
())
# Read with pandas and count the number of columns.
tail_cols
=
len
(
list
(
pd
.
read_csv
(
tail_io
)))
#, skiprows=[0])))
# Loop until we find the header row.
for
i
,
ln
in
enumerate
(
lns
):
...
...
@@ -196,7 +218,6 @@ def text_find_header(input_file: str):
return
None
def
text_parser
(
file
:
str
,
metadata
:
bool
=
False
,
dropna
:
bool
=
True
):
"""
A table parser for any input file that can be read as text.
...
...
@@ -212,10 +233,9 @@ def text_parser(file: str, metadata: bool=False, dropna: bool=True):
an integer indicating the header row number (if hdrline is True).
"""
# Find the header row.
header_row
=
text_find_header
(
file
)
# Read the table.
df
=
pd
.
read_csv
(
file
,
header
=
header_row
)
...
...
@@ -327,10 +347,10 @@ def coord_summary(data_frame, config: dict):
# Grab the column.
column
=
data_frame
[
match
]
# Get the min and max of the matched columns.
coords
[
coord
]
=
{
'min'
:
column
.
min
(),
'max'
:
column
.
max
()}
except
Exception
as
e
:
raise
e
# Ignore exceptions (for now).
...
...
@@ -412,16 +432,23 @@ def buildConfiguration(fields):
if __name__ == "__main__":
    # Command-line entry point: extract metadata from a single CSV file or
    # from every CSV file under a directory, printing one JSON document per
    # processed file to stdout.
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='turn on additional printing')
    parser.add_argument('-b', '--debug', action='store_true',
                        help='turn on additional printing for debugging')
    parser.add_argument('-t', '--trace', action='store_true',
                        help='turn on much more printing for debugging')
    parser.add_argument('-g', '--config', dest='config',
                        type=argparse.FileType('r'),
                        default=demo_configuration,
                        help='csv file with a single row containing items to retrieve from the files - Ex: site,plot,latitude,soil_depth')
    # Exactly one input source must be given: a directory of CSV files (-d)
    # or a single CSV file (-f).  NOTE(review): the diff residue also showed
    # '-d' registered as a plain required argument; registering the same
    # option twice raises an argparse conflict, so it lives only in the
    # mutually-exclusive group here.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-d', '--directory', dest='dir', type=str,
                       help='directory containing csv files')
    group.add_argument('-f', '--file', dest='file',
                       type=argparse.FileType('r'),
                       help='csv file')
    args = parser.parse_args()

    # When the user supplied a config CSV (args.config is then an open file
    # object, not the default dict), build the configuration from it;
    # otherwise keep the built-in demo configuration.
    if args.config != demo_configuration:
        fields = pd.read_csv(args.config)
        args.config = buildConfiguration(fields)

    # Propagate the printing flags to the module-level globals consulted by
    # the rest of the script.
    verbose = args.verbose
    debug = args.debug
    trace = args.trace

    if args.dir:
        if not os.path.isdir(args.dir):
            print('Must be a valid directory')
            parser.print_usage()
            sys.exit(1)
        # Recursively collect and process every file whose name ends in 'csv'.
        for (root, dirs, files) in os.walk(args.dir):
            paths = [os.path.join(root, fname)
                     for fname in files if fname.endswith('csv')]
            for path in paths:
                # Call main.
                output = main(path, config=args.config)
                # Dump the dict as a json to stdout.
                print(json.dumps(output, indent=2, cls=NumpyEncoder))
    elif args.file:
        output = main(args.file.name, config=args.config)
        print(json.dumps(output, indent=2, cls=NumpyEncoder))
    else:
        # Unreachable in practice: argparse enforces one of -d/-f.
        print('something went wrong')
        sys.exit(1)
    # Exit demo.
    sys.exit()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment