Commit 30f24e1a authored by David M. Rogers's avatar David M. Rogers
Browse files

Added breakup script.

parent db86fb65
TODO: - check output files of this run, e.g.
/gpfs/alpine/world-shared/bif128/docked/149875/g02n03/p2498.7.tgz
- run for 4 hrs on 90 nodes
* v. 1.0
* Setup to run with multiple job types (rules.yaml)
* loadem.py is passed a worker index (e.g. SLURM_ARRAY_TASK_ID)
and resumes assigned tasks on startup
* DB keys changed to 'ready', 'a_0', 'a_1', ..., 'errors'
* removed 'done' set.
* v. 0.6
* Added logfile into tgz outputs
* Added 'hopper' global counter to terminate after running n ds
* Added some comments to key steps
* Added logfile into tgz outputs
* Added 'hopper' global counter to terminate after running n ds
* Added some comments to key steps
* v. 0.5
* fix stupid bug
......
#!/usr/bin/env python3
# Break up a large parquet file into several smaller ones.
import pandas as pd
def main(argv):
    """Split one parquet file into several smaller parquet files.

    The input is cut into ``len(df) // 1024`` consecutive row ranges (at
    least one), and the rows left over after filling 1024 per file are
    spread as evenly as possible: every output gets the same base count and
    the first few outputs get one extra row.  Each output name is printed
    before the file is written.

    Parameters:
        argv: full command line, ``[prog, inp.pq, out-fmt.pq]``.
            ``out-fmt.pq`` is a %-format string with one integer slot
            (for example ``part-%d.pq``); it is formatted with the output
            index 0, 1, ..., n-1.  A pattern without a slot makes the
            ``%`` operator raise TypeError.

    Raises:
        SystemExit: with a usage message when the argument count is wrong.
    """
    # argv[0] is the program name, so two user arguments means len == 3.
    if len(argv) != 3:
        prog = argv[0] if argv else "prog"
        raise SystemExit("Usage: %s <inp.pq> <out-fmt.pq>" % prog)
    inp, out = argv[1], argv[2]

    df = pd.read_parquet(inp)
    total = len(df)

    target = 1024  # nominal number of rows per output file
    # Inputs shorter than one nominal chunk still produce a single output
    # instead of dividing by zero.
    n = max(1, total // target)  # number of output files
    # base rows per output; outputs 0, 1, ..., low-1 get one extra row
    base, low = divmod(total, n)

    k = 0  # index of the first row of the next output
    for i in range(n):
        oname = out % i
        print(oname)
        m = base + (i < low)
        df.iloc[k:k + m].to_parquet(
            oname,
            compression='snappy',
            engine='pyarrow',
        )
        k += m
if __name__ == "__main__":
    # Script entry point: forward the complete command line
    # (program name included) to main().
    import sys

    cli_args = sys.argv
    main(cli_args)
......@@ -36,7 +36,7 @@ def run_redis(fn, host, db=0):
del r
return u
def get_item(host, db):
def get_item(host, db, assigned):
# TODO: make enqueue atomic
def enqueue(r):
if hopper: # use a counter to terminate early
......@@ -46,7 +46,7 @@ def get_item(host, db):
item = r.spop('ready')
if item is not None:
r.sadd('assigned', item)
r.sadd(assigned, item)
return item
item = run_redis(enqueue, host, db)
......@@ -122,7 +122,7 @@ def main(argv):
errors = 0
consecutive_errors = 0
while True:
item = get_item(host, db)
item = get_item(host, db, assigned)
if item is None: # graceful shutdown
break
ret = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment