Loading README.rst +4 −4 Original line number Diff line number Diff line Loading @@ -30,15 +30,15 @@ Job types contain run-scripts and list input/output files:: cpu: 42 jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7" inp: params: parameter_file.txt - parameter_file.txt out: log: run.log data: grid.hd5 - run.log - grid.hd5 setup: | module load gcc spectrum-mpi export OMP_NUM_THREADS=7 script: | {mpirun} sim_flow -s {sauce} {inp[params} {out[data]} >{out[log]} {mpirun} sim_flow -s {sauce} {inp[0]} {out[1]} >{out[0]} The jobs are used within an allocation and make use of all available resources in parallel:: Loading examples/rules.yaml +16 −16 Original line number Diff line number Diff line Loading @@ -4,9 +4,9 @@ docs: nrs: 1 cpu: 1 out: txt: file_{n}.txt # 1 - file_{n}.txt # 1 script: | # 3 echo {n} >{out[txt]} echo {n} >{out[0]} # These jobtypes are make-rules that know # how to generate a group of output files from a group of input files. Loading @@ -22,18 +22,18 @@ grompp: #jsrun_attr: "" #srun_attr: "" inp: mdp: grompp.mdp top: topol.top conf: conf.gro - grompp.mdp - topol.top - conf.gro out: tpr: topol.tpr - topol.tpr setup: | module load gcc gromacs/2020-rdtscp_off-analysis export GMX_MAXBACKUP=-1 export OMP_NUM_THREADS=7 GMX=gmx script: | {mpirun} $GMX grompp -f {inp[mdp]} -p {inp[top]} -c {inp[conf]} -o {out[tpr]} -maxwarn 99 {mpirun} $GMX grompp -f {inp[0]} -p {inp[1]} -c {inp[2]} -o {out[0]} -maxwarn 99 mdrun: resource: Loading @@ -43,20 +43,20 @@ mdrun: gpu: 6 jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7" inp: tpr: topol.tpr - topol.tpr out: log: run.log edr: run.edr conf: run.gro xtc: run.xtc cpt: run.cpt - run.log - run.edr - run.gro - run.xtc - run.cpt setup: | module load gcc spectrum-mpi gromacs/2020-rdtscp_off export GMX_MAXBACKUP=-1 export OMP_NUM_THREADS=7 GMX=gmx_mpi script: | {mpirun} $GMX mdrun -cpi {out[cpt]} \ -s {inp[tpr]} -g {out[log]} -e {out[edr]} \ -c {out[conf]} -x {out[xtc]} -cpo {out[cpt]} \ {mpirun} $GMX mdrun -cpi run.cpt \ -s {inp[0]} -g run.log -e run.edr \ -c run.gro -x run.xtc -cpo run.cpt \ -pme gpu -npme 1 -nb gpu -bonded gpu -pin off examples/targets.yaml +4 −4 Original line number Diff line number Diff line Loading @@ -4,18 +4,18 @@ Cov1: dirname: Cov1 replicas: 20 out: xtc: run.xtc txt: file_00.txt - run.xtc - file_00.txt Cov2: dirname: Cov2 replicas: 10 out: xtc: run.xtc - run.xtc loop: n: "range({replicas})" # range(10) tgt: txt: file_{n:02d}.txt - file_{n:02d}.txt # Implied computation graph # Loading graph.py +13 −16 Original line number Diff line number Diff line Loading @@ -19,8 +19,8 @@ log = logging.getLogger(__name__) # leaf-set of jobs ready to run def gen_targets(params, tag): for ttype, tname in params['inp'].items(): yield ttype, tname for tname in params['inp']: yield tname if 'loop' in params: assert len(params['loop']) == 2, "Loop must have a single variable." tgtname = 'tgt' Loading @@ -40,9 +40,9 @@ def gen_targets(params, tag): for n in eval(r, {}, {}): fmt = {} dict_merge(fmt, params) for ttype, tgt in params['loop'][tgtname].items(): for tgt in params['loop'][tgtname]: fmt[var] = n yield ttype, tgt.format(**fmt) yield tgt.format(**fmt) # Determine if dirname / tname must be generated by pmake def must_generate(dirname, tname, types, args): Loading @@ -50,18 +50,18 @@ def must_generate(dirname, tname, types, args): if not fname.exists(): return True try: nrule, ftype = types[tname] nrule = nrule(args) RuleClass = types[tname] rule = RuleClass(args) except KeyError: return False # don't know how to generate # Check for newer inputs. mtime = fname.stat().st_mtime for t,f in gen_targets(nrule.params, nrule.id): # need to re-run for f in gen_targets(rule.params, rule.id): # need to re-run f = dirname / f if f.exists() and f.stat().st_mtime > mtime: log.warning("File %s is newer than %s - re-running %s", f, fname, nrule.id) f, fname, rule.id) return True return False Loading @@ -75,21 +75,18 @@ def append_graph(G, types, rule, args, verb=False): checked = set(addl) # set of rules already set to be added while len(addl) > 0: rule = addl.pop() for ttype, tname in gen_targets(rule.params, rule.id): for tname in gen_targets(rule.params, rule.id): p = rule.params['dirname'] / tname if not must_generate(rule.params['dirname'], tname, types, args): continue try: nrule, ftype = types[tname] RuleClass = types[tname] except KeyError: raise TargetError("No rule to make target '%s' needed by '%s'."%(p, rule.id)) rulename = nrule.params['rulename'] if ftype != ttype: raise TargetError("Rule %s produces target file '%s' of type '%s' (but %s was requested)."%( rulename, tname, ftype, ttype)) rulename = RuleClass.params['rulename'] # instantiate the Rule -- creating a job job = nrule(args) job = RuleClass(args) if job not in checked: addl.append(job) # follow-up with inputs of this job checked.add(job) Loading @@ -109,7 +106,7 @@ class TaskGraph: if not args['dirname'].is_dir(): raise KeyError("Target %s: invalid directory %s"%(tgt,args['dirname'])) if 'out' not in args: raise KeyError("Target %s is missing 'out:' a dictionary of target outputs!"%tgt) raise KeyError("Target %s is missing 'out:' a list of target outputs!"%tgt) task = Top(tgt, args) task.params['inp'] = args['out'] Loading helpers.py +4 −4 Original line number Diff line number Diff line Loading @@ -112,15 +112,15 @@ class JobState: return str_warn("job %s"%(self.dirname / self.jobname)) def file_paths(self, fnames, verb=False): # Get the file paths from this job's dir. # returns (True/False, {key:path}), where True == all paths present # returns (True/False, [Path]), where True == all paths present ok = True paths = {} for key, name in fnames.items(): paths = [] for name in fnames: path = self.dirname/name if (verb or ok) and not path.exists(): ok = False log.error("Job %s is missing file %s.", self.jobname, path) paths[key] = path paths.append( path ) return ok, paths def input_paths(self, fnames): self.all_inputs_present, self.inp = self.file_paths(fnames, True) Loading Loading
README.rst +4 −4 Original line number Diff line number Diff line Loading @@ -30,15 +30,15 @@ Job types contain run-scripts and list input/output files:: cpu: 42 jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7" inp: params: parameter_file.txt - parameter_file.txt out: log: run.log data: grid.hd5 - run.log - grid.hd5 setup: | module load gcc spectrum-mpi export OMP_NUM_THREADS=7 script: | {mpirun} sim_flow -s {sauce} {inp[params} {out[data]} >{out[log]} {mpirun} sim_flow -s {sauce} {inp[0]} {out[1]} >{out[0]} The jobs are used within an allocation and make use of all available resources in parallel:: Loading
examples/rules.yaml +16 −16 Original line number Diff line number Diff line Loading @@ -4,9 +4,9 @@ docs: nrs: 1 cpu: 1 out: txt: file_{n}.txt # 1 - file_{n}.txt # 1 script: | # 3 echo {n} >{out[txt]} echo {n} >{out[0]} # These jobtypes are make-rules that know # how to generate a group of output files from a group of input files. Loading @@ -22,18 +22,18 @@ grompp: #jsrun_attr: "" #srun_attr: "" inp: mdp: grompp.mdp top: topol.top conf: conf.gro - grompp.mdp - topol.top - conf.gro out: tpr: topol.tpr - topol.tpr setup: | module load gcc gromacs/2020-rdtscp_off-analysis export GMX_MAXBACKUP=-1 export OMP_NUM_THREADS=7 GMX=gmx script: | {mpirun} $GMX grompp -f {inp[mdp]} -p {inp[top]} -c {inp[conf]} -o {out[tpr]} -maxwarn 99 {mpirun} $GMX grompp -f {inp[0]} -p {inp[1]} -c {inp[2]} -o {out[0]} -maxwarn 99 mdrun: resource: Loading @@ -43,20 +43,20 @@ mdrun: gpu: 6 jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7" inp: tpr: topol.tpr - topol.tpr out: log: run.log edr: run.edr conf: run.gro xtc: run.xtc cpt: run.cpt - run.log - run.edr - run.gro - run.xtc - run.cpt setup: | module load gcc spectrum-mpi gromacs/2020-rdtscp_off export GMX_MAXBACKUP=-1 export OMP_NUM_THREADS=7 GMX=gmx_mpi script: | {mpirun} $GMX mdrun -cpi {out[cpt]} \ -s {inp[tpr]} -g {out[log]} -e {out[edr]} \ -c {out[conf]} -x {out[xtc]} -cpo {out[cpt]} \ {mpirun} $GMX mdrun -cpi run.cpt \ -s {inp[0]} -g run.log -e run.edr \ -c run.gro -x run.xtc -cpo run.cpt \ -pme gpu -npme 1 -nb gpu -bonded gpu -pin off
examples/targets.yaml +4 −4 Original line number Diff line number Diff line Loading @@ -4,18 +4,18 @@ Cov1: dirname: Cov1 replicas: 20 out: xtc: run.xtc txt: file_00.txt - run.xtc - file_00.txt Cov2: dirname: Cov2 replicas: 10 out: xtc: run.xtc - run.xtc loop: n: "range({replicas})" # range(10) tgt: txt: file_{n:02d}.txt - file_{n:02d}.txt # Implied computation graph # Loading
graph.py +13 −16 Original line number Diff line number Diff line Loading @@ -19,8 +19,8 @@ log = logging.getLogger(__name__) # leaf-set of jobs ready to run def gen_targets(params, tag): for ttype, tname in params['inp'].items(): yield ttype, tname for tname in params['inp']: yield tname if 'loop' in params: assert len(params['loop']) == 2, "Loop must have a single variable." tgtname = 'tgt' Loading @@ -40,9 +40,9 @@ def gen_targets(params, tag): for n in eval(r, {}, {}): fmt = {} dict_merge(fmt, params) for ttype, tgt in params['loop'][tgtname].items(): for tgt in params['loop'][tgtname]: fmt[var] = n yield ttype, tgt.format(**fmt) yield tgt.format(**fmt) # Determine if dirname / tname must be generated by pmake def must_generate(dirname, tname, types, args): Loading @@ -50,18 +50,18 @@ def must_generate(dirname, tname, types, args): if not fname.exists(): return True try: nrule, ftype = types[tname] nrule = nrule(args) RuleClass = types[tname] rule = RuleClass(args) except KeyError: return False # don't know how to generate # Check for newer inputs. mtime = fname.stat().st_mtime for t,f in gen_targets(nrule.params, nrule.id): # need to re-run for f in gen_targets(rule.params, rule.id): # need to re-run f = dirname / f if f.exists() and f.stat().st_mtime > mtime: log.warning("File %s is newer than %s - re-running %s", f, fname, nrule.id) f, fname, rule.id) return True return False Loading @@ -75,21 +75,18 @@ def append_graph(G, types, rule, args, verb=False): checked = set(addl) # set of rules already set to be added while len(addl) > 0: rule = addl.pop() for ttype, tname in gen_targets(rule.params, rule.id): for tname in gen_targets(rule.params, rule.id): p = rule.params['dirname'] / tname if not must_generate(rule.params['dirname'], tname, types, args): continue try: nrule, ftype = types[tname] RuleClass = types[tname] except KeyError: raise TargetError("No rule to make target '%s' needed by '%s'."%(p, rule.id)) rulename = nrule.params['rulename'] if ftype != ttype: raise TargetError("Rule %s produces target file '%s' of type '%s' (but %s was requested)."%( rulename, tname, ftype, ttype)) rulename = RuleClass.params['rulename'] # instantiate the Rule -- creating a job job = nrule(args) job = RuleClass(args) if job not in checked: addl.append(job) # follow-up with inputs of this job checked.add(job) Loading @@ -109,7 +106,7 @@ class TaskGraph: if not args['dirname'].is_dir(): raise KeyError("Target %s: invalid directory %s"%(tgt,args['dirname'])) if 'out' not in args: raise KeyError("Target %s is missing 'out:' a dictionary of target outputs!"%tgt) raise KeyError("Target %s is missing 'out:' a list of target outputs!"%tgt) task = Top(tgt, args) task.params['inp'] = args['out'] Loading
helpers.py +4 −4 Original line number Diff line number Diff line Loading @@ -112,15 +112,15 @@ class JobState: return str_warn("job %s"%(self.dirname / self.jobname)) def file_paths(self, fnames, verb=False): # Get the file paths from this job's dir. # returns (True/False, {key:path}), where True == all paths present # returns (True/False, [Path]), where True == all paths present ok = True paths = {} for key, name in fnames.items(): paths = [] for name in fnames: path = self.dirname/name if (verb or ok) and not path.exists(): ok = False log.error("Job %s is missing file %s.", self.jobname, path) paths[key] = path paths.append( path ) return ok, paths def input_paths(self, fnames): self.all_inputs_present, self.inp = self.file_paths(fnames, True) Loading