Replaced target dicts with lists (no more file types). (30bf545d) · Commits · Rogers, David / pmake

README.rst

+4 −4

Original line number	Diff line number	Diff line
		@@ -30,15 +30,15 @@ Job types contain run-scripts and list input/output files::
		cpu: 42
		jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7"
		inp:
		params: parameter_file.txt
		- parameter_file.txt
		out:
		log: run.log
		data: grid.hd5
		- run.log
		- grid.hd5
		setup: |
		module load gcc spectrum-mpi
		export OMP_NUM_THREADS=7
		script: |
		{mpirun} sim_flow -s {sauce} {inp[params} {out[data]} >{out[log]}
		{mpirun} sim_flow -s {sauce} {inp[0]} {out[1]} >{out[0]}

		The jobs are used within an allocation and make use of all available resources in parallel::

examples/rules.yaml

+16 −16

Original line number	Diff line number	Diff line
		@@ -4,9 +4,9 @@ docs:
		nrs: 1
		cpu: 1
		out:
		txt: file_{n}.txt # 1
		- file_{n}.txt # 1
		script: | # 3
		echo {n} >{out[txt]}
		echo {n} >{out[0]}

		# These jobtypes are make-rules that know
		# how to generate a group of output files from a group of input files.
		@@ -22,18 +22,18 @@ grompp:
		#jsrun_attr: ""
		#srun_attr: ""
		inp:
		mdp: grompp.mdp
		top: topol.top
		conf: conf.gro
		- grompp.mdp
		- topol.top
		- conf.gro
		out:
		tpr: topol.tpr
		- topol.tpr
		setup: |
		module load gcc gromacs/2020-rdtscp_off-analysis
		export GMX_MAXBACKUP=-1
		export OMP_NUM_THREADS=7
		GMX=gmx
		script: |
		{mpirun} $GMX grompp -f {inp[mdp]} -p {inp[top]} -c {inp[conf]} -o {out[tpr]} -maxwarn 99
		{mpirun} $GMX grompp -f {inp[0]} -p {inp[1]} -c {inp[2]} -o {out[0]} -maxwarn 99

		mdrun:
		resource:
		@@ -43,20 +43,20 @@ mdrun:
		gpu: 6
		jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7"
		inp:
		tpr: topol.tpr
		- topol.tpr
		out:
		log: run.log
		edr: run.edr
		conf: run.gro
		xtc: run.xtc
		cpt: run.cpt
		- run.log
		- run.edr
		- run.gro
		- run.xtc
		- run.cpt
		setup: |
		module load gcc spectrum-mpi gromacs/2020-rdtscp_off
		export GMX_MAXBACKUP=-1
		export OMP_NUM_THREADS=7
		GMX=gmx_mpi
		script: |
		{mpirun} $GMX mdrun -cpi {out[cpt]} \
		-s {inp[tpr]} -g {out[log]} -e {out[edr]} \
		-c {out[conf]} -x {out[xtc]} -cpo {out[cpt]} \
		{mpirun} $GMX mdrun -cpi run.cpt \
		-s {inp[0]} -g run.log -e run.edr \
		-c run.gro -x run.xtc -cpo run.cpt \
		-pme gpu -npme 1 -nb gpu -bonded gpu -pin off

examples/targets.yaml

+4 −4

Original line number	Diff line number	Diff line
		@@ -4,18 +4,18 @@ Cov1:
		dirname: Cov1
		replicas: 20
		out:
		xtc: run.xtc
		txt: file_00.txt
		- run.xtc
		- file_00.txt

		Cov2:
		dirname: Cov2
		replicas: 10
		out:
		xtc: run.xtc
		- run.xtc
		loop:
		n: "range({replicas})" # range(10)
		tgt:
		txt: file_{n:02d}.txt
		- file_{n:02d}.txt

		# Implied computation graph
		#

graph.py

+13 −16

Original line number	Diff line number	Diff line
		@@ -19,8 +19,8 @@ log = logging.getLogger(__name__)
		# leaf-set of jobs ready to run

		def gen_targets(params, tag):
		for ttype, tname in params['inp'].items():
		yield ttype, tname
		for tname in params['inp']:
		yield tname
		if 'loop' in params:
		assert len(params['loop']) == 2, "Loop must have a single variable."
		tgtname = 'tgt'
		@@ -40,9 +40,9 @@ def gen_targets(params, tag):
		for n in eval(r, {}, {}):
		fmt = {}
		dict_merge(fmt, params)
		for ttype, tgt in params['loop'][tgtname].items():
		for tgt in params['loop'][tgtname]:
		fmt[var] = n
		yield ttype, tgt.format(**fmt)
		yield tgt.format(**fmt)

		# Determine if dirname / tname must be generated by pmake
		def must_generate(dirname, tname, types, args):
		@@ -50,18 +50,18 @@ def must_generate(dirname, tname, types, args):
		if not fname.exists():
		return True
		try:
		nrule, ftype = types[tname]
		nrule = nrule(args)
		RuleClass = types[tname]
		rule = RuleClass(args)
		except KeyError:
		return False # don't know how to generate

		# Check for newer inputs.
		mtime = fname.stat().st_mtime
		for t,f in gen_targets(nrule.params, nrule.id): # need to re-run
		for f in gen_targets(rule.params, rule.id): # need to re-run
		f = dirname / f
		if f.exists() and f.stat().st_mtime > mtime:
		log.warning("File %s is newer than %s - re-running %s",
		f, fname, nrule.id)
		f, fname, rule.id)
		return True
		return False

		@@ -75,21 +75,18 @@ def append_graph(G, types, rule, args, verb=False):
		checked = set(addl) # set of rules already set to be added
		while len(addl) > 0:
		rule = addl.pop()
		for ttype, tname in gen_targets(rule.params, rule.id):
		for tname in gen_targets(rule.params, rule.id):
		p = rule.params['dirname'] / tname
		if not must_generate(rule.params['dirname'], tname, types, args):
		continue
		try:
		nrule, ftype = types[tname]
		RuleClass = types[tname]
		except KeyError:
		raise TargetError("No rule to make target '%s' needed by '%s'."%(p, rule.id))
		rulename = nrule.params['rulename']
		if ftype != ttype:
		raise TargetError("Rule %s produces target file '%s' of type '%s' (but %s was requested)."%(
		rulename, tname, ftype, ttype))
		rulename = RuleClass.params['rulename']

		# instantiate the Rule -- creating a job
		job = nrule(args)
		job = RuleClass(args)
		if job not in checked:
		addl.append(job) # follow-up with inputs of this job
		checked.add(job)
		@@ -109,7 +106,7 @@ class TaskGraph:
		if not args['dirname'].is_dir():
		raise KeyError("Target %s: invalid directory %s"%(tgt,args['dirname']))
		if 'out' not in args:
		raise KeyError("Target %s is missing 'out:' a dictionary of target outputs!"%tgt)
		raise KeyError("Target %s is missing 'out:' a list of target outputs!"%tgt)

		task = Top(tgt, args)
		task.params['inp'] = args['out']

helpers.py

+4 −4

Original line number	Diff line number	Diff line
		@@ -112,15 +112,15 @@ class JobState:
		return str_warn("job %s"%(self.dirname / self.jobname))
		def file_paths(self, fnames, verb=False):
		# Get the file paths from this job's dir.
		# returns (True/False, {key:path}), where True == all paths present
		# returns (True/False, [Path]), where True == all paths present
		ok = True
		paths = {}
		for key, name in fnames.items():
		paths = []
		for name in fnames:
		path = self.dirname/name
		if (verb or ok) and not path.exists():
		ok = False
		log.error("Job %s is missing file %s.", self.jobname, path)
		paths[key] = path
		paths.append( path )
		return ok, paths
		def input_paths(self, fnames):
		self.all_inputs_present, self.inp = self.file_paths(fnames, True)

Admin message