Commit 30bf545d authored by David M. Rogers
Browse files

Replaced target dicts with lists (no more file types).

parent f7cd92c0
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -30,15 +30,15 @@ Job types contain run-scripts and list input/output files::
        cpu:  42
        jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7"
      inp:
        params: parameter_file.txt
        - parameter_file.txt
      out:
        log: run.log
        data: grid.hd5
        - run.log
        - grid.hd5
      setup: |
        module load gcc spectrum-mpi
        export OMP_NUM_THREADS=7
      script: |
        {mpirun} sim_flow -s {sauce} {inp[params} {out[data]} >{out[log]}
        {mpirun} sim_flow -s {sauce} {inp[0]} {out[1]} >{out[0]}

The jobs are used within an allocation and make use of all available resources in parallel::

+16 −16
Original line number Diff line number Diff line
@@ -4,9 +4,9 @@ docs:
        nrs:  1
        cpu:  1
    out:
        txt: file_{n}.txt # 1
        - file_{n}.txt # 1
    script: | # 3
        echo {n} >{out[txt]}
        echo {n} >{out[0]}

# These jobtypes are make-rules that know
# how to generate a group of output files from a group of input files.
@@ -22,18 +22,18 @@ grompp:
      #jsrun_attr: ""
      #srun_attr: ""
  inp:
    mdp: grompp.mdp
    top: topol.top
    conf: conf.gro
    - grompp.mdp
    - topol.top
    - conf.gro
  out:
    tpr: topol.tpr
    - topol.tpr
  setup: |
    module load gcc gromacs/2020-rdtscp_off-analysis
    export GMX_MAXBACKUP=-1
    export OMP_NUM_THREADS=7
    GMX=gmx
  script: |
    {mpirun} $GMX grompp -f {inp[mdp]} -p {inp[top]} -c {inp[conf]} -o {out[tpr]} -maxwarn 99
    {mpirun} $GMX grompp -f {inp[0]} -p {inp[1]} -c {inp[2]} -o {out[0]} -maxwarn 99

mdrun:
  resource:
@@ -43,20 +43,20 @@ mdrun:
      gpu: 6
      jsrun_attr: "-l gpu-cpu -d plane:6 -b packed:7"
  inp:
      tpr: topol.tpr
      - topol.tpr
  out:
      log: run.log
      edr: run.edr
      conf: run.gro
      xtc: run.xtc
      cpt: run.cpt
      - run.log
      - run.edr
      - run.gro
      - run.xtc
      - run.cpt
  setup: |
      module load gcc spectrum-mpi gromacs/2020-rdtscp_off
      export GMX_MAXBACKUP=-1
      export OMP_NUM_THREADS=7
      GMX=gmx_mpi
  script: |
      {mpirun} $GMX mdrun -cpi {out[cpt]} \
                        -s {inp[tpr]} -g {out[log]} -e {out[edr]} \
                        -c {out[conf]} -x {out[xtc]} -cpo {out[cpt]} \
      {mpirun} $GMX mdrun -cpi run.cpt \
                        -s {inp[0]} -g run.log -e run.edr \
                        -c run.gro -x run.xtc -cpo run.cpt \
                        -pme gpu -npme 1 -nb gpu -bonded gpu -pin off
+4 −4
Original line number Diff line number Diff line
@@ -4,18 +4,18 @@ Cov1:
  dirname: Cov1
  replicas: 20
  out:
      xtc: run.xtc
      txt: file_00.txt
      - run.xtc
      - file_00.txt

Cov2:
  dirname: Cov2
  replicas: 10
  out:
      xtc: run.xtc
      - run.xtc
  loop:
      n: "range({replicas})" # range(10)
      tgt:
          txt: file_{n:02d}.txt
          - file_{n:02d}.txt

# Implied computation graph
#
+13 −16
Original line number Diff line number Diff line
@@ -19,8 +19,8 @@ log = logging.getLogger(__name__)
#   leaf-set of jobs ready to run

def gen_targets(params, tag):
    for ttype, tname in params['inp'].items():
        yield ttype, tname
    for tname in params['inp']:
        yield tname
    if 'loop' in params:
        assert len(params['loop']) == 2, "Loop must have a single variable."
        tgtname = 'tgt'
@@ -40,9 +40,9 @@ def gen_targets(params, tag):
        for n in eval(r, {}, {}):
            fmt = {}
            dict_merge(fmt, params)
            for ttype, tgt in params['loop'][tgtname].items():
            for tgt in params['loop'][tgtname]:
                fmt[var] = n
                yield ttype, tgt.format(**fmt)
                yield tgt.format(**fmt)

# Determine if dirname / tname must be generated by pmake
def must_generate(dirname, tname, types, args):
@@ -50,18 +50,18 @@ def must_generate(dirname, tname, types, args):
    if not fname.exists():
        return True
    try:
        nrule, ftype = types[tname]
        nrule = nrule(args)
        RuleClass = types[tname]
        rule = RuleClass(args)
    except KeyError:
        return False # don't know how to generate

    # Check for newer inputs.
    mtime = fname.stat().st_mtime
    for t,f in gen_targets(nrule.params, nrule.id): # need to re-run
    for f in gen_targets(rule.params, rule.id): # need to re-run
        f = dirname / f
        if f.exists() and f.stat().st_mtime > mtime:
            log.warning("File %s is newer than %s - re-running %s",
                        f, fname, nrule.id)
                        f, fname, rule.id)
            return True
    return False

@@ -75,21 +75,18 @@ def append_graph(G, types, rule, args, verb=False):
    checked = set(addl) # set of rules already set to be added
    while len(addl) > 0:
        rule = addl.pop()
        for ttype, tname in gen_targets(rule.params, rule.id):
        for tname in gen_targets(rule.params, rule.id):
            p = rule.params['dirname'] / tname
            if not must_generate(rule.params['dirname'], tname, types, args):
                continue
            try:
                nrule, ftype = types[tname]
                RuleClass = types[tname]
            except KeyError:
                raise TargetError("No rule to make target '%s' needed by '%s'."%(p, rule.id))
            rulename = nrule.params['rulename']
            if ftype != ttype:
                raise TargetError("Rule %s produces target file '%s' of type '%s' (but %s was requested)."%(
                        rulename, tname, ftype, ttype))
            rulename = RuleClass.params['rulename']

            # instantiate the Rule -- creating a job
            job = nrule(args)
            job = RuleClass(args)
            if job not in checked:
                addl.append(job) # follow-up with inputs of this job
                checked.add(job)
@@ -109,7 +106,7 @@ class TaskGraph:
            if not args['dirname'].is_dir():
                raise KeyError("Target %s: invalid directory %s"%(tgt,args['dirname']))
            if 'out' not in args:
                raise KeyError("Target %s is missing 'out:' a dictionary of target outputs!"%tgt)
                raise KeyError("Target %s is missing 'out:' a list of target outputs!"%tgt)
    
            task = Top(tgt, args)
            task.params['inp'] = args['out']
+4 −4
Original line number Diff line number Diff line
@@ -112,15 +112,15 @@ class JobState:
        return str_warn("job %s"%(self.dirname / self.jobname))
    def file_paths(self, fnames, verb=False):
        # Get the file paths from this job's dir.
        # returns (True/False, {key:path}), where True == all paths present
        # returns (True/False, [Path]), where True == all paths present
        ok = True
        paths = {}
        for key, name in fnames.items():
        paths = []
        for name in fnames:
            path = self.dirname/name
            if (verb or ok) and not path.exists():
                ok = False
                log.error("Job %s is missing file %s.", self.jobname, path)
            paths[key] = path
            paths.append( path )
        return ok, paths
    def input_paths(self, fnames):
        self.all_inputs_present, self.inp = self.file_paths(fnames, True)
Loading