Loading scripts/summit_scripts/job.bs +23 −13 Original line number Diff line number Diff line #!/bin/bash -l #BSUB -P gen113 #BSUB -P LRN001 #BSUB -J namsa #BSUB -o logs.o%J #BSUB -W 15 #BSUB -nnodes 12 #BSUB -alloc_flags "smt4 nvme" #BSUB -W 00:30 #BSUB -nnodes 512 #BSUB -alloc_flags "smt4 nvme maximizegpfs" #BSUB -q batch ##BSUB -N ##BSUB -csm y ##BSUB -alloc_flags "smt4 gpumps nvme" NODES=$(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch | wc -l) HOME="/gpfs/wolf/gen113/scratch/nl7/work" BUILDS=${PROJWORK}/lrn001/nl/builds ### modules ### module load gcc/6.4.0 module load fftw hdf5 cuda ### python ### PYTHON=${HOME}/anaconda3 PYTHON=${BUILDS}/miniconda3 export PATH=$PYTHON/bin:$PATH export PYTHONIOENCODING="utf8" export LD_LIBRARY_PATH=${PYTHON}/lib:$LD_LIBRARY_PATH CONDA_ENV_NAME="torch1p0" source activate $CONDA_ENV_NAME echo $(which python) ### namsa ### cd ${HOME}/MSA IODIR="${HOME}/MSA/input_output" export CIF="${IODIR}/cif_files/Si.cif" export H5F="${IODIR}/outputs_${LSB_JOBID}.h5" LOG="${IODIR}/namsa_log_${LSB_JOBID}.log" CIFDIR="$(pwd)/data/materialsgenomics" H5FDIR="$(pwd)/data/h5_files" H5FDIR="/mnt/bb/${USER}" #export H5F="/mnt/bb/${USER}/outputs_${LSB_JOBID}.h5" export PYCUDA_DISABLE_CACHE=1 LOG="$(pwd)/namsa_log_${LSB_JOBID}.log" EXEC="${PYTHON}/bin/python -u ./test_namsa_mpi.py 0.75 0" EXEC="python -u sim_batch.py $CIFDIR $H5FDIR" ### pami ibv ### #export PAMI_ENABLE_STRIPING=0 #export PAMI_IBV_DEVICE_NAME="mlx5_0:1" #export PAMI_IBV_DEVICE_NAME_1="mlx5_3:1" ### run ### jsrun -n${NODES} -a6 -c42 -g6 -r1 --bind=proportional-packed:7 --launch_distribution=packed stdbuf -o0 ./launch.sh "${EXEC}" > $LOG jsrun -n${NODES} -a6 -c42 -g6 -r1 --bind=proportional-packed:7 --launch_distribution=packed ${EXEC} > $LOG scripts/summit_scripts/sim_batch.py +8 −4 Original line number Diff line number Diff line Loading @@ -171,6 +171,7 @@ def simulate(h5g, cif_path, gpu_id=0, clean_up=False): msa.clean_up(ctx=None, vars=msa.vars) def main(cifdir_path, h5dir_path): t = time() cifpath_list = get_cif_paths(cifdir_path) h5path = os.path.join(h5dir_path, 'batch_%d.h5'% comm_rank) if os.path.exists(h5path): Loading @@ -178,18 +179,21 @@ def main(cifdir_path, h5dir_path): else: mode ='w' with h5py.File(h5path, mode=mode) as f: for idx in range(comm_rank, 10, comm_size): for idx in range(comm_rank, len(cifpath_list), comm_size): cif_path = cifpath_list[idx] manual = idx < (10 - comm_size) manual = idx < ( len(cifpath_list) - comm_size) spgroup_num, matname = parse_cif_path(cif_path) try: h5g = f.create_group(matname) except Exception as e: print("rank=%d" % comm_rank, e, "group=%s exists" % matname) h5g = f[matname] if comm_rank == 0: print('current idx: %d' %idx) if comm_rank == 0 and bool(idx % 500): print('time=%3.2f, idx= %d' %(time() - t, idx)) simulate(h5g, cif_path, gpu_id=int(np.mod(comm_rank, 6)), clean_up=manual) sim_t = time() - t if comm_rank == 0: print("took %3.3f seconds" % sim_t) def main_test(cifdir_path): cifpath_list = get_cif_paths(cifdir_path) Loading Loading
scripts/summit_scripts/job.bs +23 −13 Original line number Diff line number Diff line #!/bin/bash -l #BSUB -P gen113 #BSUB -P LRN001 #BSUB -J namsa #BSUB -o logs.o%J #BSUB -W 15 #BSUB -nnodes 12 #BSUB -alloc_flags "smt4 nvme" #BSUB -W 00:30 #BSUB -nnodes 512 #BSUB -alloc_flags "smt4 nvme maximizegpfs" #BSUB -q batch ##BSUB -N ##BSUB -csm y ##BSUB -alloc_flags "smt4 gpumps nvme" NODES=$(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch | wc -l) HOME="/gpfs/wolf/gen113/scratch/nl7/work" BUILDS=${PROJWORK}/lrn001/nl/builds ### modules ### module load gcc/6.4.0 module load fftw hdf5 cuda ### python ### PYTHON=${HOME}/anaconda3 PYTHON=${BUILDS}/miniconda3 export PATH=$PYTHON/bin:$PATH export PYTHONIOENCODING="utf8" export LD_LIBRARY_PATH=${PYTHON}/lib:$LD_LIBRARY_PATH CONDA_ENV_NAME="torch1p0" source activate $CONDA_ENV_NAME echo $(which python) ### namsa ### cd ${HOME}/MSA IODIR="${HOME}/MSA/input_output" export CIF="${IODIR}/cif_files/Si.cif" export H5F="${IODIR}/outputs_${LSB_JOBID}.h5" LOG="${IODIR}/namsa_log_${LSB_JOBID}.log" CIFDIR="$(pwd)/data/materialsgenomics" H5FDIR="$(pwd)/data/h5_files" H5FDIR="/mnt/bb/${USER}" #export H5F="/mnt/bb/${USER}/outputs_${LSB_JOBID}.h5" export PYCUDA_DISABLE_CACHE=1 LOG="$(pwd)/namsa_log_${LSB_JOBID}.log" EXEC="${PYTHON}/bin/python -u ./test_namsa_mpi.py 0.75 0" EXEC="python -u sim_batch.py $CIFDIR $H5FDIR" ### pami ibv ### #export PAMI_ENABLE_STRIPING=0 #export PAMI_IBV_DEVICE_NAME="mlx5_0:1" #export PAMI_IBV_DEVICE_NAME_1="mlx5_3:1" ### run ### jsrun -n${NODES} -a6 -c42 -g6 -r1 --bind=proportional-packed:7 --launch_distribution=packed stdbuf -o0 ./launch.sh "${EXEC}" > $LOG jsrun -n${NODES} -a6 -c42 -g6 -r1 --bind=proportional-packed:7 --launch_distribution=packed ${EXEC} > $LOG
scripts/summit_scripts/sim_batch.py +8 −4 Original line number Diff line number Diff line Loading @@ -171,6 +171,7 @@ def simulate(h5g, cif_path, gpu_id=0, clean_up=False): msa.clean_up(ctx=None, vars=msa.vars) def main(cifdir_path, h5dir_path): t = time() cifpath_list = get_cif_paths(cifdir_path) h5path = os.path.join(h5dir_path, 'batch_%d.h5'% comm_rank) if os.path.exists(h5path): Loading @@ -178,18 +179,21 @@ def main(cifdir_path, h5dir_path): else: mode ='w' with h5py.File(h5path, mode=mode) as f: for idx in range(comm_rank, 10, comm_size): for idx in range(comm_rank, len(cifpath_list), comm_size): cif_path = cifpath_list[idx] manual = idx < (10 - comm_size) manual = idx < ( len(cifpath_list) - comm_size) spgroup_num, matname = parse_cif_path(cif_path) try: h5g = f.create_group(matname) except Exception as e: print("rank=%d" % comm_rank, e, "group=%s exists" % matname) h5g = f[matname] if comm_rank == 0: print('current idx: %d' %idx) if comm_rank == 0 and bool(idx % 500): print('time=%3.2f, idx= %d' %(time() - t, idx)) simulate(h5g, cif_path, gpu_id=int(np.mod(comm_rank, 6)), clean_up=manual) sim_t = time() - t if comm_rank == 0: print("took %3.3f seconds" % sim_t) def main_test(cifdir_path): cifpath_list = get_cif_paths(cifdir_path) Loading