diff --git a/scripts/summit_scripts/job.bs b/scripts/summit_scripts/job.bs
index 4b43201e81f5283781b6b924c96af98894072d1a..9c01b93d340442bf570e3e393bf9d3c9de139638 100644
--- a/scripts/summit_scripts/job.bs
+++ b/scripts/summit_scripts/job.bs
@@ -1,35 +1,49 @@
 #!/bin/bash -l
-#BSUB -P gen113
+#BSUB -P LRN001
 #BSUB -J namsa
 #BSUB -o logs.o%J
-#BSUB -W 15
-#BSUB -nnodes 12
-#BSUB -alloc_flags "smt4 nvme"
+#BSUB -W 00:30
+#BSUB -nnodes 512
+#BSUB -alloc_flags "smt4 nvme maximizegpfs"
 #BSUB -q batch
 ##BSUB -N
 ##BSUB -csm y
 ##BSUB -alloc_flags "smt4 gpumps nvme"
 
 NODES=$(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch | wc -l)
-HOME="/gpfs/wolf/gen113/scratch/nl7/work"
+BUILDS=${PROJWORK}/lrn001/nl/builds
 
 ### modules ###
 module load gcc/6.4.0
 module load fftw hdf5 cuda
 
 ### python ###
-PYTHON=${HOME}/anaconda3
+PYTHON=${BUILDS}/miniconda3
+export PATH=$PYTHON/bin:$PATH
 export PYTHONIOENCODING="utf8"
 export LD_LIBRARY_PATH=${PYTHON}/lib:$LD_LIBRARY_PATH
+CONDA_ENV_NAME="torch1p0"
+source activate $CONDA_ENV_NAME
+# record in the job log which interpreter the activated environment resolves to
+which python
 
 ### namsa ###
-cd ${HOME}/MSA
-IODIR="${HOME}/MSA/input_output"
-export CIF="${IODIR}/cif_files/Si.cif"
-export H5F="${IODIR}/outputs_${LSB_JOBID}.h5"
-LOG="${IODIR}/namsa_log_${LSB_JOBID}.log"
+CIFDIR="$(pwd)/data/materialsgenomics"
+# output directory: the working-directory alternative is kept commented out,
+# the /mnt/bb/${USER} assignment below is the one in effect
+#H5FDIR="$(pwd)/data/h5_files"
+H5FDIR="/mnt/bb/${USER}"
+#export H5F="/mnt/bb/${USER}/outputs_${LSB_JOBID}.h5"
+export PYCUDA_DISABLE_CACHE=1
+LOG="$(pwd)/namsa_log_${LSB_JOBID}.log"
 
-EXEC="${PYTHON}/bin/python -u ./test_namsa_mpi.py 0.75 0"
+EXEC="python -u sim_batch.py $CIFDIR $H5FDIR"
+
+### pami ibv ###
+#export PAMI_ENABLE_STRIPING=0
+#export PAMI_IBV_DEVICE_NAME="mlx5_0:1"
+#export PAMI_IBV_DEVICE_NAME_1="mlx5_3:1"
 
 ### run ###
-jsrun -n${NODES} -a6 -c42 -g6 -r1 --bind=proportional-packed:7 --launch_distribution=packed stdbuf -o0 ./launch.sh "${EXEC}" > $LOG
+# ${EXEC} is left unquoted on purpose so it word-splits into command + arguments
+jsrun -n${NODES} -a6 -c42 -g6 -r1 --bind=proportional-packed:7 --launch_distribution=packed ${EXEC} > "$LOG"
diff --git a/scripts/summit_scripts/sim_batch.py b/scripts/summit_scripts/sim_batch.py
index b81591eac2caa7a6d886cdfe11827434fb1f87d9..dbacdc61180e394f0edb37ade60321aaa4ef6912 100644
--- a/scripts/summit_scripts/sim_batch.py
+++ b/scripts/summit_scripts/sim_batch.py
@@ -171,6 +171,7 @@ def simulate(h5g, cif_path, gpu_id=0, clean_up=False):
         msa.clean_up(ctx=None, vars=msa.vars)
 
 def main(cifdir_path, h5dir_path):
+    t = time()  # start of the run; reference for the progress and total-time prints
     cifpath_list = get_cif_paths(cifdir_path)
     h5path = os.path.join(h5dir_path, 'batch_%d.h5'% comm_rank)
     if os.path.exists(h5path):
@@ -178,18 +179,24 @@ def main(cifdir_path, h5dir_path):
     else:
         mode ='w'
     with h5py.File(h5path, mode=mode) as f:
-        for idx in range(comm_rank, 10, comm_size):
+        for idx in range(comm_rank, len(cifpath_list), comm_size):
             cif_path = cifpath_list[idx]
-            manual = idx < (10 - comm_size)
+            manual = idx < (len(cifpath_list) - comm_size)
             spgroup_num, matname = parse_cif_path(cif_path)
             try:
                 h5g = f.create_group(matname)
             except Exception as e:
                 print("rank=%d" % comm_rank, e, "group=%s exists" % matname)
                 h5g = f[matname]
-            if comm_rank == 0:
-                print('current idx: %d' %idx)
+            # Progress report from rank 0 on every 500th of its own iterations
+            # (rank 0 visits idx = 0, comm_size, 2*comm_size, ...). The earlier
+            # bool(idx % 500) test fired on every idx except multiples of 500.
+            if comm_rank == 0 and (idx // comm_size) % 500 == 0:
+                print('time=%3.2f, idx= %d' %(time() - t, idx))
             simulate(h5g, cif_path, gpu_id=int(np.mod(comm_rank, 6)), clean_up=manual)
+    sim_t = time() - t
+    if comm_rank == 0:
+        print("took %3.3f seconds" % sim_t)
 
 def main_test(cifdir_path):
     cifpath_list = get_cif_paths(cifdir_path)