Loading pretrain_pubmed.lsfdeleted 100644 → 0 +0 −174 Original line number Diff line number Diff line #!/bin/bash #BSUB -nnodes 2 #BSUB -W 0:30 #BSUB -P med106 #BSUB -alloc_flags "smt4 nvme" #BSUB -J debug #BSUB -o debug.%J #BSUB -q debug set +x module load open-ce conda deactivate conda activate /gpfs/alpine/med106/world-shared/irl1/rhel8/mytorch module list export OMP_NUM_THREADS=1 #export PYTHONPATH=$PYTHONPATH:/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/megatron/fused_kernels #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/gpfs/alpine/med106/world-shared/irl1/rhel8/mytorch/lib/python3.8/site-packages/torch/lib #export PATH=$PATH:/gpfs/alpine/med106/world-shared/irl1/rhel8/mytorch/lib/python3.8/site-packages/torch/include nodes=($(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch)) nnodes=${#nodes[@]} echo $nnodes DATASET_0="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm000_abstract_sentence" DATASET_1="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm001_abstract_sentence" DATASET_2="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm002_abstract_sentence" DATASET_3="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm003_abstract_sentence" DATASET_4="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm004_abstract_sentence" DATASET_5="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm005_abstract_sentence" DATASET_6="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm006_abstract_sentence" DATASET_7="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm007_abstract_sentence" DATASET_8="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm008_abstract_sentence" DATASET_9="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm009_abstract_sentence" DATASET_10="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm010_abstract_sentence" DATASET_11="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm011_abstract_sentence" DATASET_12="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm012_abstract_sentence" DATASET_13="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm013_abstract_sentence" DATASET_14="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm014_abstract_sentence" DATASET_15="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm015_abstract_sentence" DATASET_16="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm016_abstract_sentence" DATASET_17="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm017_abstract_sentence" DATASET_18="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm018_abstract_sentence" DATASET_19="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm019_abstract_sentence" DATASET_20="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm020_abstract_sentence" DATASET_21="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm021_abstract_sentence" DATASET_22="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm022_abstract_sentence" DATASET_23="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm023_abstract_sentence" DATASET_24="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm024_abstract_sentence" DATASET_25="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm025_abstract_sentence" DATASET_26="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm026_abstract_sentence" DATASET_27="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm027_abstract_sentence" DATASET_28="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm028_abstract_sentence" DATASET_29="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm029_abstract_sentence" DATASET_30="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm030_abstract_sentence" DATASET_31="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm031_abstract_sentence" DATASET_32="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm032_abstract_sentence" DATASET_33="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm033_abstract_sentence" DATASET_34="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm034_abstract_sentence" DATASET_35="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm035_abstract_sentence" DATASET_36="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm036_abstract_sentence" DATASET_37="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm037_abstract_sentence" DATASET_38="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm038_abstract_sentence" DATASET_39="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm039_abstract_sentence" DATASET_40="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm040_abstract_sentence" DATASET_41="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm041_abstract_sentence" DATASET_42="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm042_abstract_sentence" DATASET_43="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm043_abstract_sentence" DATASET_44="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm044_abstract_sentence" DATASET_45="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm045_abstract_sentence" DATASET_46="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm046_abstract_sentence" DATASET_47="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm047_abstract_sentence" DATASET_48="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm048_abstract_sentence" DATASET_49="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm049_abstract_sentence" DATASET_50="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm050_abstract_sentence" DATASET_51="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm051_abstract_sentence" DATASET_52="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm052_abstract_sentence" DATASET_53="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm053_abstract_sentence" DATASET_54="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm054_abstract_sentence" DATASET_55="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm055_abstract_sentence" DATASET_56="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm056_abstract_sentence" DATASET_57="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm057_abstract_sentence" DATASET_58="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm058_abstract_sentence" DATASET_59="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm059_abstract_sentence" DATASET_60="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm060_abstract_sentence" DATASET_61="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm061_abstract_sentence" DATASET_62="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm062_abstract_sentence" DATASET_63="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm063_abstract_sentence" DATASET_64="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm064_abstract_sentence" DATASET_65="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm065_abstract_sentence" DATASET_66="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm066_abstract_sentence" DATASET_67="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm067_abstract_sentence" DATASET_68="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm068_abstract_sentence" DATASET_69="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm069_abstract_sentence" DATASET_70="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm070_abstract_sentence" DATASET_71="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm071_abstract_sentence" DATASET_72="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm072_abstract_sentence" DATASET_73="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm073_abstract_sentence" DATASET_74="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm074_abstract_sentence" DATASET_75="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm075_abstract_sentence" DATASET_76="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm076_abstract_sentence" DATASET_77="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm077_abstract_sentence" DATASET_78="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm078_abstract_sentence" DATASET_79="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm079_abstract_sentence" DATASET_80="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm080_abstract_sentence" DATASET_81="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm081_abstract_sentence" DATASET_82="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm082_abstract_sentence" DATASET_83="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm083_abstract_sentence" DATASET_84="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm084_abstract_sentence" DATASET_85="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm085_abstract_sentence" DATASET_86="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm086_abstract_sentence" DATASET_87="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm087_abstract_sentence" DATASET_88="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm088_abstract_sentence" DATASET_89="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm089_abstract_sentence" DATASET_90="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm090_abstract_sentence" DATASET_91="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm091_abstract_sentence" DATASET_92="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm092_abstract_sentence" DATASET_93="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm093_abstract_sentence" DATASET_94="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm094_abstract_sentence" DATASET_95="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm095_abstract_sentence" DATASET_96="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm096_abstract_sentence" DATASET_97="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm097_abstract_sentence" DATASET_98="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm098_abstract_sentence" DATASET_99="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm099_abstract_sentence" DATASET_100="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm100_abstract_sentence" DATASET_101="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm101_abstract_sentence" DATASET_102="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm102_abstract_sentence" DATASET_103="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm103_abstract_sentence" DATASET_104="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm104_abstract_sentence" DATASET_105="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm105_abstract_sentence" DATASET_106="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm106_abstract_sentence" DATASET_107="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm107_abstract_sentence" DATASET_108="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm108_abstract_sentence" DATASET_109="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm109_abstract_sentence" DATASET_110="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm110_abstract_sentence" DATASET_111="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm111_abstract_sentence" DATASET="1.0 ${DATASET_0} 1.0 ${DATASET_1} 1.0 ${DATASET_2} 1.0 ${DATASET_3} 1.0 ${DATASET_4} 1.0 ${DATASET_5} 1.0 ${DATASET_6} 1.0 ${DATASET_7} 1.0 ${DATASET_8} 1.0 ${DATASET_9} 1.0 ${DATASET_10} 1.0 ${DATASET_11} 1.0 ${DATASET_12} 1.0 ${DATASET_13} 1.0 ${DATASET_14} 1.0 ${DATASET_15} 1.0 ${DATASET_16} 1.0 ${DATASET_17} 1.0 ${DATASET_18} 1.0 ${DATASET_19} 1.0 ${DATASET_20} 1.0 ${DATASET_21} 1.0 ${DATASET_22} 1.0 ${DATASET_23} 1.0 ${DATASET_24} 1.0 ${DATASET_25} 1.0 ${DATASET_26} 1.0 ${DATASET_27} 1.0 ${DATASET_28} 1.0 ${DATASET_29} 1.0 ${DATASET_30} 1.0 ${DATASET_31} 1.0 ${DATASET_32} 1.0 ${DATASET_33} 1.0 ${DATASET_34} 1.0 ${DATASET_35} 1.0 ${DATASET_36} 1.0 ${DATASET_37} 1.0 ${DATASET_38} 1.0 ${DATASET_39} 1.0 ${DATASET_40} 1.0 ${DATASET_41} 1.0 ${DATASET_42} 1.0 ${DATASET_43} 1.0 ${DATASET_44} 1.0 ${DATASET_45} 1.0 ${DATASET_46} 1.0 ${DATASET_47} 1.0 ${DATASET_48} 1.0 ${DATASET_49} 1.0 ${DATASET_50} 1.0 ${DATASET_51} 1.0 ${DATASET_52} 1.0 ${DATASET_53} 1.0 ${DATASET_54} 1.0 ${DATASET_55} 1.0 ${DATASET_56} 1.0 ${DATASET_57} 1.0 ${DATASET_58} 1.0 ${DATASET_59} 1.0 ${DATASET_60} 1.0 ${DATASET_61} 1.0 ${DATASET_62} 1.0 ${DATASET_63} 1.0 ${DATASET_64} 1.0 ${DATASET_65} 1.0 ${DATASET_66} 1.0 ${DATASET_67} 1.0 ${DATASET_68} 1.0 ${DATASET_69} 1.0 ${DATASET_70} 1.0 ${DATASET_71} 1.0 ${DATASET_72} 1.0 ${DATASET_73} 1.0 ${DATASET_74} 1.0 ${DATASET_75} 1.0 ${DATASET_76} 1.0 ${DATASET_77} 1.0 ${DATASET_78} 1.0 ${DATASET_79} 1.0 ${DATASET_80} 1.0 ${DATASET_81} 1.0 ${DATASET_82} 1.0 ${DATASET_83} 1.0 ${DATASET_84} 1.0 ${DATASET_85} 1.0 ${DATASET_86} 1.0 ${DATASET_87} 1.0 ${DATASET_88} 1.0 ${DATASET_89} 1.0 ${DATASET_90} 1.0 ${DATASET_91} 1.0 ${DATASET_92} 1.0 ${DATASET_93} 1.0 ${DATASET_94} 1.0 ${DATASET_95} 1.0 ${DATASET_96} 1.0 ${DATASET_97} 1.0 ${DATASET_98} 1.0 ${DATASET_99} 1.0 ${DATASET_100} 1.0 ${DATASET_101} 1.0 ${DATASET_102} 1.0 ${DATASET_103} 1.0 ${DATASET_104} 1.0 ${DATASET_105} 1.0 ${DATASET_106} 1.0 ${DATASET_107} 1.0 ${DATASET_108} 1.0 ${DATASET_109} 1.0 ${DATASET_110} 1.0 ${DATASET_111}" export VOCAB_FILE=/gpfs/alpine/world-shared/med106/g8o/pubmed_bert-vocab.txt export CHECKPOINT_PATH=/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/2N6D2T1P jsrun --smpiargs="-disable_gpu_hooks" -n $nnodes -r 1 -g 6 -a 6 -c 42 python pretrain_bert.py \ --tensor-model-parallel-size 2 \ --pipeline-model-parallel-size 1 \ --num-layers 24 \ --hidden-size 1024 \ --num-attention-heads 16 \ --micro-batch-size 2 \ --global-batch-size 12 \ --seq-length 512 \ --max-position-embeddings 512 \ --train-iters 2000 \ --lr-decay-iters 990 \ --save $CHECKPOINT_PATH \ --data-path $DATASET \ --vocab-file $VOCAB_FILE \ --data-impl mmap \ --split 949,50,1 \ --distributed-backend nccl \ --lr 0.0001 \ --min-lr 0.00001 \ --lr-decay-style linear \ --lr-warmup-fraction .01 \ --weight-decay 1e-2 \ --clip-grad 1.0 \ --log-interval 100 \ --save-interval 10000 \ --eval-interval 1000 \ --eval-iters 10 \ --fp16 #--DDP-impl torch \ Loading
pretrain_pubmed.lsfdeleted 100644 → 0 +0 −174 Original line number Diff line number Diff line #!/bin/bash #BSUB -nnodes 2 #BSUB -W 0:30 #BSUB -P med106 #BSUB -alloc_flags "smt4 nvme" #BSUB -J debug #BSUB -o debug.%J #BSUB -q debug set +x module load open-ce conda deactivate conda activate /gpfs/alpine/med106/world-shared/irl1/rhel8/mytorch module list export OMP_NUM_THREADS=1 #export PYTHONPATH=$PYTHONPATH:/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/megatron/fused_kernels #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/gpfs/alpine/med106/world-shared/irl1/rhel8/mytorch/lib/python3.8/site-packages/torch/lib #export PATH=$PATH:/gpfs/alpine/med106/world-shared/irl1/rhel8/mytorch/lib/python3.8/site-packages/torch/include nodes=($(cat ${LSB_DJOB_HOSTFILE} | sort | uniq | grep -v login | grep -v batch)) nnodes=${#nodes[@]} echo $nnodes DATASET_0="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm000_abstract_sentence" DATASET_1="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm001_abstract_sentence" DATASET_2="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm002_abstract_sentence" DATASET_3="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm003_abstract_sentence" DATASET_4="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm004_abstract_sentence" DATASET_5="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm005_abstract_sentence" DATASET_6="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm006_abstract_sentence" DATASET_7="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm007_abstract_sentence" DATASET_8="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm008_abstract_sentence" DATASET_9="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm009_abstract_sentence" DATASET_10="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm010_abstract_sentence" DATASET_11="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm011_abstract_sentence" DATASET_12="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm012_abstract_sentence" DATASET_13="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm013_abstract_sentence" DATASET_14="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm014_abstract_sentence" DATASET_15="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm015_abstract_sentence" DATASET_16="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm016_abstract_sentence" DATASET_17="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm017_abstract_sentence" DATASET_18="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm018_abstract_sentence" DATASET_19="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm019_abstract_sentence" DATASET_20="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm020_abstract_sentence" DATASET_21="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm021_abstract_sentence" DATASET_22="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm022_abstract_sentence" DATASET_23="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm023_abstract_sentence" DATASET_24="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm024_abstract_sentence" DATASET_25="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm025_abstract_sentence" DATASET_26="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm026_abstract_sentence" DATASET_27="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm027_abstract_sentence" DATASET_28="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm028_abstract_sentence" DATASET_29="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm029_abstract_sentence" DATASET_30="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm030_abstract_sentence" DATASET_31="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm031_abstract_sentence" DATASET_32="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm032_abstract_sentence" DATASET_33="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm033_abstract_sentence" DATASET_34="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm034_abstract_sentence" DATASET_35="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm035_abstract_sentence" DATASET_36="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm036_abstract_sentence" DATASET_37="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm037_abstract_sentence" DATASET_38="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm038_abstract_sentence" DATASET_39="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm039_abstract_sentence" DATASET_40="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm040_abstract_sentence" DATASET_41="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm041_abstract_sentence" DATASET_42="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm042_abstract_sentence" DATASET_43="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm043_abstract_sentence" DATASET_44="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm044_abstract_sentence" DATASET_45="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm045_abstract_sentence" DATASET_46="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm046_abstract_sentence" DATASET_47="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm047_abstract_sentence" DATASET_48="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm048_abstract_sentence" DATASET_49="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm049_abstract_sentence" DATASET_50="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm050_abstract_sentence" DATASET_51="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm051_abstract_sentence" DATASET_52="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm052_abstract_sentence" DATASET_53="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm053_abstract_sentence" DATASET_54="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm054_abstract_sentence" DATASET_55="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm055_abstract_sentence" DATASET_56="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm056_abstract_sentence" DATASET_57="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm057_abstract_sentence" DATASET_58="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm058_abstract_sentence" DATASET_59="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm059_abstract_sentence" DATASET_60="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm060_abstract_sentence" DATASET_61="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm061_abstract_sentence" DATASET_62="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm062_abstract_sentence" DATASET_63="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm063_abstract_sentence" DATASET_64="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm064_abstract_sentence" DATASET_65="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm065_abstract_sentence" DATASET_66="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm066_abstract_sentence" DATASET_67="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm067_abstract_sentence" DATASET_68="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm068_abstract_sentence" DATASET_69="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm069_abstract_sentence" DATASET_70="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm070_abstract_sentence" DATASET_71="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm071_abstract_sentence" DATASET_72="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm072_abstract_sentence" DATASET_73="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm073_abstract_sentence" DATASET_74="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm074_abstract_sentence" DATASET_75="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm075_abstract_sentence" DATASET_76="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm076_abstract_sentence" DATASET_77="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm077_abstract_sentence" DATASET_78="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm078_abstract_sentence" DATASET_79="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm079_abstract_sentence" DATASET_80="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm080_abstract_sentence" DATASET_81="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm081_abstract_sentence" DATASET_82="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm082_abstract_sentence" DATASET_83="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm083_abstract_sentence" DATASET_84="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm084_abstract_sentence" DATASET_85="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm085_abstract_sentence" DATASET_86="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm086_abstract_sentence" DATASET_87="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm087_abstract_sentence" DATASET_88="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm088_abstract_sentence" DATASET_89="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm089_abstract_sentence" DATASET_90="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm090_abstract_sentence" DATASET_91="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm091_abstract_sentence" DATASET_92="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm092_abstract_sentence" DATASET_93="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm093_abstract_sentence" DATASET_94="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm094_abstract_sentence" DATASET_95="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm095_abstract_sentence" DATASET_96="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm096_abstract_sentence" DATASET_97="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm097_abstract_sentence" DATASET_98="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm098_abstract_sentence" DATASET_99="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm099_abstract_sentence" DATASET_100="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm100_abstract_sentence" DATASET_101="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm101_abstract_sentence" DATASET_102="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm102_abstract_sentence" DATASET_103="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm103_abstract_sentence" DATASET_104="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm104_abstract_sentence" DATASET_105="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm105_abstract_sentence" DATASET_106="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm106_abstract_sentence" DATASET_107="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm107_abstract_sentence" DATASET_108="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm108_abstract_sentence" DATASET_109="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm109_abstract_sentence" DATASET_110="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm110_abstract_sentence" DATASET_111="/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/preprocess_data/pm111_abstract_sentence" DATASET="1.0 ${DATASET_0} 1.0 ${DATASET_1} 1.0 ${DATASET_2} 1.0 ${DATASET_3} 1.0 ${DATASET_4} 1.0 ${DATASET_5} 1.0 ${DATASET_6} 1.0 ${DATASET_7} 1.0 ${DATASET_8} 1.0 ${DATASET_9} 1.0 ${DATASET_10} 1.0 ${DATASET_11} 1.0 ${DATASET_12} 1.0 ${DATASET_13} 1.0 ${DATASET_14} 1.0 ${DATASET_15} 1.0 ${DATASET_16} 1.0 ${DATASET_17} 1.0 ${DATASET_18} 1.0 ${DATASET_19} 1.0 ${DATASET_20} 1.0 ${DATASET_21} 1.0 ${DATASET_22} 1.0 ${DATASET_23} 1.0 ${DATASET_24} 1.0 ${DATASET_25} 1.0 ${DATASET_26} 1.0 ${DATASET_27} 1.0 ${DATASET_28} 1.0 ${DATASET_29} 1.0 ${DATASET_30} 1.0 ${DATASET_31} 1.0 ${DATASET_32} 1.0 ${DATASET_33} 1.0 ${DATASET_34} 1.0 ${DATASET_35} 1.0 ${DATASET_36} 1.0 ${DATASET_37} 1.0 ${DATASET_38} 1.0 ${DATASET_39} 1.0 ${DATASET_40} 1.0 ${DATASET_41} 1.0 ${DATASET_42} 1.0 ${DATASET_43} 1.0 ${DATASET_44} 1.0 ${DATASET_45} 1.0 ${DATASET_46} 1.0 ${DATASET_47} 1.0 ${DATASET_48} 1.0 ${DATASET_49} 1.0 ${DATASET_50} 1.0 ${DATASET_51} 1.0 ${DATASET_52} 1.0 ${DATASET_53} 1.0 ${DATASET_54} 1.0 ${DATASET_55} 1.0 ${DATASET_56} 1.0 ${DATASET_57} 1.0 ${DATASET_58} 1.0 ${DATASET_59} 1.0 ${DATASET_60} 1.0 ${DATASET_61} 1.0 ${DATASET_62} 1.0 ${DATASET_63} 1.0 ${DATASET_64} 1.0 ${DATASET_65} 1.0 ${DATASET_66} 1.0 ${DATASET_67} 1.0 ${DATASET_68} 1.0 ${DATASET_69} 1.0 ${DATASET_70} 1.0 ${DATASET_71} 1.0 ${DATASET_72} 1.0 ${DATASET_73} 1.0 ${DATASET_74} 1.0 ${DATASET_75} 1.0 ${DATASET_76} 1.0 ${DATASET_77} 1.0 ${DATASET_78} 1.0 ${DATASET_79} 1.0 ${DATASET_80} 1.0 ${DATASET_81} 1.0 ${DATASET_82} 1.0 ${DATASET_83} 1.0 ${DATASET_84} 1.0 ${DATASET_85} 1.0 ${DATASET_86} 1.0 ${DATASET_87} 1.0 ${DATASET_88} 1.0 ${DATASET_89} 1.0 ${DATASET_90} 1.0 ${DATASET_91} 1.0 ${DATASET_92} 1.0 ${DATASET_93} 1.0 ${DATASET_94} 1.0 ${DATASET_95} 1.0 ${DATASET_96} 1.0 ${DATASET_97} 1.0 ${DATASET_98} 1.0 ${DATASET_99} 1.0 ${DATASET_100} 1.0 ${DATASET_101} 1.0 ${DATASET_102} 1.0 ${DATASET_103} 1.0 ${DATASET_104} 1.0 ${DATASET_105} 1.0 ${DATASET_106} 1.0 ${DATASET_107} 1.0 ${DATASET_108} 1.0 ${DATASET_109} 1.0 ${DATASET_110} 1.0 ${DATASET_111}" export VOCAB_FILE=/gpfs/alpine/world-shared/med106/g8o/pubmed_bert-vocab.txt export CHECKPOINT_PATH=/gpfs/alpine/med106/world-shared/irl1/rhel8/fork-megatron/2N6D2T1P jsrun --smpiargs="-disable_gpu_hooks" -n $nnodes -r 1 -g 6 -a 6 -c 42 python pretrain_bert.py \ --tensor-model-parallel-size 2 \ --pipeline-model-parallel-size 1 \ --num-layers 24 \ --hidden-size 1024 \ --num-attention-heads 16 \ --micro-batch-size 2 \ --global-batch-size 12 \ --seq-length 512 \ --max-position-embeddings 512 \ --train-iters 2000 \ --lr-decay-iters 990 \ --save $CHECKPOINT_PATH \ --data-path $DATASET \ --vocab-file $VOCAB_FILE \ --data-impl mmap \ --split 949,50,1 \ --distributed-backend nccl \ --lr 0.0001 \ --min-lr 0.00001 \ --lr-decay-style linear \ --lr-warmup-fraction .01 \ --weight-decay 1e-2 \ --clip-grad 1.0 \ --log-interval 100 \ --save-interval 10000 \ --eval-interval 1000 \ --eval-iters 10 \ --fp16 #--DDP-impl torch \