Commit 4f3a64a6 authored by Gounley, John
Browse files

Stash slurm script

parent 30bc9021
Loading
Loading
Loading
Loading
Loading
−644 B

File deleted.

+0 −19
Original line number Diff line number Diff line
# ninja log v5
0	30398	1648152419124710894	scaled_upper_triang_masked_softmax.o	2bbbee3ad21ff364
0	95676	1648152484366679873	scaled_upper_triang_masked_softmax_hip.cuda.o	3d00e903ef9da0e2
95676	95972	1648152484696107911	scaled_upper_triang_masked_softmax_cuda.so	30899eb9ae554dba
0	31042	1648152515842879231	scaled_masked_softmax.o	2e7fd1ff56a6385
0	91820	1648152576584912073	scaled_masked_softmax_hip.cuda.o	d5976b0b1e7335ce
91820	92120	1648152576918573467	scaled_masked_softmax_cuda.so	b51db70916400fda
0	30570	1648152607609532922	layer_norm_cuda.o	ac480ca7cf48ac12
0	54747	1648152631767985021	layer_norm_hip_kernel.cuda.o	69e6ad2229227d9c
54750	55040	1648152632076249846	fused_mix_prec_layer_norm_cuda.so	f176c6a9cc995b36
2	33075	1655304764109486547	scaled_upper_triang_masked_softmax.o	646ae4e890be9292
3	98835	1655304829844256564	scaled_upper_triang_masked_softmax_hip.cuda.o	dd0a0f302fdc3bd2
98835	114030	1655304831054806263	scaled_upper_triang_masked_softmax_cuda.so	a979577d0fda6f57
0	30880	1655304876071557826	scaled_masked_softmax.o	b05db2eb4ba0fabb
0	91253	1655304936415577613	scaled_masked_softmax_hip.cuda.o	66cab1ff267c9b5f
91254	103942	1655304937334091252	scaled_masked_softmax_cuda.so	471854a9206c81b4
0	32210	1655304981510355343	layer_norm_cuda.o	36e6abe9de9695d7
0	56394	1655305005677609918	layer_norm_hip_kernel.cuda.o	a2d2bda5cf306935
56396	69555	1655305006565399456	fused_mix_prec_layer_norm_cuda.so	bb2aa3351d50f3
+0 −28
Original line number Diff line number Diff line
# Top-level variables for building the `fused_mix_prec_layer_norm_cuda`
# extension (see -DTORCH_EXTENSION_NAME below).
# NOTE(review): this file looks machine-generated (presumably by PyTorch's
# cpp_extension JIT build) -- confirm before hand-editing or committing it.

# Minimum Ninja version required to parse this file.
ninja_required_version = 1.3
# Host C++ compiler; referenced as $cxx by the `compile` and `link` rules.
cxx = g++
# Device compiler; referenced as $nvcc by the `cuda_compile` rule. Despite the
# variable name, it points at hipcc from the ROCm 5.1.0 install.
nvcc = /opt/rocm-5.1.0/bin/hipcc

# Host-side compile flags. Every include path is an absolute path into one
# user's scratch checkout, one Python 3.8 environment, and /opt/rocm-5.1.0,
# so this file only works on the machine/layout it was generated for.
cflags = -DTORCH_EXTENSION_NAME=fused_mix_prec_layer_norm_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -I/gpfs/alpine/med106/scratch/xf9/Megatron-LM/megatron/fused_kernels -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/TH -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/THC -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/THH -isystem /opt/rocm-5.1.0/include -isystem /opt/rocm-5.1.0/miopen/include -isystem /ccs/proj/med106/working_clone/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=1 -fPIC -std=c++14 -O3 -D__HIP_PLATFORM_AMD__=1 -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc
# Extra flags appended after the output file in the `compile` rule; empty here.
post_cflags = 
# Device-side compile flags: the host flags above plus HIP/ROCm defines and one
# --amdgpu-target per GPU architecture (gfx900, gfx906, gfx908, gfx90a).
# Note that -fPIC, -O3 and -D__HIP_PLATFORM_AMD__=1 each appear twice.
cuda_cflags = -DWITH_HIP -DTORCH_EXTENSION_NAME=fused_mix_prec_layer_norm_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -I/gpfs/alpine/med106/scratch/xf9/Megatron-LM/megatron/fused_kernels -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/TH -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/THC -isystem /ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/include/THH -isystem /opt/rocm-5.1.0/include -isystem /opt/rocm-5.1.0/miopen/include -isystem /ccs/proj/med106/working_clone/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=1 -fPIC -std=c++14 -O3 -D__HIP_PLATFORM_AMD__=1 -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -fPIC -D__HIP_PLATFORM_HCC__=1 -DUSE_ROCM=1 -DCUDA_HAS_FP16=1 -D__HIP_NO_HALF_OPERATORS__=1 -D__HIP_NO_HALF_CONVERSIONS__=1 -O3 -D__HIP_PLATFORM_AMD__=1 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 --amdgpu-target=gfx90a -fno-gpu-rdc
# Extra flags appended after the output file in the `cuda_compile` rule; empty here.
cuda_post_cflags = 
# Link flags: produce a shared object linked against the libraries in the
# torch/lib directory of the Python environment and ROCm's amdhip64.
ldflags = -shared -L/ccs/proj/med106/working_clone/lib/python3.8/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/opt/rocm-5.1.0/lib -lamdhip64

# Host C++ compile: $cxx builds $in into $out with $cflags. -MMD -MF $out.d
# makes the compiler write a header-dependency file, which Ninja picks up via
# `depfile` and parses in gcc format (`deps = gcc`), so header changes trigger
# a rebuild of the object.
rule compile
  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
  depfile = $out.d
  deps = gcc

# Device compile: $nvcc builds $in into $out with $cuda_cflags. Unlike
# `compile`, this rule declares no depfile/deps, so Ninja only tracks the
# source file listed on the build edge -- edits to headers it includes will
# not by themselves cause these objects to be rebuilt.
rule cuda_compile
  command = $nvcc  $cuda_cflags -c $in -o $out $cuda_post_cflags

# Link: $cxx combines all input objects into $out using $ldflags.
rule link
  command = $cxx $in $ldflags -o $out

# Build edges. Outputs are relative paths (resolved against the directory
# Ninja runs in); sources are absolute paths into a specific Megatron-LM
# checkout.

# Host-side source, built with the `compile` rule.
build layer_norm_cuda.o: compile /gpfs/alpine/med106/scratch/xf9/Megatron-LM/megatron/fused_kernels/layer_norm_cuda.cpp
# HIP kernel source, built with the `cuda_compile` rule.
build layer_norm_hip_kernel.cuda.o: cuda_compile /gpfs/alpine/med106/scratch/xf9/Megatron-LM/megatron/fused_kernels/layer_norm_hip_kernel.hip

# Link both objects into the loadable extension module.
build fused_mix_prec_layer_norm_cuda.so: link layer_norm_cuda.o layer_norm_hip_kernel.cuda.o

# Target built when ninja is invoked without arguments.
default fused_mix_prec_layer_norm_cuda.so
−607 KiB

File deleted.

−259 KiB

File deleted.

Loading