avoid non deterministic arch order in compilation (37d77ca0) · Commits · candle / Megatron-LM

megatron/fused_kernels/init.py

+8 −0

Original line number	Diff line number	Diff line
		@@ -15,8 +15,16 @@

		import pathlib
		import subprocess
		import os
		from torch.utils import cpp_extension

		# Setting this param to a list has the problem of generating
		# different compilation commands (with a different order of architectures)
		# and leading to recompilation of the fused kernels.
		# Set it to an empty string to avoid recompilation
		# and assign the arch flags explicitly in extra_cuda_cflags below.
		os.environ["TORCH_CUDA_ARCH_LIST"] = ""

		def get_cuda_bare_metal_version(cuda_dir):
		raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"],
		universal_newlines=True)