Unverified Commit 251d3166 authored by Someone Serge

cudaPackages.saxpy: init at unstable-2023-07-11

parent 4df8614c
+2 −0
@@ -71,4 +71,6 @@ in
     cudaFlags
     markForCudatoolkitRootHook
     setupCudaHook;
+
+    saxpy = final.callPackage ./saxpy { };
 }
+12 −0
cmake_minimum_required(VERSION 3.25)
project(saxpy LANGUAGES CXX CUDA)

find_package(CUDAToolkit REQUIRED COMPONENTS cudart cublas)

add_executable(saxpy saxpy.cu)
target_link_libraries(saxpy PUBLIC CUDA::cublas CUDA::cudart m)
target_compile_features(saxpy PRIVATE cxx_std_14)
target_compile_options(saxpy PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

install(TARGETS saxpy)
+50 −0
{ autoAddOpenGLRunpathHook
, backendStdenv
, cmake
, cuda_cccl
, cuda_cudart
, cudaFlags
, cuda_nvcc
, lib
, libcublas
, setupCudaHook
, stdenv
}:

backendStdenv.mkDerivation {
  pname = "saxpy";
  version = "unstable-2023-07-11";

  src = ./.;

  buildInputs = [
    libcublas
    cuda_cudart
    cuda_cccl
  ];
  nativeBuildInputs = [
    cmake

    # NOTE: this needs to be pkgs.buildPackages.cudaPackages_XX_Y.cuda_nvcc for
    # cross-compilation to work; see the sketch after this derivation. This
    # should work automatically once we move to spliced scopes. Delete this
    # comment once that happens.
    cuda_nvcc

    # Alternatively, we could remove the propagated hook from cuda_nvcc and add
    # directly:
    # setupCudaHook
    autoAddOpenGLRunpathHook
  ];

  cmakeFlags = [
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    "-DCMAKE_CUDA_ARCHITECTURES=${with cudaFlags; builtins.concatStringsSep ";" (map dropDot cudaCapabilities)}"
  ];

  meta = {
    description = "A simple (Single-precision AX Plus Y) FindCUDAToolkit.cmake example for testing cross-compilation";
    license = lib.licenses.mit;
    maintainers = lib.teams.cuda.members;
    platforms = lib.platforms.unix;
  };
}
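
The NOTE in nativeBuildInputs above can be made concrete. The following is a minimal sketch, not part of this commit, of the manual workaround it describes: taking cuda_nvcc from buildPackages so that the compiler runs on the build platform. cudaPackages_XX_Y is kept as the version placeholder from the comment, and pkgs is assumed to be a nixpkgs instance in scope; a real cross build would also drop the non-spliced cuda_nvcc from the list.

# Hypothetical sketch; the attribute paths mirror the NOTE above and are
# not taken from this commit.
cudaPackages.saxpy.overrideAttrs (oldAttrs: {
  nativeBuildInputs = oldAttrs.nativeBuildInputs ++ [
    # nvcc from buildPackages runs on the build platform while emitting
    # code for the host platform, which is what cross-compilation needs.
    pkgs.buildPackages.cudaPackages_XX_Y.cuda_nvcc
  ];
})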
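
For reference, the cmakeFlags entry above strips the dots from the configured GPU capabilities and joins them with semicolons, the list form CMAKE_CUDA_ARCHITECTURES expects. A small evaluation sketch with stand-ins for cudaFlags.cudaCapabilities and cudaFlags.dropDot (the capability values are hypothetical examples):

let
  # Stand-ins: the real values come from cudaFlags in the cudaPackages scope.
  cudaCapabilities = [ "8.6" "8.9" ]; # hypothetical example values
  dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;
in
builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
# evaluates to "86;89", i.e. -DCMAKE_CUDA_ARCHITECTURES=86;89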
+68 −0
#include <cublas_v2.h>
#include <cuda_runtime.h>

#include <algorithm> // std::max
#include <cmath>     // std::abs
#include <cstdlib>   // std::exit
#include <vector>

#include <stdio.h>

static inline void check(cudaError_t err, const char *context) {
  if (err != cudaSuccess) {
    fprintf(stderr, "CUDA error at %s: %s\n", context, cudaGetErrorString(err));
    std::exit(EXIT_FAILURE);
  }
}

#define CHECK(x) check(x, #x)

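// Each thread computes one element: y[i] := a * x[i] + y[i].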
__global__ void saxpy(int n, float a, float *x, float *y) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    y[i] = a * x[i] + y[i];
}

int main(void) {
  setbuf(stderr, NULL);
  fprintf(stderr, "Start\n");

  int rtVersion, driverVersion;
  CHECK(cudaRuntimeGetVersion(&rtVersion));
  CHECK(cudaDriverGetVersion(&driverVersion));

  fprintf(stderr, "Runtime version: %d\n", rtVersion);
  fprintf(stderr, "Driver version: %d\n", driverVersion);

  constexpr int N = 1 << 10;

  std::vector<float> xHost(N), yHost(N);
  for (int i = 0; i < N; i++) {
    xHost[i] = 1.0f;
    yHost[i] = 2.0f;
  }

  fprintf(stderr, "Host memory initialized, copying to the device\n");
  fflush(stderr);

  float *xDevice, *yDevice;
  CHECK(cudaMalloc(&xDevice, N * sizeof(float)));
  CHECK(cudaMalloc(&yDevice, N * sizeof(float)));

  CHECK(cudaMemcpy(xDevice, xHost.data(), N * sizeof(float),
                   cudaMemcpyHostToDevice));
  CHECK(cudaMemcpy(yDevice, yHost.data(), N * sizeof(float),
                   cudaMemcpyHostToDevice));
  fprintf(stderr, "Scheduled a cudaMemcpy, calling the kernel\n");

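  // Launch ceil(N / 256) blocks of 256 threads so every element is covered.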
  saxpy<<<(N + 255) / 256, 256>>>(N, 2.0f, xDevice, yDevice);
  fprintf(stderr, "Scheduled a kernel call\n");
  CHECK(cudaGetLastError());

  CHECK(cudaMemcpy(yHost.data(), yDevice, N * sizeof(float),
                   cudaMemcpyDeviceToHost));

  // x was initialized to 1.0f and y to 2.0f, so after y = 2.0f * x + y every
  // element of yHost should equal 4.0f.
  float maxError = 0.0f;
  for (int i = 0; i < N; i++)
    maxError = std::max(maxError, std::abs(yHost[i] - 4.0f));
  fprintf(stderr, "Max error: %f\n", maxError);

  CHECK(cudaFree(xDevice));
  CHECK(cudaFree(yDevice));
}