From d2a6148b27aee81e34d0110aee9c5e0545a787ac Mon Sep 17 00:00:00 2001 From: cianciosa Date: Fri, 14 Feb 2025 16:26:02 -0500 Subject: [PATCH] Enable fastmath optimization of JIT code. Unit tests show no regressions, but this has not been tested on CUDA yet. --- graph_framework/cpu_context.hpp | 1 + graph_framework/cuda_context.hpp | 7 ++++--- graph_framework/metal_context.hpp | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/graph_framework/cpu_context.hpp b/graph_framework/cpu_context.hpp index 4fa0968..5c2a098 100644 --- a/graph_framework/cpu_context.hpp +++ b/graph_framework/cpu_context.hpp @@ -149,6 +149,7 @@ namespace gpu { llvm::SmallVector args = split_string(arg_string); args.push_back(filename.c_str()); #ifdef NDEBUG + args.push_back("-ffast-math"); args.push_back("-O3"); #else args.push_back("-debug-info-kind=standalone"); diff --git a/graph_framework/cuda_context.hpp b/graph_framework/cuda_context.hpp index 044b4cb..c954d19 100644 --- a/graph_framework/cuda_context.hpp +++ b/graph_framework/cuda_context.hpp @@ -232,14 +232,15 @@ namespace gpu { } const std::string temp = arch.str(); - std::array options({ + std::array options({ temp.c_str(), "--std=c++17", "--relocatable-device-code=false", "--include-path=" CUDA_INCLUDE, "--include-path=" HEADER_DIR, "--extra-device-vectorization", - "--device-as-default-execution-space" + "--device-as-default-execution-space", + "--use_fast_math" }); if (nvrtcCompileProgram(kernel_program, options.size(), options.data())) { @@ -283,7 +284,7 @@ namespace gpu { reinterpret_cast (0) }; - check_error(cuModuleLoadDataEx(&module, ptx, 1, + check_error(cuModuleLoadDataEx(&module, ptx, module_options.size(), module_options.data(), module_values.data()), "cuModuleLoadDataEx"); diff --git a/graph_framework/metal_context.hpp b/graph_framework/metal_context.hpp index 04faba2..c796108 100644 --- a/graph_framework/metal_context.hpp +++ b/graph_framework/metal_context.hpp @@ -301,7 +301,7 @@ namespace gpu { 
//------------------------------------------------------------------------------ MTLCompileOptions *compile_options() { MTLCompileOptions *options = [MTLCompileOptions new]; - options.fastMathEnabled = NO; + options.fastMathEnabled = YES; return options; } -- GitLab