diff --git a/graph_framework/cpu_context.hpp b/graph_framework/cpu_context.hpp index 4fa096858b8bd7369e9bdcd2fe713e4135282a52..5c2a098edab47e93f9a5968dae615fa533044518 100644 --- a/graph_framework/cpu_context.hpp +++ b/graph_framework/cpu_context.hpp @@ -149,6 +149,7 @@ namespace gpu { llvm::SmallVector args = split_string(arg_string); args.push_back(filename.c_str()); #ifdef NDEBUG + args.push_back("-ffast-math"); args.push_back("-O3"); #else args.push_back("-debug-info-kind=standalone"); diff --git a/graph_framework/cuda_context.hpp b/graph_framework/cuda_context.hpp index 044b4cb954eb6f17337094215de6f96e0d08544b..c954d19753b9eec94e106190f86b4eef4fffe441 100644 --- a/graph_framework/cuda_context.hpp +++ b/graph_framework/cuda_context.hpp @@ -232,14 +232,15 @@ namespace gpu { } const std::string temp = arch.str(); - std::array options({ + std::array options({ temp.c_str(), "--std=c++17", "--relocatable-device-code=false", "--include-path=" CUDA_INCLUDE, "--include-path=" HEADER_DIR, "--extra-device-vectorization", - "--device-as-default-execution-space" + "--device-as-default-execution-space", + "--use_fast_math" }); if (nvrtcCompileProgram(kernel_program, options.size(), options.data())) { @@ -283,7 +284,7 @@ namespace gpu { reinterpret_cast (0) }; - check_error(cuModuleLoadDataEx(&module, ptx, 1, + check_error(cuModuleLoadDataEx(&module, ptx, module_options.size(), module_options.data(), module_values.data()), "cuModuleLoadDataEx"); diff --git a/graph_framework/metal_context.hpp b/graph_framework/metal_context.hpp index 04faba2a1100c5b9cff150df44f9c3e770e85b7f..c796108b668e10470767c7d1021babbf70c186ce 100644 --- a/graph_framework/metal_context.hpp +++ b/graph_framework/metal_context.hpp @@ -301,7 +301,7 @@ namespace gpu { //------------------------------------------------------------------------------ MTLCompileOptions *compile_options() { MTLCompileOptions *options = [MTLCompileOptions new]; - options.fastMathEnabled = NO; + options.fastMathEnabled = YES; return options; }