diff --git a/graph_framework/cpu_context.hpp b/graph_framework/cpu_context.hpp
index 4fa096858b8bd7369e9bdcd2fe713e4135282a52..5c2a098edab47e93f9a5968dae615fa533044518 100644
--- a/graph_framework/cpu_context.hpp
+++ b/graph_framework/cpu_context.hpp
@@ -149,6 +149,7 @@ namespace gpu {
             llvm::SmallVector<const char *, 8> args = split_string(arg_string);
             args.push_back(filename.c_str());
 #ifdef NDEBUG
+            args.push_back("-ffast-math");
             args.push_back("-O3");
 #else
             args.push_back("-debug-info-kind=standalone");
diff --git a/graph_framework/cuda_context.hpp b/graph_framework/cuda_context.hpp
index 044b4cb954eb6f17337094215de6f96e0d08544b..c954d19753b9eec94e106190f86b4eef4fffe441 100644
--- a/graph_framework/cuda_context.hpp
+++ b/graph_framework/cuda_context.hpp
@@ -232,14 +232,15 @@ namespace gpu {
             }
 
             const std::string temp = arch.str();
-            std::array<const char *, 7> options({
+            std::array<const char *, 8> options({
                 temp.c_str(),
                 "--std=c++17",
                 "--relocatable-device-code=false",
                 "--include-path=" CUDA_INCLUDE,
                 "--include-path=" HEADER_DIR,
                 "--extra-device-vectorization",
-                "--device-as-default-execution-space"
+                "--device-as-default-execution-space",
+                "--use_fast_math"
             });
 
             if (nvrtcCompileProgram(kernel_program, options.size(), options.data())) {
@@ -283,7 +284,7 @@ namespace gpu {
                 reinterpret_cast<void *> (0)
             };
 
-            check_error(cuModuleLoadDataEx(&module, ptx, 1,
+            check_error(cuModuleLoadDataEx(&module, ptx, module_options.size(),
                                            module_options.data(),
                                            module_values.data()), "cuModuleLoadDataEx");
 
diff --git a/graph_framework/metal_context.hpp b/graph_framework/metal_context.hpp
index 04faba2a1100c5b9cff150df44f9c3e770e85b7f..c796108b668e10470767c7d1021babbf70c186ce 100644
--- a/graph_framework/metal_context.hpp
+++ b/graph_framework/metal_context.hpp
@@ -301,7 +301,7 @@ namespace gpu {
 //------------------------------------------------------------------------------
         MTLCompileOptions *compile_options() {
             MTLCompileOptions *options = [MTLCompileOptions new];
-            options.fastMathEnabled = NO;
+            options.fastMathEnabled = YES; // Permit fast-math floating-point optimizations in compiled kernels (results may deviate from strict IEEE 754).
             return options;
         }