Commit a6c15db3 authored by Cianciosa, Mark
Browse files

Merge branch 'fast_math' into 'main'

Enable fast-math optimization of JIT code. Unit tests show no regressions but...

See merge request !62
parents bece6c7b d2a6148b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -149,6 +149,7 @@ namespace gpu {
            llvm::SmallVector<const char *, 8> args = split_string(arg_string);
            args.push_back(filename.c_str());
#ifdef NDEBUG
            args.push_back("-ffast-math");
            args.push_back("-O3");
#else
            args.push_back("-debug-info-kind=standalone");
+4 −3
Original line number Diff line number Diff line
@@ -232,14 +232,15 @@ namespace gpu {
            }

            const std::string temp = arch.str();
            std::array<const char *, 7> options({
            std::array<const char *, 8> options({
                temp.c_str(),
                "--std=c++17",
                "--relocatable-device-code=false",
                "--include-path=" CUDA_INCLUDE,
                "--include-path=" HEADER_DIR,
                "--extra-device-vectorization",
                "--device-as-default-execution-space"
                "--device-as-default-execution-space",
                "--use_fast_math"
            });

            if (nvrtcCompileProgram(kernel_program, options.size(), options.data())) {
@@ -283,7 +284,7 @@ namespace gpu {
                reinterpret_cast<void *> (0)
            };

            check_error(cuModuleLoadDataEx(&module, ptx, 1,
            check_error(cuModuleLoadDataEx(&module, ptx, module_options.size(),
                                           module_options.data(),
                                           module_values.data()), "cuModuleLoadDataEx");

+1 −1
Original line number Diff line number Diff line
@@ -301,7 +301,7 @@ namespace gpu {
//------------------------------------------------------------------------------
        ///  @brief Create the Metal compile options.
        ///
        ///  Fast math is enabled on the returned options. Only the effective
        ///  assignment is kept; an earlier assignment of NO that was immediately
        ///  overwritten has been removed.
        ///
        ///  @returns A new MTLCompileOptions with fastMathEnabled set to YES.
        MTLCompileOptions *compile_options() {
            MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
            options.fastMathEnabled = YES;
            return options;
        }