Commit d2a6148b authored by cianciosa's avatar cianciosa
Browse files

Enable fast-math optimization of JIT code. Unit tests show no regressions but...

Enable fast-math optimization of JIT code. Unit tests show no regressions, but this has not been tested on CUDA yet.
parent bece6c7b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -149,6 +149,7 @@ namespace gpu {
            llvm::SmallVector<const char *, 8> args = split_string(arg_string);
            args.push_back(filename.c_str());
#ifdef NDEBUG
            args.push_back("-ffast-math");
            args.push_back("-O3");
#else
            args.push_back("-debug-info-kind=standalone");
+4 −3
Original line number Diff line number Diff line
@@ -232,14 +232,15 @@ namespace gpu {
            }

            const std::string temp = arch.str();
            std::array<const char *, 7> options({
            std::array<const char *, 8> options({
                temp.c_str(),
                "--std=c++17",
                "--relocatable-device-code=false",
                "--include-path=" CUDA_INCLUDE,
                "--include-path=" HEADER_DIR,
                "--extra-device-vectorization",
                "--device-as-default-execution-space"
                "--device-as-default-execution-space",
                "--use_fast_math"
            });

            if (nvrtcCompileProgram(kernel_program, options.size(), options.data())) {
@@ -283,7 +284,7 @@ namespace gpu {
                reinterpret_cast<void *> (0)
            };

            check_error(cuModuleLoadDataEx(&module, ptx, 1,
            check_error(cuModuleLoadDataEx(&module, ptx, module_options.size(),
                                           module_options.data(),
                                           module_values.data()), "cuModuleLoadDataEx");

+1 −1
Original line number Diff line number Diff line
@@ -301,7 +301,7 @@ namespace gpu {
//------------------------------------------------------------------------------
        /// Create the compile options object, with fast math switched on.
        ///
        /// @returns A newly created MTLCompileOptions instance whose
        ///          fastMathEnabled property is left set to YES.
        MTLCompileOptions *compile_options() {
            MTLCompileOptions *options = [MTLCompileOptions new];
            // NOTE(review): the two assignments below look like the removed and
            // added lines of a diff whose +/- markers were lost. Read as plain
            // code, the first is immediately overwritten, so only YES takes
            // effect. Confirm against the actual source file.
            options.fastMathEnabled = NO;
            options.fastMathEnabled = YES;
            return options;
        }