Commit 1bba57c0 authored by cianciosa's avatar cianciosa
Browse files

Fix CUDA issue where constant literals would cause an ambiguous fma type...

Fix CUDA issue where constant literals would cause an ambiguous fma type selection. Reduce divides of divides.
parent a9cb823a
Loading
Loading
Loading
Loading
+174 −0
Original line number Diff line number Diff line
@@ -2283,6 +2283,93 @@
				CODE_SIGN_STYLE = Automatic;
				DEAD_CODE_STRIPPING = YES;
				MACOSX_DEPLOYMENT_TARGET = 13.3;
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGenData",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMCGData",
					"-lLLVMSandboxIR",
					"-lLLVMFrontendAtomic",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
					"-lclangLex",
					"-lclangDriver",
					"-lclangSerialization",
					"-lclangAST",
					"-lclangSema",
					"-lclangAnalysis",
					"-lclangASTMatchers",
					"-lclangSupport",
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = macosx;
			};
@@ -2296,6 +2383,93 @@
				CODE_SIGN_STYLE = Automatic;
				DEAD_CODE_STRIPPING = YES;
				MACOSX_DEPLOYMENT_TARGET = 13.3;
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGenData",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMCGData",
					"-lLLVMSandboxIR",
					"-lLLVMFrontendAtomic",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
					"-lclangLex",
					"-lclangDriver",
					"-lclangSerialization",
					"-lclangAST",
					"-lclangSema",
					"-lclangAnalysis",
					"-lclangASTMatchers",
					"-lclangSupport",
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = macosx;
			};
+13 −3
Original line number Diff line number Diff line
@@ -2883,10 +2883,15 @@ namespace graph {
            }

//  (a/b)/c -> a/(b*c)
//  a/(b/c) -> a*c/b
            auto ld = divide_cast(this->left);
            auto rd = divide_cast(this->right);
            if (ld.get()) {
                return ld->get_left()/(ld->get_right()*this->right);
            }
            if (rd.get()) {
                return this->left*rd->get_right()/rd->get_left();
            }

//  Power reductions.
            if (is_variable_combineable(this->left,
@@ -3223,7 +3228,6 @@ namespace graph {

//  exp(a)/(c/exp(b)) -> (exp(a)*exp(b))/c
//  exp(a)/(exp(b)/c) -> c*(exp(a)/exp(b))
            auto rd = divide_cast(this->right);
            if (rd.get() && lexp.get()) {
                auto rdre = exp_cast(rd->get_right());
                if (rdre.get()) {
@@ -3836,6 +3840,12 @@ namespace graph {

//  Common denominator reductions.
            if (ld.get() && rd.get()) {
//  fma(b/c,a,b/d) -> b*(a/c + 1/d)
                if (ld->get_left()->is_match(rd->get_left())) {
                    return ld->get_left()*(this->middle/ld->get_right() +
                                           1.0/rd->get_right());
                }

//  fma(a/(b*c),d,e/c) -> fma(a,d,e*b)/(b*c)
//  fma(a/(c*b),d,e/c) -> fma(a,d,e*b)/(c*b)
//  fma(a/c,d,e/(c*b)) -> fma(a*b,d,e)/(b*c)
+3 −2
Original line number Diff line number Diff line
@@ -464,7 +464,8 @@ namespace graph {
                                    + jit::format_to_string(this->evaluate().at(0))
                                    + ")";
                } else {
                    registers[this] = jit::format_to_string(this->evaluate().at(0));
                    registers[this] = "(" + jit::get_type_string<T> () + ")"
                                    + jit::format_to_string(this->evaluate().at(0));
                }
#endif
            }
+3 −3
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ void run_korc() {
            
            auto gamma = graph::variable<T> (local_num_particles, "\\gamma");
            
            auto dt = graph::constant<T> (0.25);
            auto dt = graph::constant<T> (0.5);
            
            auto gamma_init = 1.0/graph::sqrt(1.0 - u_vec->dot(u_vec));
            
@@ -143,11 +143,11 @@ void run_korc() {
            const timeing::measure_diagnostic t_run("Run Time");
            work.pre_run();
            for (size_t i = 0; i < 1000000; i++) {
                /*sync.join();
                sync.join();
                work.wait();
                sync = std::thread([&file, &dataset] () -> void {
                    dataset.write(file);
                });*/
                });
                
                work.run();
            }
+7 −4
Original line number Diff line number Diff line
@@ -2597,6 +2597,9 @@ template<jit::float_scalar T> void test_divide() {
//  (c*a)*b/c -> a*b
    assert((((c*a)*b)/c)->is_match(a*b) && "Expected a*b");

//  a/(b/c) -> a*c/b
    assert((a/(b/c))->is_match(a*c/b) && "Expected a*b/c");

//  (a*b*c)^2/a^2 -> (b*c)^2
//  (a*b*c)^2/(a^2*d) -> (b*c)^2/d
//  (e*(a*b*c)^2)/(a^2*d) -> e*(b*c)^2/d
@@ -2713,9 +2716,9 @@ template<jit::float_scalar T> void test_fma() {
           "Expected a value of one.");

//  Test reduction.
    auto var_a = graph::variable<T> (1, "");
    auto var_b = graph::variable<T> (1, "");
    auto var_c = graph::variable<T> (1, "");
    auto var_a = graph::variable<T> (1, "a");
    auto var_b = graph::variable<T> (1, "b");
    auto var_c = graph::variable<T> (1, "c");

//  fma(1,a,b) = a + b
    auto one_times_vara_plus_varb = graph::fma(one, var_a, var_b);
@@ -2764,7 +2767,7 @@ template<jit::float_scalar T> void test_fma() {
           "Expected common var_b");

//  fma(a, b, fma(c, b, d)) -> fma(b, a + c, d)
    auto var_d = graph::variable<T> (1, "");
    auto var_d = graph::variable<T> (1, "d");
    auto match1 = graph::fma(var_b, var_a + var_c, var_d);
    auto nested_fma1 = graph::fma(var_a, var_b, 
                                  graph::fma(var_c, var_b, var_d));