Commit d3e53a4d authored by Cianciosa, Mark's avatar Cianciosa, Mark
Browse files

Merge branch 'cuda_fix' into 'main'

Fix CUDA issue where constant literals would cause an ambiguous fma type...

See merge request !57
parents a9cb823a 5b58c0fc
Loading
Loading
Loading
Loading
+174 −0
Original line number Diff line number Diff line
@@ -2283,6 +2283,93 @@
				CODE_SIGN_STYLE = Automatic;
				DEAD_CODE_STRIPPING = YES;
				MACOSX_DEPLOYMENT_TARGET = 13.3;
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGenData",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMCGData",
					"-lLLVMSandboxIR",
					"-lLLVMFrontendAtomic",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
					"-lclangLex",
					"-lclangDriver",
					"-lclangSerialization",
					"-lclangAST",
					"-lclangSema",
					"-lclangAnalysis",
					"-lclangASTMatchers",
					"-lclangSupport",
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = macosx;
			};
@@ -2296,6 +2383,93 @@
				CODE_SIGN_STYLE = Automatic;
				DEAD_CODE_STRIPPING = YES;
				MACOSX_DEPLOYMENT_TARGET = 13.3;
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGenData",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMCGData",
					"-lLLVMSandboxIR",
					"-lLLVMFrontendAtomic",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
					"-lclangLex",
					"-lclangDriver",
					"-lclangSerialization",
					"-lclangAST",
					"-lclangSema",
					"-lclangAnalysis",
					"-lclangASTMatchers",
					"-lclangSupport",
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = macosx;
			};
+13 −3
Original line number Diff line number Diff line
@@ -2883,10 +2883,15 @@ namespace graph {
            }

//  (a/b)/c -> a/(b*c)
//  a/(b/c) -> a*c/b
            auto ld = divide_cast(this->left);
            auto rd = divide_cast(this->right);
            if (ld.get()) {
                return ld->get_left()/(ld->get_right()*this->right);
            }
            if (rd.get()) {
                return this->left*rd->get_right()/rd->get_left();
            }

//  Power reductions.
            if (is_variable_combineable(this->left,
@@ -3223,7 +3228,6 @@ namespace graph {

//  exp(a)/(c/exp(b)) -> (exp(a)*exp(b))/c
//  exp(a)/(exp(b)/c) -> c*(exp(a)/exp(b))
            auto rd = divide_cast(this->right);
            if (rd.get() && lexp.get()) {
                auto rdre = exp_cast(rd->get_right());
                if (rdre.get()) {
@@ -3836,6 +3840,12 @@ namespace graph {

//  Common denominator reductions.
            if (ld.get() && rd.get()) {
//  fma(b/c,a,b/d) -> b*(a/c + 1/d)
                if (ld->get_left()->is_match(rd->get_left())) {
                    return ld->get_left()*(this->middle/ld->get_right() +
                                           1.0/rd->get_right());
                }

//  fma(a/(b*c),d,e/c) -> fma(a,d,e*b)/(b*c)
//  fma(a/(c*b),d,e/c) -> fma(a,d,e*b)/(c*b)
//  fma(a/c,d,e/(c*b)) -> fma(a*b,d,e)/(b*c)
+3 −2
Original line number Diff line number Diff line
@@ -464,7 +464,8 @@ namespace graph {
                                    + jit::format_to_string(this->evaluate().at(0))
                                    + ")";
                } else {
                    registers[this] = jit::format_to_string(this->evaluate().at(0));
                    registers[this] = "(" + jit::get_type_string<T> () + ")"
                                    + jit::format_to_string(this->evaluate().at(0));
                }
#endif
            }
+3 −3
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ void run_korc() {
            
            auto gamma = graph::variable<T> (local_num_particles, "\\gamma");
            
            auto dt = graph::constant<T> (0.25);
            auto dt = graph::constant<T> (0.5);
            
            auto gamma_init = 1.0/graph::sqrt(1.0 - u_vec->dot(u_vec));
            
@@ -143,11 +143,11 @@ void run_korc() {
            const timeing::measure_diagnostic t_run("Run Time");
            work.pre_run();
            for (size_t i = 0; i < 1000000; i++) {
                /*sync.join();
                sync.join();
                work.wait();
                sync = std::thread([&file, &dataset] () -> void {
                    dataset.write(file);
                });*/
                });
                
                work.run();
            }
+13 −10
Original line number Diff line number Diff line
@@ -2597,6 +2597,9 @@ template<jit::float_scalar T> void test_divide() {
//  (c*a)*b/c -> a*b
    assert((((c*a)*b)/c)->is_match(a*b) && "Expected a*b");

//  a/(b/c) -> a*c/b
    assert((a/(b/c))->is_match(a*c/b) && "Expected a*b/c");

//  (a*b*c)^2/a^2 -> (b*c)^2
//  (a*b*c)^2/(a^2*d) -> (b*c)^2/d
//  (e*(a*b*c)^2)/(a^2*d) -> e*(b*c)^2/d
Loading