Commit 867f2f8c authored by Cianciosa, Mark
Browse files

Merge branch 'reduce_memreads' into 'main'

Support row and column wise reductions in piecewise nodes.

See merge request !30
parents a91da774 92881d34
Loading
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ project (rays CXX)
#-------------------------------------------------------------------------------
option (USE_PCH "Enable the use of precompiled headers" ON)
option (USE_STATIC "Limits the dynamics for testing." OFF)
option (SAVE_KERNEL_SOURCE "Writes the kernel source code to a file." OFF)

#-------------------------------------------------------------------------------
#  Set the cmake module path.
@@ -60,9 +61,12 @@ else ()

        find_package (CUDAToolkit REQUIRED)

        option (USE_CUDA_TEXTURES "Enable the use of cuda textures" OFF)

        target_compile_definitions (cuda_lib
                                    INTERFACE
                                    USE_CUDA
                                    $<$<BOOL:${USE_CUDA_TEXTURES}>:USE_CUDA_TEXTURES>
                                    CUDA_INCLUDE="${CUDAToolkit_INCLUDE_DIRS}"
        )
        target_link_libraries (cuda_lib
@@ -73,12 +77,18 @@ else ()
    endif ()
endif ()

option (USE_INPUT_CACHE "Cache the kernel input values." OFF)

add_library (gpu_lib INTERFACE)
target_link_libraries (gpu_lib
                       INTERFACE
                       $<$<BOOL:${USE_METAL}>:metal_lib>
                       $<$<BOOL:${USE_CUDA}>:cuda_lib>
)
target_compile_definitions (gpu_lib
                            INTERFACE
                            $<$<BOOL:${USE_INPUT_CACHE}>:USE_INPUT_CACHE>
)

#-------------------------------------------------------------------------------
#  Sanitizer options
@@ -234,6 +244,9 @@ add_dependencies (cuda-resource-headers pull_llvm)
add_dependencies (scan-build-py pull_llvm)
add_dependencies (x86-resource-headers pull_llvm)
add_dependencies (obj.clangSupport pull_llvm)
add_dependencies (arm-common-resource-headers pull_llvm)
add_dependencies (arm-resource-headers pull_llvm)
add_dependencies (aarch64-resource-headers pull_llvm)

add_library (llvm_dep INTERFACE)
target_include_directories (llvm_dep
@@ -259,6 +272,8 @@ target_link_libraries (llvm_dep
                       clangCodeGen
                       LLVM${LLVM_NATIVE_ARCH}CodeGen
                       LLVMOrcJIT
                       LLVMOrcDebugging
                       LLVMOrcTargetProcess
)

#-------------------------------------------------------------------------------
+17 −5
Original line number Diff line number Diff line
@@ -38,10 +38,14 @@ void bench_runner() {
    const size_t batch = NUM_RAYS/threads.size();
    const size_t extra = NUM_RAYS%threads.size();

    timeing::measure_diagnostic_threaded timing;
    timeing::measure_diagnostic_threaded time_setup("Setup Time");
    timeing::measure_diagnostic_threaded time_init("Init Time");
    timeing::measure_diagnostic_threaded time_compile("Compile Time");
    timeing::measure_diagnostic_threaded time_steps("Time Steps");

    for (size_t i = 0, ie = threads.size(); i < ie; i++) {
        threads[i] = std::thread([&timing, batch, extra] (const size_t thread_number) -> void {
        threads[i] = std::thread([&time_setup, &time_init, &time_compile, &time_steps, batch, extra] (const size_t thread_number) -> void {
            time_setup.start_time(thread_number);

            const size_t local_num_rays = batch
                                        + (extra > thread_number ? 1 : 0);
@@ -78,25 +82,33 @@ void bench_runner() {
                                                          eq, "",
                                                          local_num_rays,
                                                          thread_number);
            time_setup.end_time(thread_number);

            time_init.start_time(thread_number);
            solve.init(kx);
            time_init.end_time(thread_number);
            time_compile.start_time(thread_number);
            solve.compile();
            time_compile.end_time(thread_number);

            timing.start_time(thread_number);
            time_steps.start_time(thread_number);
            for (size_t j = 0; j < num_steps; j++) {
                for (size_t k = 0; k < SUB_STEPS; k++) {
                    solve.step();
                }
            }
            solve.sync_host();
            timing.end_time(thread_number);
            time_steps.end_time(thread_number);
        }, i);
    }

    for (std::thread &t : threads) {
        t.join();
    }
    timing.print();
    time_setup.print();
    time_init.print();
    time_compile.print();
    time_steps.print();

    std::cout << "--------------------------------------------------------------------------------"
              << std::endl << std::endl;
+301 −13
Original line number Diff line number Diff line
@@ -886,7 +886,7 @@
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = YES;
				LastUpgradeCheck = 1530;
				LastUpgradeCheck = 1540;
				ORGANIZATIONNAME = "Cianciosa, Mark R.";
				TargetAttributes = {
					C73690302A38C498001733B0 = {
@@ -1282,7 +1282,6 @@
					"VMEC_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/vmec.nc\\\"",
					"EFIT_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/efit.nc\\\"",
					USE_METAL,
					"CXX=\\\"c++\\ -I/Users/m4c/Projects/graph_framework/graph_framework\\ -std=gnu++2a\\\"",
					"$(inherited)",
				);
				MACOSX_DEPLOYMENT_TARGET = 13.3;
@@ -1338,8 +1337,7 @@
					"EFIT_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/efit.nc\\\"",
					"VMEC_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/vmec.nc\\\"",
					USE_METAL,
					"CXX_FLAGS=\\\"-g\\\"",
					"\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -std=gnu++2a\\\"\"",
					"\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/15.0.0/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"",
					STATIC,
					"DEBUG=1",
					"$(inherited)",
@@ -1366,9 +1364,69 @@
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-rpath",
					/usr/local/lib,
					"-lLLVM",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
@@ -1383,6 +1441,8 @@
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				SDKROOT = macosx;
				SYSTEM_HEADER_SEARCH_PATHS = "";
@@ -1441,7 +1501,7 @@
					"EFIT_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/efit.nc\\\"",
					"VMEC_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/vmec.nc\\\"",
					USE_METAL,
					"\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -std=gnu++2a\\\"\"",
					"\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/15.0.0/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
@@ -1466,9 +1526,69 @@
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-rpath",
					/usr/local/lib,
					"-lLLVM",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
@@ -1483,6 +1603,8 @@
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				SDKROOT = macosx;
				SYSTEM_HEADER_SEARCH_PATHS = "";
@@ -1747,11 +1869,94 @@
				GCC_PREPROCESSOR_DEFINITIONS = (
					"EFIT_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/efit.nc\\\"",
					USE_METAL,
					"CXX=\\\"c++\\\"",
					"DEBUG=1",
					"$(inherited)",
				);
				MACOSX_DEPLOYMENT_TARGET = 13.3;
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
					"-lclangLex",
					"-lclangDriver",
					"-lclangSerialization",
					"-lclangAST",
					"-lclangSema",
					"-lclangAnalysis",
					"-lclangASTMatchers",
					"-lclangSupport",
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
@@ -1766,10 +1971,93 @@
				GCC_PREPROCESSOR_DEFINITIONS = (
					"EFIT_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/efit.nc\\\"",
					USE_METAL,
					"CXX=\\\"c++\\\"",
					"$(inherited)",
				);
				MACOSX_DEPLOYMENT_TARGET = 13.3;
				OTHER_LDFLAGS = (
					"-lnetcdf",
					"-ld_classic",
					"-L/Users/m4c/Projects/graph_framework/build/_deps/llvm-build/lib",
					"-lz",
					"-lLLVMCoverage",
					"-lLLVMSupport",
					"-lLLVMDebugInfoCodeView",
					"-lLLVMRemarks",
					"-lLLVMJITLink",
					"-lLLVMLinker",
					"-lLLVMTextAPI",
					"-lLLVMRuntimeDyld",
					"-lLLVMOrcShared",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcDebugging",
					"-lLLVMOrcTargetProcess",
					"-lLLVMOrcJIT",
					"-lLLVMHipStdPar",
					"-lLLVMAggressiveInstCombine",
					"-lLLVMVectorize",
					"-lLLVMAsmParser",
					"-lLLVMOption",
					"-lLLVMLTO",
					"-lLLVMObject",
					"-lLLVMWindowsDriver",
					"-lLLVMDemangle",
					"-lLLVMIRReader",
					"-lLLVMIRPrinter",
					"-lLLVMInstCombine",
					"-lLLVMBinaryFormat",
					"-lLLVMCoroutines",
					"-lLLVMBitstreamReader",
					"-lLLVMBitReader",
					"-lLLVMBitWriter",
					"-lLLVMDebugInfoDWARF",
					"-lLLVMInstrumentation",
					"-lLLVMCFGuard",
					"-lLLVMObjCARCOpts",
					"-lLLVMipo",
					"-lLLVMGlobalISel",
					"-lLLVMExecutionEngine",
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
					"-lLLVMAnalysis",
					"-lLLVMScalarOpts",
					"-lLLVMCodeGenTypes",
					"-lLLVMCodeGen",
					"-lLLVMTargetParser",
					"-lLLVMScalarOpts",
					"-lLLVMTarget",
					"-lLLVMTransformUtils",
					"-lLLVMPasses",
					"-lLLVMSupport",
					"-lLLVMMCParser",
					"-lLLVMMC",
					"-lLLVMCore",
					"-lLLVMAsmPrinter",
					"-lLLVMAArch64Utils",
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMAArch64CodeGen",
					"-lclangFrontend",
					"-lclangBasic",
					"-lclangEdit",
					"-lclangLex",
					"-lclangDriver",
					"-lclangSerialization",
					"-lclangAST",
					"-lclangSema",
					"-lclangAnalysis",
					"-lclangASTMatchers",
					"-lclangSupport",
					"-lclangParse",
					"-lclangAPINotes",
					"-lclangCodeGen",
					"-rpath",
					/usr/local/lib,
				);
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
+1 −1
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1530"
   LastUpgradeVersion = "1540"
   version = "1.7">
   <BuildAction
      parallelizeBuildables = "YES"
+1 −1
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1530"
   LastUpgradeVersion = "1540"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
Loading