From deca429b564a52a902c545928c45ce6406077091 Mon Sep 17 00:00:00 2001 From: cianciosa Date: Fri, 20 Mar 2026 17:13:02 -0400 Subject: [PATCH 1/2] Create a prototype pic code. Fix an issue where kernel maps couldn't be set by the same output node. --- CMakeLists.txt | 1 + graph_docs/kernel_optimization.dox | 2 +- graph_framework.xcodeproj/project.pbxproj | 200 ++++++++++++++++++ .../xcshareddata/xcschemes/graph_pic.xcscheme | 79 +++++++ graph_framework/cpu_context.hpp | 3 +- graph_framework/cuda_context.hpp | 3 +- graph_framework/metal_context.hpp | 3 +- graph_framework/workflow.hpp | 68 ++++++ graph_pic/CMakeLists.txt | 6 + graph_pic/xpic.cpp | 192 +++++++++++++++++ graph_tests/CMakeLists.txt | 1 + graph_tests/workflow_test.cpp | 96 +++++++++ 12 files changed, 647 insertions(+), 7 deletions(-) create mode 100644 graph_framework.xcodeproj/xcshareddata/xcschemes/graph_pic.xcscheme create mode 100644 graph_pic/CMakeLists.txt create mode 100644 graph_pic/xpic.cpp create mode 100644 graph_tests/workflow_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 241020a..9faf51a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -383,6 +383,7 @@ add_subdirectory (graph_driver) add_subdirectory (graph_benchmark) add_subdirectory (graph_playground) add_subdirectory (graph_korc) +add_subdirectory (graph_pic) #------------------------------------------------------------------------------- # Define macro function to register tests. diff --git a/graph_docs/kernel_optimization.dox b/graph_docs/kernel_optimization.dox index 66af03b..e4302fc 100644 --- a/graph_docs/kernel_optimization.dox +++ b/graph_docs/kernel_optimization.dox @@ -48,7 +48,7 @@ void field_solve_example() { // call. 
for (size_t i = 1; i < batch; i++) { indexed_particle = graph::index_1D(particle_positions, - particle_index, + next_index, static_cast (1), static_cast (0)); next_index = next_index + static_cast (1.0); diff --git a/graph_framework.xcodeproj/project.pbxproj b/graph_framework.xcodeproj/project.pbxproj index 5558843..0d0f892 100644 --- a/graph_framework.xcodeproj/project.pbxproj +++ b/graph_framework.xcodeproj/project.pbxproj @@ -37,6 +37,10 @@ C73BBE8229F820810027BB7F /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C71342682947F36100672AD4 /* Metal.framework */; }; C73E2A7A2A4A216400BED03A /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C71342682947F36100672AD4 /* Metal.framework */; }; C74DF4602AA8BD1900319113 /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C71342682947F36100672AD4 /* Metal.framework */; }; + C74F2ADC2F6D9AFF00B48216 /* xpic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C723737B2F5F6707005A5C62 /* xpic.cpp */; }; + C74F2ADD2F6D9B0D00B48216 /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C71342682947F36100672AD4 /* Metal.framework */; }; + C74F2AEA2F6DE8E400B48216 /* workflow_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C74F2ADE2F6DC10E00B48216 /* workflow_test.cpp */; }; + C74F2AEB2F6DE8EC00B48216 /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C71342682947F36100672AD4 /* Metal.framework */; }; C78F3D972DC41AF2002E3D94 /* random_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C78F3D8A2DC122C7002E3D94 /* random_test.cpp */; }; C78F3D982DC41B05002E3D94 /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C71342682947F36100672AD4 /* Metal.framework */; }; C78F3DA72DC41BB8002E3D94 /* xkorc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C78F3D882DC122B1002E3D94 /* xkorc.cpp */; }; @@ -214,6 +218,24 @@ ); runOnlyForDeploymentPostprocessing = 1; }; + C74F2AD02F6D9A6E00B48216 /* CopyFiles */ = { + isa = 
PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + C74F2AE12F6DE8C500B48216 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; C78F3D8D2DC41ACA002E3D94 /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; @@ -343,6 +365,8 @@ C721EA992833FF7800EAFB2D /* equilibrium.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = equilibrium.hpp; sourceTree = ""; }; C723210222DC0D0A006BBF13 /* arithmetic.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = arithmetic.hpp; sourceTree = ""; }; C72358F52C4027A10084A489 /* commandline_parser.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = commandline_parser.hpp; sourceTree = ""; }; + C723737B2F5F6707005A5C62 /* xpic.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp; path = xpic.cpp; sourceTree = ""; }; + C723737D2F5F672F005A5C62 /* CMakeLists.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = CMakeLists.txt; sourceTree = ""; }; C725CD792840088000D0EDE2 /* physics_test.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp; path = physics_test.cpp; sourceTree = ""; }; C729B8B12ED7521A00A2559D /* discription.dox */ = {isa = PBXFileReference; lastKnownFileType = text; path = discription.dox; sourceTree = ""; }; C729B8B22ED7536B00A2559D /* use_cases.dox */ = {isa = PBXFileReference; lastKnownFileType = text; path = use_cases.dox; sourceTree = ""; }; @@ -358,6 +382,9 @@ C73BBE7D29F816E60027BB7F /* piecewise_test.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp; path = piecewise_test.cpp; sourceTree = ""; }; C73BBE9629F8669F0027BB7F /* newton.hpp */ 
= {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = newton.hpp; sourceTree = ""; }; C74DF4572AA8BC7300319113 /* graph_benchmark */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = graph_benchmark; sourceTree = BUILT_PRODUCTS_DIR; }; + C74F2AD22F6D9A6E00B48216 /* graph_pic */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = graph_pic; sourceTree = BUILT_PRODUCTS_DIR; }; + C74F2ADE2F6DC10E00B48216 /* workflow_test.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp; path = workflow_test.cpp; sourceTree = ""; }; + C74F2AE32F6DE8C500B48216 /* workflow_test */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = workflow_test; sourceTree = BUILT_PRODUCTS_DIR; }; C75C42912E5CA60B00B0950B /* compiling.dox */ = {isa = PBXFileReference; lastKnownFileType = text; path = compiling.dox; sourceTree = ""; }; C75C42922E5CA60B00B0950B /* general.dox */ = {isa = PBXFileReference; lastKnownFileType = text; path = general.dox; sourceTree = ""; }; C75C42932E5CA60B00B0950B /* main.dox */ = {isa = PBXFileReference; lastKnownFileType = text; path = main.dox; sourceTree = ""; }; @@ -467,6 +494,22 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + C74F2ACF2F6D9A6E00B48216 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + C74F2ADD2F6D9B0D00B48216 /* Metal.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + C74F2AE02F6DE8C500B48216 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + C74F2AEB2F6DE8EC00B48216 /* Metal.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; C78F3D8C2DC41ACA002E3D94 /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -601,6 +644,15 @@ path = cmake; sourceTree = ""; }; + 
C723737C2F5F6707005A5C62 /* graph_pic */ = { + isa = PBXGroup; + children = ( + C723737D2F5F672F005A5C62 /* CMakeLists.txt */, + C723737B2F5F6707005A5C62 /* xpic.cpp */, + ); + path = graph_pic; + sourceTree = ""; + }; C736E6B02C9B52CA00AAE3C0 /* graph_playground */ = { isa = PBXGroup; children = ( @@ -657,6 +709,7 @@ C74DF4582AA8BC7300319113 /* graph_benchmark */, C736E6B02C9B52CA00AAE3C0 /* graph_playground */, C78F3D892DC122B1002E3D94 /* graph_korc */, + C723737C2F5F6707005A5C62 /* graph_pic */, C75C42942E5CA60B00B0950B /* graph_docs */, C7167B212AC5CE8500E03131 /* utilities */, C717CB8C2A02E361008FBDD8 /* cmake */, @@ -688,6 +741,8 @@ C78F3D8F2DC41ACA002E3D94 /* random_test */, C78F3D9D2DC41B26002E3D94 /* graph_korc */, C7DC9EE82E39789900524F6F /* libgraph_c.a */, + C74F2AD22F6D9A6E00B48216 /* graph_pic */, + C74F2AE32F6DE8C500B48216 /* workflow_test */, ); name = Products; sourceTree = ""; @@ -754,6 +809,7 @@ C78F3D8A2DC122C7002E3D94 /* random_test.cpp */, C7DC9EF12E3A688F00524F6F /* c_binding_test.c */, C7AE06662E3C2AEE00586BCD /* f_binding_test.f90 */, + C74F2ADE2F6DC10E00B48216 /* workflow_test.cpp */, ); path = graph_tests; sourceTree = ""; @@ -924,6 +980,44 @@ productReference = C74DF4572AA8BC7300319113 /* graph_benchmark */; productType = "com.apple.product-type.tool"; }; + C74F2AD12F6D9A6E00B48216 /* graph_pic */ = { + isa = PBXNativeTarget; + buildConfigurationList = C74F2AD82F6D9A6E00B48216 /* Build configuration list for PBXNativeTarget "graph_pic" */; + buildPhases = ( + C74F2ACE2F6D9A6E00B48216 /* Sources */, + C74F2ACF2F6D9A6E00B48216 /* Frameworks */, + C74F2AD02F6D9A6E00B48216 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = graph_pic; + packageProductDependencies = ( + ); + productName = graph_pic; + productReference = C74F2AD22F6D9A6E00B48216 /* graph_pic */; + productType = "com.apple.product-type.tool"; + }; + C74F2AE22F6DE8C500B48216 /* workflow_test */ = { + isa = PBXNativeTarget; + buildConfigurationList = 
C74F2AE72F6DE8C500B48216 /* Build configuration list for PBXNativeTarget "workflow_test" */; + buildPhases = ( + C74F2ADF2F6DE8C500B48216 /* Sources */, + C74F2AE02F6DE8C500B48216 /* Frameworks */, + C74F2AE12F6DE8C500B48216 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = workflow_test; + packageProductDependencies = ( + ); + productName = workflow_test; + productReference = C74F2AE32F6DE8C500B48216 /* workflow_test */; + productType = "com.apple.product-type.tool"; + }; C78F3D8E2DC41ACA002E3D94 /* random_test */ = { isa = PBXNativeTarget; buildConfigurationList = C78F3D932DC41ACA002E3D94 /* Build configuration list for PBXNativeTarget "random_test" */; @@ -1207,6 +1301,12 @@ C74DF4562AA8BC7300319113 = { CreatedOnToolsVersion = 14.3.1; }; + C74F2AD12F6D9A6E00B48216 = { + CreatedOnToolsVersion = 26.1; + }; + C74F2AE22F6DE8C500B48216 = { + CreatedOnToolsVersion = 26.1; + }; C78F3D8E2DC41ACA002E3D94 = { CreatedOnToolsVersion = 16.3; }; @@ -1284,6 +1384,8 @@ C78F3D8E2DC41ACA002E3D94 /* random_test */, C78F3D9C2DC41B26002E3D94 /* graph_korc */, C7DC9EE72E39789900524F6F /* graph_c */, + C74F2AD12F6D9A6E00B48216 /* graph_pic */, + C74F2AE22F6DE8C500B48216 /* workflow_test */, ); }; /* End PBXProject section */ @@ -1337,6 +1439,22 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + C74F2ACE2F6D9A6E00B48216 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + C74F2ADC2F6D9AFF00B48216 /* xpic.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + C74F2ADF2F6DE8C500B48216 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + C74F2AEA2F6DE8E400B48216 /* workflow_test.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; C78F3D8B2DC41ACA002E3D94 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -1726,6 +1844,70 @@ }; name = Release; }; + C74F2AD62F6D9A6E00B48216 /* Debug */ = { + isa = XCBuildConfiguration; 
+ buildSettings = { + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; + CODE_SIGN_STYLE = Automatic; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 26.1; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + C74F2AD72F6D9A6E00B48216 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; + CODE_SIGN_STYLE = Automatic; + GCC_C_LANGUAGE_STANDARD = gnu17; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 26.1; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + C74F2AE82F6DE8C500B48216 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; + CODE_SIGN_STYLE = Automatic; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 26.1; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + C74F2AE92F6DE8C500B48216 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; + CODE_SIGN_STYLE = Automatic; + GCC_C_LANGUAGE_STANDARD = gnu17; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 26.1; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; C78F3D942DC41ACA002E3D94 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ 
-2559,6 +2741,24 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + C74F2AD82F6D9A6E00B48216 /* Build configuration list for PBXNativeTarget "graph_pic" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + C74F2AD62F6D9A6E00B48216 /* Debug */, + C74F2AD72F6D9A6E00B48216 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + C74F2AE72F6DE8C500B48216 /* Build configuration list for PBXNativeTarget "workflow_test" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + C74F2AE82F6DE8C500B48216 /* Debug */, + C74F2AE92F6DE8C500B48216 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; C78F3D932DC41ACA002E3D94 /* Build configuration list for PBXNativeTarget "random_test" */ = { isa = XCConfigurationList; buildConfigurations = ( diff --git a/graph_framework.xcodeproj/xcshareddata/xcschemes/graph_pic.xcscheme b/graph_framework.xcodeproj/xcshareddata/xcschemes/graph_pic.xcscheme new file mode 100644 index 0000000..093a65d --- /dev/null +++ b/graph_framework.xcodeproj/xcshareddata/xcschemes/graph_pic.xcscheme @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/graph_framework/cpu_context.hpp b/graph_framework/cpu_context.hpp index 9e27c9d..d0b48db 100644 --- a/graph_framework/cpu_context.hpp +++ b/graph_framework/cpu_context.hpp @@ -520,8 +520,7 @@ namespace gpu { const jit::register_usage &usage) { std::unordered_set out_registers; for (auto &[out, in] : setters) { - if (!out->is_match(in) && - !out_registers.contains(out.get())) { + if (!out->is_match(in)) { graph::shared_leaf a = out->compile(source_buffer, registers, indices, diff --git a/graph_framework/cuda_context.hpp b/graph_framework/cuda_context.hpp index 741bafa..57b333e 100644 --- a/graph_framework/cuda_context.hpp +++ b/graph_framework/cuda_context.hpp @@ -868,8 +868,7 @@ namespace gpu { const jit::register_usage &usage) { 
std::unordered_set out_registers; for (auto &[out, in] : setters) { - if (!out->is_match(in) && - !out_registers.contains(out.get())) { + if (!out->is_match(in)) { graph::shared_leaf a = out->compile(source_buffer, registers, indices, diff --git a/graph_framework/metal_context.hpp b/graph_framework/metal_context.hpp index fd91bee..b88c7ce 100644 --- a/graph_framework/metal_context.hpp +++ b/graph_framework/metal_context.hpp @@ -586,8 +586,7 @@ namespace gpu { const jit::register_usage &usage) { std::unordered_set out_registers; for (auto &[out, in] : setters) { - if (!out->is_match(in) && - !out_registers.contains(out.get())) { + if (!out->is_match(in)) { graph::shared_leaf a = out->compile(source_buffer, registers, indices, diff --git a/graph_framework/workflow.hpp b/graph_framework/workflow.hpp index 012892c..50be5f2 100644 --- a/graph_framework/workflow.hpp +++ b/graph_framework/workflow.hpp @@ -75,6 +75,50 @@ namespace workflow { } }; +//------------------------------------------------------------------------------ +/// @brief Run a work item in a fixed iteration loop. +/// +/// @tparam T Base type of the calculation. +/// @tparam SAFE_MATH Use @ref general_concepts_safe_math operations. +//------------------------------------------------------------------------------ + template + class loop_item final : public work_item { +/// Iterations. + const size_t num_iterations; + + public: +//------------------------------------------------------------------------------ +/// @brief Construct a workflow item. +/// +/// @param[in] inputs Input variables. +/// @param[in] outputs Output nodes. +/// @param[in] maps Setter maps. +/// @param[in] state Random state node. +/// @param[in] name Name of the work item. +/// @param[in] size Size of the work item. +/// @param[in,out] context Jit context. +/// @param[in] iterations Number of iterations to run the loop. 
+//------------------------------------------------------------------------------ + loop_item(graph::input_nodes inputs, + graph::output_nodes outputs, + graph::map_nodes maps, + graph::shared_random_state state, + const std::string name, const size_t size, + jit::context &context, + const size_t iterations) : + work_item (inputs, outputs, maps, state, name, size, context), + num_iterations(iterations) {} + +//------------------------------------------------------------------------------ +/// @brief Run the work item once for each configured iteration. +//------------------------------------------------------------------------------ + virtual void run() { + for (size_t i = 0; i < num_iterations; i++) { + work_item::run(); + } + } + }; + //------------------------------------------------------------------------------ /// @brief Class representing a convergence work item. /// @@ -235,6 +279,30 @@ namespace workflow { context)); } +//------------------------------------------------------------------------------ +/// @brief Add a loop item that reruns its kernel a fixed number of times. +/// +/// @param[in] in Input variables. +/// @param[in] out Output nodes. +/// @param[in] maps Setter maps. +/// @param[in] state Random state node. +/// @param[in] name Name of the work item. +/// @param[in] size Size of the work item. +/// @param[in] iterations Number of iterations to run the loop. +//------------------------------------------------------------------------------ + void add_loop_item(graph::input_nodes in, + graph::output_nodes out, + graph::map_nodes maps, + graph::shared_random_state state, + const std::string name, const size_t size, + const size_t iterations) { + items.push_back(std::make_unique> (in, out, + maps, state, + name, size, + context, + iterations)); + } + //------------------------------------------------------------------------------ /// @brief Add a converge item.
/// diff --git a/graph_pic/CMakeLists.txt b/graph_pic/CMakeLists.txt new file mode 100644 index 0000000..51167bd --- /dev/null +++ b/graph_pic/CMakeLists.txt @@ -0,0 +1,6 @@ + +add_tool_target (xpic cpp) + +if (${USE_PCH} AND NOT ${BUILD_C_BINDING}) + target_precompile_headers (xpic REUSE_FROM xrays) +endif () diff --git a/graph_pic/xpic.cpp b/graph_pic/xpic.cpp new file mode 100644 index 0000000..b36e90f --- /dev/null +++ b/graph_pic/xpic.cpp @@ -0,0 +1,192 @@ +//------------------------------------------------------------------------------ +/// @file xpic.cpp +/// @brief Driver program for the Particle In Cell (PIC) demo. +//------------------------------------------------------------------------------ + +#include + +#include "../graph_framework/graph_framework.hpp" + +//------------------------------------------------------------------------------ +/// @brief Build density. +/// +/// @tparam T Base type of the calculation. +/// +/// @param[in] x The particle position. +//------------------------------------------------------------------------------ +template +graph::shared_leaf build_density(graph::shared_leaf x) { + return graph::exp(x*x/static_cast (-0.0001)); +} + +//------------------------------------------------------------------------------ +/// @brief Build parallel electric field. +/// +/// @tparam T Base type of the calculation. +/// +/// @param[in] x The particle position. +//------------------------------------------------------------------------------ +template +graph::shared_leaf build_parallel_electric_field(graph::shared_leaf x) { + const T te = 1; + const T q = 1;//1.602176634E-19; + auto n = build_density (x); + auto pe = n*te; + return static_cast (-1)/(q*n)*pe->df(x); +} + +//------------------------------------------------------------------------------ +/// @brief Pic code. +/// +/// @tparam T Base type of the calculation.
+//------------------------------------------------------------------------------ +template +void run_pic() { + const size_t num_particles = 1000000; + auto x = graph::variable (num_particles, "x"); + auto vpara = graph::variable (num_particles, "v||"); + + std::normal_distribution norm(0, 0.25); + std::random_device rand_d; + std::mt19937_64 engine(rand_d()); + backend::buffer a(num_particles); + backend::buffer b(num_particles); + for (size_t i = 0; i < num_particles; i++) { + a[i] = norm(engine); + b[i] = norm(engine); + } + x->set(a); + vpara->set(b); + + const T m = 1;//9.1093837139E-31; + const T q = 1;//1.602176634E-19; + const T te = 1; + const T dt = 0.00001; + + const size_t num_grid = 1000; + auto epara = graph::variable (num_grid, "e||"); + auto n = graph::variable (num_grid, "n"); + auto grid_position = graph::variable (num_grid, "x_i"); + auto particle_index = graph::variable (num_grid, "i"); + + const T scale = 2.0/999.0; + const T offset = -1.0; + backend::buffer c(num_grid); + for (size_t i = 0; i < num_grid; i++) { + c[i] = scale*i + offset; + } + grid_position->set(c); + + auto x1 = dt*vpara; + auto vpara1 = -q/m*graph::index_1D(epara, x, scale, offset); + + auto x2 = dt*(vpara + vpara1/2.0); + auto vpara2 = -q/m*graph::index_1D(epara, x + x1/2.0, scale, offset); + + auto x3 = dt*(vpara + vpara2/2.0); + auto vpara3 = -q/m*graph::index_1D(epara, x + x2/2.0, scale, offset); + + auto x4 = dt*(vpara + vpara3); + auto vpara4 = -q/m*graph::index_1D(epara, x + x3, scale, offset); + + auto x_next = x + (x1 + static_cast (2)*(x2 + x3) + x4)/static_cast (6); + auto vpara_next = vpara + (vpara1 + static_cast (2)*(vpara2 + vpara3) + vpara4)/static_cast (6); + + auto next_index = particle_index; + auto next_epara = epara; + auto next_n = n; + + const size_t batch = 1000; +// Unroll the loop + for (size_t i = 0; i < batch; i++) { + auto indexed_particle = graph::index_1D(x, next_index, + static_cast (1), + static_cast (0)); + next_index = next_index + 
static_cast (1); + next_epara = next_epara + + build_parallel_electric_field (indexed_particle - grid_position); + next_n = next_n + build_density(indexed_particle - grid_position); + } + + workflow::manager work(0); + work.add_item({ + graph::variable_cast(particle_index), + graph::variable_cast(epara), + graph::variable_cast(n) + }, {}, { + {graph::zero (), graph::variable_cast(particle_index)}, + {graph::zero (), graph::variable_cast(epara)}, + {graph::zero (), graph::variable_cast(n)} + }, NULL, "Index_reset", num_grid); + work.add_loop_item({ + graph::variable_cast(epara), + graph::variable_cast(n), + graph::variable_cast(grid_position), + graph::variable_cast(particle_index), + graph::variable_cast(x) + }, {}, { + {next_epara, graph::variable_cast(epara)}, + {next_index, graph::variable_cast(particle_index)}, + {next_n, graph::variable_cast(n)} + }, NULL, "Compute_efield", num_grid, num_particles/batch); + work.add_item({ + graph::variable_cast(x), + graph::variable_cast(vpara), + graph::variable_cast(epara) + }, {}, { + {x_next, graph::variable_cast(x)}, + {vpara_next, graph::variable_cast(vpara)} + }, NULL, "Particle_Push", num_particles); + + work.compile(); + + output::result_file particles_file("pic_particles.nc", num_particles); + output::data_set p_dataset(particles_file); + + p_dataset.create_variable(particles_file, "x", x, work.get_context()); + p_dataset.create_variable(particles_file, "vpara", vpara, work.get_context()); + + particles_file.end_define_mode(); + + output::result_file fields_file("pic_fields.nc", num_grid); + output::data_set f_dataset(fields_file); + + f_dataset.create_variable(fields_file, "epara", epara, work.get_context()); + f_dataset.create_variable(fields_file, "n", n, work.get_context()); + + fields_file.end_define_mode(); + std::thread sync_particles([]{}); + std::thread sync_fields([]{}); + + const size_t num_steps = 1000; + for (size_t i = 0; i < num_steps; i++) { + sync_particles.join(); + sync_fields.join(); + 
work.run(); + sync_particles = std::thread([&particles_file, &p_dataset] () -> void { + p_dataset.write(particles_file); + }); + sync_fields = std::thread([&fields_file, &f_dataset] () -> void { + f_dataset.write(fields_file); + }); + } + work.wait(); + sync_particles.join(); + sync_fields.join(); +} + +//------------------------------------------------------------------------------ +/// @brief Main program of the driver. +/// +/// @param[in] argc Number of commandline arguments. +/// @param[in] argv Array of commandline arguments. +//------------------------------------------------------------------------------ +int main(int argc, const char * argv[]) { + START_GPU + (void)argc; + (void)argv; + + run_pic (); + + END_GPU +} diff --git a/graph_tests/CMakeLists.txt b/graph_tests/CMakeLists.txt index 5b090c3..9a2fcc0 100644 --- a/graph_tests/CMakeLists.txt +++ b/graph_tests/CMakeLists.txt @@ -12,6 +12,7 @@ add_test_target (piecewise_test cpp) add_test_target (erfi_test cpp) add_test_target (efit_test cpp) add_test_target (random_test cpp) +add_test_target (workflow_test cpp) target_compile_definitions (erfi_test PRIVATE diff --git a/graph_tests/workflow_test.cpp b/graph_tests/workflow_test.cpp new file mode 100644 index 0000000..e61b619 --- /dev/null +++ b/graph_tests/workflow_test.cpp @@ -0,0 +1,96 @@ +//------------------------------------------------------------------------------ +/// @file workflow_test.cpp +/// @brief Tests for workflows. +//------------------------------------------------------------------------------ + +// Turn on asserts even in release builds. +#ifdef NDEBUG +#undef NDEBUG +#endif + +#include + +#include "../graph_framework/graph_framework.hpp" + +//------------------------------------------------------------------------------ +/// @brief Test setting multiple variables with the same map. +/// +/// @tparam T Base type of the calculation. 
+//------------------------------------------------------------------------------ +template void test_maps() { + auto a = graph::variable (1, ""); + auto b = graph::variable (1, ""); + backend::buffer buffer(1, static_cast (1)); + a->set(buffer); + b->set(buffer); + + auto zero = graph::zero (); + + workflow::manager work(0); + work.add_item({ + graph::variable_cast(a), + graph::variable_cast(b) + }, {}, { + {zero, graph::variable_cast(a)}, + {zero, graph::variable_cast(b)} + }, NULL, "test_maps", 1); + + work.compile(); + + assert(work.check_value(0, a) == static_cast (1) && "Expected one."); + assert(work.check_value(0, b) == static_cast (1) && "Expected one."); + work.run(); + assert(work.check_value(0, a) == static_cast (0) && "Expected zero."); + assert(work.check_value(0, b) == static_cast (0) && "Expected zero."); +} + +//------------------------------------------------------------------------------ +/// @brief Test that a loop item applies its map once per requested iteration. +/// +/// @tparam T Base type of the calculation. +//------------------------------------------------------------------------------ +template void test_loops() { + auto a = graph::variable (1, ""); + backend::buffer buffer(1, static_cast (0)); + a->set(buffer); + + auto a_next = a + static_cast (1); + + workflow::manager work(0); + work.add_loop_item({ + graph::variable_cast(a) + }, {}, { + {a_next, graph::variable_cast(a)} + }, NULL, "test_loops", 1, 10); + + work.compile(); + + assert(work.check_value(0, a) == static_cast (0) && "Expected zero."); + work.run(); + assert(work.check_value(0, a) == static_cast (10) && "Expected ten."); +} + +//------------------------------------------------------------------------------ +/// @brief Run tests with a specified backend. +/// +/// @tparam T Base type of the calculation.
+//------------------------------------------------------------------------------ +template void run_tests() { + test_maps (); + test_loops (); +} + +//------------------------------------------------------------------------------ +/// @brief Main program of the test. +/// +/// @param[in] argc Number of commandline arguments. +/// @param[in] argv Array of commandline arguments. +//------------------------------------------------------------------------------ +int main(int argc, const char * argv[]) { + (void)argc; + (void)argv; + run_tests (); + run_tests (); + run_tests> (); + run_tests> (); +} -- GitLab From d571235bdf955f6c12678a2144bc18142ae1370c Mon Sep 17 00:00:00 2001 From: cianciosa Date: Sat, 28 Mar 2026 16:39:39 -0400 Subject: [PATCH 2/2] Fix multithreading issue where multiple threads would try to access the same netcdf file at the same time. --- graph_docs/code_performance.dox | 8 ++++---- graph_framework.xcodeproj/project.pbxproj | 8 +++++--- graph_framework/absorption.hpp | 6 ++---- graph_framework/commandline_parser.hpp | 1 - graph_framework/equilibrium.hpp | 14 ++++++-------- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/graph_docs/code_performance.dox b/graph_docs/code_performance.dox index ac08f5b..0fc897e 100644 --- a/graph_docs/code_performance.dox +++ b/graph_docs/code_performance.dox @@ -51,10 +51,10 @@ * JAX due to it's popularity, * and Kokkos for its performance * portability. Source codes for this benchmark case is available in the - * appendix. Figure \ref{fig:compare} shows the through put of pushing $10^{8}$ - * particles for $10^{3}$ time steps. The graph framework consistently shows the - * best throughput on both CPUs and GPUs. Note MLX CPU throughput could by - * improved by splitting the problem to multiple threads. + * appendix. Figure \ref{fig:compare} shows the throughput of pushing + * @f$10^{8}@f$ particles for @f$10^{3}@f$ time steps.
The graph framework + * consistently shows the best throughput on both CPUs and GPUs. Note MLX CPU + * throughput could be improved by splitting the problem to multiple threads. * * @subsection code_performance_comparison_codes Source codes for throughput benchmark comparison * @subsubsection code_performance_comparison_graph Graph Framework diff --git a/graph_framework.xcodeproj/project.pbxproj b/graph_framework.xcodeproj/project.pbxproj index 0d0f892..8afd529 100644 --- a/graph_framework.xcodeproj/project.pbxproj +++ b/graph_framework.xcodeproj/project.pbxproj @@ -2028,9 +2028,9 @@ "EFIT_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/efit.nc\\\"", "VMEC_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/vmec.nc\\\"", USE_METAL, - "\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"", + "\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/21/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include
-I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"", STATIC, - "MACOS_LIB_RT=\\\"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17.0.0/lib/darwin/libclang_rt.osx.a\\\"", + "MACOS_LIB_RT=\\\"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/21.0.0/lib/darwin/libclang_rt.osx.a\\\"", USE_INDEX_CACHE, "USE_VERBOSE=false", "$(inherited)", @@ -2204,8 +2204,10 @@ "EFIT_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/efit.nc\\\"", "VMEC_FILE=\\\"/Users/m4c/Projects/graph_framework/graph_tests/vmec.nc\\\"", USE_METAL, + "MACOS_LIB_RT=\\\"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/21.0.0/lib/darwin/libclang_rt.osx.a\\\"", "USE_VERBOSE=false", - "\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/15.0.0/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"", + USE_INDEX_CACHE, + "\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/21.0.0/include 
-I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"", "$(inherited)", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; diff --git a/graph_framework/absorption.hpp b/graph_framework/absorption.hpp index 9002afa..c7d7bd6 100644 --- a/graph_framework/absorption.hpp +++ b/graph_framework/absorption.hpp @@ -235,8 +235,7 @@ namespace absorption { {graph::zero (), graph::variable_cast(this->kamp)} }; - work.add_item(inputs, {}, setters, - graph::shared_random_state (), + work.add_item(inputs, {}, setters, NULL, "root_find_init_kernel", inputs.back()->size()); inputs.push_back(graph::variable_cast(this->t)); @@ -265,8 +264,7 @@ namespace absorption { setters = { {klen + kamp, graph::variable_cast(this->kamp)} }; - work.add_item(inputs, {}, setters, - graph::shared_random_state (), + work.add_item(inputs, {}, setters, NULL, "final_kamp", inputs.back()->size()); } diff --git a/graph_framework/commandline_parser.hpp b/graph_framework/commandline_parser.hpp index 1bda4c3..bd0fca9 100644 --- a/graph_framework/commandline_parser.hpp +++ b/graph_framework/commandline_parser.hpp @@ -100,7 +100,6 @@ namespace commandline { } } std::cout << std::endl; - sync.unlock(); exit(0); } diff --git a/graph_framework/equilibrium.hpp b/graph_framework/equilibrium.hpp index a35625a..9d03531 100644 --- a/graph_framework/equilibrium.hpp +++ b/graph_framework/equilibrium.hpp @@ -25,7 +25,7 @@ /// @subsection equilibrium_splines_1D Cubic Splines /// Cubic splines are 1D interpolation functions consisting of 4 coefficient /// arrays. 
They take the form of -/// @f{equation}{y\left(x\right)=C_{0} + C_{1}x + C_{2}x^2 + C_{3}x^2@f} +/// @f{equation}{y\left(x\right)=C_{0} + C_{1}x + C_{2}x^{2} + C_{3}x^{3}@f} /// where @f$x@f$ is a normalized radial index. Cubic splines coefficients can /// be calculated using /// Linear Solvers @@ -219,12 +219,10 @@ #include "math.hpp" #include "arithmetic.hpp" #include "newton.hpp" +#include "output.hpp" /// Name space for equilibrium models. namespace equilibrium { -/// Lock to synchronize netcdf across threads. - static std::mutex sync; - //****************************************************************************** // Equilibrium interface //****************************************************************************** @@ -1629,7 +1627,7 @@ namespace equilibrium { template shared make_efit(const std::string &spline_file) { int ncid; - sync.lock(); + output::sync.lock(); nc_open(spline_file.c_str(), NC_NOWRITE, &ncid); // Load scalar quantities. @@ -1794,7 +1792,7 @@ namespace equilibrium { nc_get_var(ncid, varid, ne_c3_buffer.data()); nc_close(ncid); - sync.unlock(); + output::sync.unlock(); auto rmin = static_cast (rmin_value); auto dr = static_cast (dr_value); @@ -2426,7 +2424,7 @@ namespace equilibrium { template shared make_vmec(const std::string &spline_file) { int ncid; - sync.lock(); + output::sync.lock(); nc_open(spline_file.c_str(), NC_NOWRITE, &ncid); // Load scalar quantities. @@ -2593,7 +2591,7 @@ namespace equilibrium { nc_get_var(ncid, varid, xn_buffer.data()); nc_close(ncid); - sync.unlock(); + output::sync.unlock(); auto sminf = static_cast (sminf_value); auto sminh = static_cast (sminh_value); -- GitLab