Loading graph_framework.xcodeproj/project.pbxproj +5 −0 Original line number Diff line number Diff line Loading @@ -1726,6 +1726,7 @@ "\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"", STATIC, "MACOS_LIB_RT=\\\"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17.0.0/lib/darwin/libclang_rt.osx.a\\\"", USE_INDEX_CACHE, "USE_VERBOSE=false", "$(inherited)", ); Loading Loading @@ -1792,6 +1793,7 @@ "-lLLVMFrontendDriver", "-lLLVMFrontendHLSL", "-lLLVMFrontendOpenMP", "-lLLVMFrontendDirective", "-lLLVMFrontendOffloading", "-lLLVMSelectionDAG", "-lLLVMProfileData", Loading @@ -1813,6 +1815,7 @@ "-lLLVMAArch64Info", "-lLLVMAArch64Desc", "-lLLVMAArch64AsmParser", "-lLLVMDebugInfoDWARFLowLevel", "-lLLVMAArch64CodeGen", "-lLLVMCGData", "-lLLVMSandboxIR", Loading Loading @@ -1958,6 +1961,7 @@ "-lLLVMFrontendDriver", "-lLLVMFrontendHLSL", "-lLLVMFrontendOpenMP", "-lLLVMFrontendDirective", "-lLLVMFrontendOffloading", "-lLLVMSelectionDAG", "-lLLVMProfileData", Loading @@ -1979,6 +1983,7 @@ "-lLLVMAArch64Info", "-lLLVMAArch64Desc", "-lLLVMAArch64AsmParser", "-lLLVMDebugInfoDWARFLowLevel", "-lLLVMAArch64CodeGen", "-lLLVMCGData", "-lLLVMSandboxIR", Loading graph_framework/metal_context.hpp +8 −22 Original line number Diff line number Diff line Loading @@ -29,8 +29,6 @@ namespace gpu { std::map<graph::leaf_node<float, SAFE_MATH> *, id<MTLBuffer>> kernel_arguments; /// Textures. std::map<void *, id<MTLTexture>> texture_arguments; /// Max Buffer. id<MTLBuffer> result; /// Metal command buffer. id<MTLCommandBuffer> command_buffer; /// Metal library. Loading Loading @@ -144,14 +142,14 @@ namespace gpu { backend::buffer<float> buffer = input->evaluate(); kernel_arguments[input.get()] = [device newBufferWithBytes:buffer.data() length:buffer.size()*buffer_element_size options:MTLResourceStorageModeManaged]; options:MTLResourceStorageModeShared]; } buffers.push_back(kernel_arguments[input.get()]); } for (graph::shared_leaf<float, SAFE_MATH> &output : outputs) { if (!kernel_arguments.contains(output.get())) { kernel_arguments[output.get()] = [device newBufferWithLength:num_rays*sizeof(float) options:MTLResourceStorageModeManaged]; options:MTLResourceStorageModeShared]; } buffers.push_back(kernel_arguments[output.get()]); } Loading @@ -160,7 +158,7 @@ namespace gpu { kernel_arguments[state.get()] = [device newBufferWithBytes:state->data() length:state->get_size_bytes() options:MTLResourceCPUCacheModeWriteCombined | MTLResourceStorageModeManaged | MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked]; } buffers.push_back(kernel_arguments[state.get()]); Loading Loading @@ -302,7 +300,7 @@ namespace gpu { } id<MTLBuffer> result = [device newBufferWithLength:sizeof(float) options:MTLResourceStorageModeManaged]; options:MTLResourceStorageModeShared]; id<MTLBuffer> buffer = kernel_arguments[argument.get()]; Loading @@ -329,14 +327,10 @@ namespace gpu { threadsPerThreadgroup:MTLSizeMake(1024, 1, 1)]; [encoder endEncoding]; id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder]; [blit synchronizeResource:result]; [blit endEncoding]; [command_buffer commit]; [command_buffer waitUntilCompleted]; return static_cast<float *> ([result contents])[0]; return static_cast<float *> (result.contents)[0]; }; } Loading @@ -355,11 +349,6 @@ namespace gpu { //------------------------------------------------------------------------------ void wait() { command_buffer = [queue commandBuffer]; id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder]; for (const auto &[key, value] : kernel_arguments) { [blit synchronizeResource:value]; } [blit endEncoding]; [command_buffer commit]; [command_buffer waitUntilCompleted]; Loading Loading @@ -416,16 +405,13 @@ namespace gpu { void copy_to_host(graph::shared_leaf<float, SAFE_MATH> node, float *destination) { command_buffer = [queue commandBuffer]; id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder]; [blit synchronizeResource:kernel_arguments[node.get()]]; [blit endEncoding]; [command_buffer commit]; [command_buffer waitUntilCompleted]; memcpy(destination, [kernel_arguments[node.get()] contents], [kernel_arguments[node.get()] length]); kernel_arguments[node.get()].contents, kernel_arguments[node.get()].length); } //------------------------------------------------------------------------------ Loading Loading @@ -618,7 +604,7 @@ namespace gpu { const size_t size) { source_buffer << std::endl; source_buffer << "kernel void max_reduction(" << std::endl; source_buffer << " device float *input [[buffer(0)]]," << std::endl; source_buffer << " constant float *input [[buffer(0)]]," << std::endl; source_buffer << " device float *result [[buffer(1)]]," << std::endl; source_buffer << " uint i [[thread_position_in_grid]]," << std::endl; source_buffer << " uint j [[simdgroup_index_in_threadgroup]]," << std::endl; Loading graph_framework/piecewise.hpp +17 −20 Original line number Diff line number Diff line Loading @@ -968,14 +968,17 @@ void compile_index(std::ostringstream &stream, const size_t length = leaf_node<T, SAFE_MATH>::caches.backends[data_hash].size(); const size_t num_rows = length/num_columns; #ifdef USE_INDEX_CACHE if (indices.find(this->left.get()) == indices.end()) { #endif shared_leaf<T, SAFE_MATH> x = this->left->compile(stream, registers, indices, usage); shared_leaf<T, SAFE_MATH> y = this->right->compile(stream, registers, indices, usage); #ifdef USE_INDEX_CACHE if (indices.find(x.get()) == indices.end()) { indices[x.get()] = jit::to_string('i', x.get()); stream << " const " << jit::smallest_int_type<T> (num_rows) << " " Loading @@ -984,13 +987,7 @@ void compile_index(std::ostringstream &stream, x_scale, x_offset); x->endline(stream, usage); } if (indices.find(this->right.get()) == indices.end()) { #endif shared_leaf<T, SAFE_MATH> y = this->right->compile(stream, registers, indices, usage); #ifdef USE_INDEX_CACHE if (indices.find(y.get()) == indices.end()) { indices[y.get()] = jit::to_string('i', y.get()); stream << " const " << jit::smallest_int_type<T> (num_columns) << " " Loading @@ -1012,9 +1009,9 @@ void compile_index(std::ostringstream &stream, stream << " const " << jit::smallest_int_type<T> (length) << " " << indices[temp.get()] << " = " << indices[this->left.get()] << indices[x.get()] << "*" << num_columns << " + " << indices[this->right.get()] << indices[y.get()] << ";" << std::endl; } } Loading Loading @@ -1048,9 +1045,9 @@ void compile_index(std::ostringstream &stream, << jit::smallest_int_type<T> (std::max(num_rows, num_columns)) << "2(" << indices[this->right.get()] << indices[y.get()] << "," << indices[this->left.get()] << indices[x.get()] << ")).r"; #else stream << ".read(uint2("; Loading @@ -1065,9 +1062,9 @@ void compile_index(std::ostringstream &stream, } else if constexpr (jit::use_cuda()) { #ifdef USE_INDEX_CACHE stream << ", " << indices[this->right.get()] << indices[y.get()] << ", " << indices[this->left.get()]; << indices[x.get()]; #else stream << ", "; compile_index<T> (stream, registers[y.get()], num_columns, Loading graph_framework/workflow.hpp +3 −3 File changed.Contains only whitespace changes. Show changes Loading
graph_framework.xcodeproj/project.pbxproj +5 −0 Original line number Diff line number Diff line Loading @@ -1726,6 +1726,7 @@ "\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"", STATIC, "MACOS_LIB_RT=\\\"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17.0.0/lib/darwin/libclang_rt.osx.a\\\"", USE_INDEX_CACHE, "USE_VERBOSE=false", "$(inherited)", ); Loading Loading @@ -1792,6 +1793,7 @@ "-lLLVMFrontendDriver", "-lLLVMFrontendHLSL", "-lLLVMFrontendOpenMP", "-lLLVMFrontendDirective", "-lLLVMFrontendOffloading", "-lLLVMSelectionDAG", "-lLLVMProfileData", Loading @@ -1813,6 +1815,7 @@ "-lLLVMAArch64Info", "-lLLVMAArch64Desc", "-lLLVMAArch64AsmParser", "-lLLVMDebugInfoDWARFLowLevel", "-lLLVMAArch64CodeGen", "-lLLVMCGData", "-lLLVMSandboxIR", Loading Loading @@ -1958,6 +1961,7 @@ "-lLLVMFrontendDriver", "-lLLVMFrontendHLSL", "-lLLVMFrontendOpenMP", "-lLLVMFrontendDirective", "-lLLVMFrontendOffloading", "-lLLVMSelectionDAG", "-lLLVMProfileData", Loading @@ -1979,6 +1983,7 @@ "-lLLVMAArch64Info", "-lLLVMAArch64Desc", "-lLLVMAArch64AsmParser", "-lLLVMDebugInfoDWARFLowLevel", "-lLLVMAArch64CodeGen", "-lLLVMCGData", "-lLLVMSandboxIR", Loading
graph_framework/metal_context.hpp +8 −22 Original line number Diff line number Diff line Loading @@ -29,8 +29,6 @@ namespace gpu { std::map<graph::leaf_node<float, SAFE_MATH> *, id<MTLBuffer>> kernel_arguments; /// Textures. std::map<void *, id<MTLTexture>> texture_arguments; /// Max Buffer. id<MTLBuffer> result; /// Metal command buffer. id<MTLCommandBuffer> command_buffer; /// Metal library. Loading Loading @@ -144,14 +142,14 @@ namespace gpu { backend::buffer<float> buffer = input->evaluate(); kernel_arguments[input.get()] = [device newBufferWithBytes:buffer.data() length:buffer.size()*buffer_element_size options:MTLResourceStorageModeManaged]; options:MTLResourceStorageModeShared]; } buffers.push_back(kernel_arguments[input.get()]); } for (graph::shared_leaf<float, SAFE_MATH> &output : outputs) { if (!kernel_arguments.contains(output.get())) { kernel_arguments[output.get()] = [device newBufferWithLength:num_rays*sizeof(float) options:MTLResourceStorageModeManaged]; options:MTLResourceStorageModeShared]; } buffers.push_back(kernel_arguments[output.get()]); } Loading @@ -160,7 +158,7 @@ namespace gpu { kernel_arguments[state.get()] = [device newBufferWithBytes:state->data() length:state->get_size_bytes() options:MTLResourceCPUCacheModeWriteCombined | MTLResourceStorageModeManaged | MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked]; } buffers.push_back(kernel_arguments[state.get()]); Loading Loading @@ -302,7 +300,7 @@ namespace gpu { } id<MTLBuffer> result = [device newBufferWithLength:sizeof(float) options:MTLResourceStorageModeManaged]; options:MTLResourceStorageModeShared]; id<MTLBuffer> buffer = kernel_arguments[argument.get()]; Loading @@ -329,14 +327,10 @@ namespace gpu { threadsPerThreadgroup:MTLSizeMake(1024, 1, 1)]; [encoder endEncoding]; id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder]; [blit synchronizeResource:result]; [blit endEncoding]; [command_buffer commit]; [command_buffer waitUntilCompleted]; return static_cast<float *> ([result contents])[0]; return static_cast<float *> (result.contents)[0]; }; } Loading @@ -355,11 +349,6 @@ namespace gpu { //------------------------------------------------------------------------------ void wait() { command_buffer = [queue commandBuffer]; id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder]; for (const auto &[key, value] : kernel_arguments) { [blit synchronizeResource:value]; } [blit endEncoding]; [command_buffer commit]; [command_buffer waitUntilCompleted]; Loading Loading @@ -416,16 +405,13 @@ namespace gpu { void copy_to_host(graph::shared_leaf<float, SAFE_MATH> node, float *destination) { command_buffer = [queue commandBuffer]; id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder]; [blit synchronizeResource:kernel_arguments[node.get()]]; [blit endEncoding]; [command_buffer commit]; [command_buffer waitUntilCompleted]; memcpy(destination, [kernel_arguments[node.get()] contents], [kernel_arguments[node.get()] length]); kernel_arguments[node.get()].contents, kernel_arguments[node.get()].length); } //------------------------------------------------------------------------------ Loading Loading @@ -618,7 +604,7 @@ namespace gpu { const size_t size) { source_buffer << std::endl; source_buffer << "kernel void max_reduction(" << std::endl; source_buffer << " device float *input [[buffer(0)]]," << std::endl; source_buffer << " constant float *input [[buffer(0)]]," << std::endl; source_buffer << " device float *result [[buffer(1)]]," << std::endl; source_buffer << " uint i [[thread_position_in_grid]]," << std::endl; source_buffer << " uint j [[simdgroup_index_in_threadgroup]]," << std::endl; Loading
graph_framework/piecewise.hpp +17 −20 Original line number Diff line number Diff line Loading @@ -968,14 +968,17 @@ void compile_index(std::ostringstream &stream, const size_t length = leaf_node<T, SAFE_MATH>::caches.backends[data_hash].size(); const size_t num_rows = length/num_columns; #ifdef USE_INDEX_CACHE if (indices.find(this->left.get()) == indices.end()) { #endif shared_leaf<T, SAFE_MATH> x = this->left->compile(stream, registers, indices, usage); shared_leaf<T, SAFE_MATH> y = this->right->compile(stream, registers, indices, usage); #ifdef USE_INDEX_CACHE if (indices.find(x.get()) == indices.end()) { indices[x.get()] = jit::to_string('i', x.get()); stream << " const " << jit::smallest_int_type<T> (num_rows) << " " Loading @@ -984,13 +987,7 @@ void compile_index(std::ostringstream &stream, x_scale, x_offset); x->endline(stream, usage); } if (indices.find(this->right.get()) == indices.end()) { #endif shared_leaf<T, SAFE_MATH> y = this->right->compile(stream, registers, indices, usage); #ifdef USE_INDEX_CACHE if (indices.find(y.get()) == indices.end()) { indices[y.get()] = jit::to_string('i', y.get()); stream << " const " << jit::smallest_int_type<T> (num_columns) << " " Loading @@ -1012,9 +1009,9 @@ void compile_index(std::ostringstream &stream, stream << " const " << jit::smallest_int_type<T> (length) << " " << indices[temp.get()] << " = " << indices[this->left.get()] << indices[x.get()] << "*" << num_columns << " + " << indices[this->right.get()] << indices[y.get()] << ";" << std::endl; } } Loading Loading @@ -1048,9 +1045,9 @@ void compile_index(std::ostringstream &stream, << jit::smallest_int_type<T> (std::max(num_rows, num_columns)) << "2(" << indices[this->right.get()] << indices[y.get()] << "," << indices[this->left.get()] << indices[x.get()] << ")).r"; #else stream << ".read(uint2("; Loading @@ -1065,9 +1062,9 @@ void compile_index(std::ostringstream &stream, } else if constexpr (jit::use_cuda()) { #ifdef USE_INDEX_CACHE stream << ", " << indices[this->right.get()] << indices[y.get()] << ", " << indices[this->left.get()]; << indices[x.get()]; #else stream << ", "; compile_index<T> (stream, registers[y.get()], num_columns, Loading