Commit 096987ac authored by Cianciosa, Mark's avatar Cianciosa, Mark
Browse files

Merge branch 'inderect_command_buffer' into 'main'

Inderect command buffer

See merge request !75
parents cd2b4ef7 ac1b354e
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -1726,6 +1726,7 @@
					"\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"",
					STATIC,
					"MACOS_LIB_RT=\\\"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17.0.0/lib/darwin/libclang_rt.osx.a\\\"",
					USE_INDEX_CACHE,
					"USE_VERBOSE=false",
					"$(inherited)",
				);
@@ -1792,6 +1793,7 @@
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendDirective",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
@@ -1813,6 +1815,7 @@
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMDebugInfoDWARFLowLevel",
					"-lLLVMAArch64CodeGen",
					"-lLLVMCGData",
					"-lLLVMSandboxIR",
@@ -1958,6 +1961,7 @@
					"-lLLVMFrontendDriver",
					"-lLLVMFrontendHLSL",
					"-lLLVMFrontendOpenMP",
					"-lLLVMFrontendDirective",
					"-lLLVMFrontendOffloading",
					"-lLLVMSelectionDAG",
					"-lLLVMProfileData",
@@ -1979,6 +1983,7 @@
					"-lLLVMAArch64Info",
					"-lLLVMAArch64Desc",
					"-lLLVMAArch64AsmParser",
					"-lLLVMDebugInfoDWARFLowLevel",
					"-lLLVMAArch64CodeGen",
					"-lLLVMCGData",
					"-lLLVMSandboxIR",
+8 −22
Original line number Diff line number Diff line
@@ -29,8 +29,6 @@ namespace gpu {
        std::map<graph::leaf_node<float, SAFE_MATH> *, id<MTLBuffer>> kernel_arguments;
///  Textures.
        std::map<void *, id<MTLTexture>> texture_arguments;
///  Max Buffer.
        id<MTLBuffer> result;
///  Metal command buffer.
        id<MTLCommandBuffer> command_buffer;
///  Metal library.
@@ -144,14 +142,14 @@ namespace gpu {
                    backend::buffer<float> buffer = input->evaluate();
                    kernel_arguments[input.get()] = [device newBufferWithBytes:buffer.data()
                                                                        length:buffer.size()*buffer_element_size
                                                                       options:MTLResourceStorageModeManaged];
                                                                       options:MTLResourceStorageModeShared];
                }
                buffers.push_back(kernel_arguments[input.get()]);
            }
            for (graph::shared_leaf<float, SAFE_MATH> &output : outputs) {
                if (!kernel_arguments.contains(output.get())) {
                    kernel_arguments[output.get()] = [device newBufferWithLength:num_rays*sizeof(float)
                                                                         options:MTLResourceStorageModeManaged];
                                                                         options:MTLResourceStorageModeShared];
                }
                buffers.push_back(kernel_arguments[output.get()]);
            }
@@ -160,7 +158,7 @@ namespace gpu {
                    kernel_arguments[state.get()] = [device newBufferWithBytes:state->data()
                                                                        length:state->get_size_bytes()
                                                                       options:MTLResourceCPUCacheModeWriteCombined |
                                                                               MTLResourceStorageModeManaged        |
                                                                               MTLResourceStorageModeShared         |
                                                                               MTLResourceHazardTrackingModeUntracked];
                }
                buffers.push_back(kernel_arguments[state.get()]);
@@ -302,7 +300,7 @@ namespace gpu {
            }

            id<MTLBuffer> result = [device newBufferWithLength:sizeof(float)
                                                       options:MTLResourceStorageModeManaged];
                                                       options:MTLResourceStorageModeShared];

            id<MTLBuffer> buffer = kernel_arguments[argument.get()];

@@ -329,14 +327,10 @@ namespace gpu {
                        threadsPerThreadgroup:MTLSizeMake(1024, 1, 1)];
                [encoder endEncoding];

                id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder];
                [blit synchronizeResource:result];
                [blit endEncoding];

                [command_buffer commit];
                [command_buffer waitUntilCompleted];

                return static_cast<float *> ([result contents])[0];
                return static_cast<float *> (result.contents)[0];
            };
        }

@@ -355,11 +349,6 @@ namespace gpu {
//------------------------------------------------------------------------------
        void wait() {
            command_buffer = [queue commandBuffer];
            id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder];
            for (const auto &[key, value] : kernel_arguments) {
                [blit synchronizeResource:value];
            }
            [blit endEncoding];

            [command_buffer commit];
            [command_buffer waitUntilCompleted];
@@ -416,16 +405,13 @@ namespace gpu {
        void copy_to_host(graph::shared_leaf<float, SAFE_MATH> node,
                          float *destination) {
            command_buffer = [queue commandBuffer];
            id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder];
            [blit synchronizeResource:kernel_arguments[node.get()]];
            [blit endEncoding];

            [command_buffer commit];
            [command_buffer waitUntilCompleted];

            memcpy(destination,
                   [kernel_arguments[node.get()] contents],
                   [kernel_arguments[node.get()] length]);
                   kernel_arguments[node.get()].contents,
                   kernel_arguments[node.get()].length);
        }

//------------------------------------------------------------------------------
@@ -618,7 +604,7 @@ namespace gpu {
                              const size_t size) {
            source_buffer << std::endl;
            source_buffer << "kernel void max_reduction(" << std::endl;
            source_buffer << "    device float *input [[buffer(0)]]," << std::endl;
            source_buffer << "    constant float *input [[buffer(0)]]," << std::endl;
            source_buffer << "    device float *result [[buffer(1)]]," << std::endl;
            source_buffer << "    uint i [[thread_position_in_grid]]," << std::endl;
            source_buffer << "    uint j [[simdgroup_index_in_threadgroup]]," << std::endl;
+17 −20
Original line number Diff line number Diff line
@@ -968,14 +968,17 @@ void compile_index(std::ostringstream &stream,
                const size_t length = leaf_node<T, SAFE_MATH>::caches.backends[data_hash].size();
                const size_t num_rows = length/num_columns;

#ifdef USE_INDEX_CACHE
                if (indices.find(this->left.get()) == indices.end()) {
#endif
                shared_leaf<T, SAFE_MATH> x = this->left->compile(stream,
                                                                  registers,
                                                                  indices,
                                                                  usage);
                shared_leaf<T, SAFE_MATH> y = this->right->compile(stream,
                                                                   registers,
                                                                   indices,
                                                                   usage);

#ifdef USE_INDEX_CACHE
                if (indices.find(x.get()) == indices.end()) {
                    indices[x.get()] = jit::to_string('i', x.get());
                    stream << "        const "
                           << jit::smallest_int_type<T> (num_rows) << " "
@@ -984,13 +987,7 @@ void compile_index(std::ostringstream &stream,
                                      x_scale, x_offset);
                    x->endline(stream, usage);
                }
                if (indices.find(this->right.get()) == indices.end()) {
#endif
                    shared_leaf<T, SAFE_MATH> y = this->right->compile(stream,
                                                                       registers,
                                                                       indices,
                                                                       usage);
#ifdef USE_INDEX_CACHE
                if (indices.find(y.get()) == indices.end()) {
                    indices[y.get()] = jit::to_string('i', y.get());
                    stream << "        const "
                           << jit::smallest_int_type<T> (num_columns) << " "
@@ -1012,9 +1009,9 @@ void compile_index(std::ostringstream &stream,
                        stream << "        const "
                               << jit::smallest_int_type<T> (length) << " "
                               << indices[temp.get()] << " = "
                               << indices[this->left.get()]
                               << indices[x.get()]
                               << "*" << num_columns << " + "
                               << indices[this->right.get()]
                               << indices[y.get()]
                               << ";" << std::endl;
                    }
                }
@@ -1048,9 +1045,9 @@ void compile_index(std::ostringstream &stream,
                           << jit::smallest_int_type<T> (std::max(num_rows,
                                                                  num_columns))
                           << "2("
                           << indices[this->right.get()]
                           << indices[y.get()]
                           << ","
                           << indices[this->left.get()]
                           << indices[x.get()]
                           << ")).r";
#else
                    stream << ".read(uint2(";
@@ -1065,9 +1062,9 @@ void compile_index(std::ostringstream &stream,
                } else if constexpr (jit::use_cuda()) {
#ifdef USE_INDEX_CACHE
                    stream << ", "
                           << indices[this->right.get()]
                           << indices[y.get()]
                           << ", "
                           << indices[this->left.get()];
                           << indices[x.get()];
#else
                    stream << ", ";
                    compile_index<T> (stream, registers[y.get()], num_columns,
+3 −3

File changed.

Contains only whitespace changes.