Merge branch 'inderect_command_buffer' into 'main' (096987ac) · Commits · Cianciosa, Mark / graph_framework

graph_framework.xcodeproj/project.pbxproj

+5 −0

Original line number	Diff line number	Diff line
		@@ -1726,6 +1726,7 @@
		"\"CXX_ARGS=\\\"-I/Users/m4c/Projects/graph_framework/graph_framework -I/usr/local/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include -I/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks -fgnuc-version=4.2.1 -std=gnu++2a\\\"\"",
		STATIC,
		"MACOS_LIB_RT=\\\"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17.0.0/lib/darwin/libclang_rt.osx.a\\\"",
		USE_INDEX_CACHE,
		"USE_VERBOSE=false",
		"$(inherited)",
		);
		@@ -1792,6 +1793,7 @@
		"-lLLVMFrontendDriver",
		"-lLLVMFrontendHLSL",
		"-lLLVMFrontendOpenMP",
		"-lLLVMFrontendDirective",
		"-lLLVMFrontendOffloading",
		"-lLLVMSelectionDAG",
		"-lLLVMProfileData",
		@@ -1813,6 +1815,7 @@
		"-lLLVMAArch64Info",
		"-lLLVMAArch64Desc",
		"-lLLVMAArch64AsmParser",
		"-lLLVMDebugInfoDWARFLowLevel",
		"-lLLVMAArch64CodeGen",
		"-lLLVMCGData",
		"-lLLVMSandboxIR",
		@@ -1958,6 +1961,7 @@
		"-lLLVMFrontendDriver",
		"-lLLVMFrontendHLSL",
		"-lLLVMFrontendOpenMP",
		"-lLLVMFrontendDirective",
		"-lLLVMFrontendOffloading",
		"-lLLVMSelectionDAG",
		"-lLLVMProfileData",
		@@ -1979,6 +1983,7 @@
		"-lLLVMAArch64Info",
		"-lLLVMAArch64Desc",
		"-lLLVMAArch64AsmParser",
		"-lLLVMDebugInfoDWARFLowLevel",
		"-lLLVMAArch64CodeGen",
		"-lLLVMCGData",
		"-lLLVMSandboxIR",

graph_framework/metal_context.hpp

+8 −22

Original line number	Diff line number	Diff line
		@@ -29,8 +29,6 @@ namespace gpu {
		std::map<graph::leaf_node<float, SAFE_MATH> *, id<MTLBuffer>> kernel_arguments;
		/// Textures.
		std::map<void *, id<MTLTexture>> texture_arguments;
		/// Max Buffer.
		id<MTLBuffer> result;
		/// Metal command buffer.
		id<MTLCommandBuffer> command_buffer;
		/// Metal library.
		@@ -144,14 +142,14 @@ namespace gpu {
		backend::buffer<float> buffer = input->evaluate();
		kernel_arguments[input.get()] = [device newBufferWithBytes:buffer.data()
		length:buffer.size()*buffer_element_size
		options:MTLResourceStorageModeManaged];
		options:MTLResourceStorageModeShared];
		}
		buffers.push_back(kernel_arguments[input.get()]);
		}
		for (graph::shared_leaf<float, SAFE_MATH> &output : outputs) {
		if (!kernel_arguments.contains(output.get())) {
		kernel_arguments[output.get()] = [device newBufferWithLength:num_rays*sizeof(float)
		options:MTLResourceStorageModeManaged];
		options:MTLResourceStorageModeShared];
		}
		buffers.push_back(kernel_arguments[output.get()]);
		}
		@@ -160,7 +158,7 @@ namespace gpu {
		kernel_arguments[state.get()] = [device newBufferWithBytes:state->data()
		length:state->get_size_bytes()
		options:MTLResourceCPUCacheModeWriteCombined \|
		MTLResourceStorageModeManaged \|
		MTLResourceStorageModeShared \|
		MTLResourceHazardTrackingModeUntracked];
		}
		buffers.push_back(kernel_arguments[state.get()]);
		@@ -302,7 +300,7 @@ namespace gpu {
		}

		id<MTLBuffer> result = [device newBufferWithLength:sizeof(float)
		options:MTLResourceStorageModeManaged];
		options:MTLResourceStorageModeShared];

		id<MTLBuffer> buffer = kernel_arguments[argument.get()];

		@@ -329,14 +327,10 @@ namespace gpu {
		threadsPerThreadgroup:MTLSizeMake(1024, 1, 1)];
		[encoder endEncoding];

		id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder];
		[blit synchronizeResource:result];
		[blit endEncoding];

		[command_buffer commit];
		[command_buffer waitUntilCompleted];

		return static_cast<float *> ([result contents])[0];
		return static_cast<float *> (result.contents)[0];
		};
		}

		@@ -355,11 +349,6 @@ namespace gpu {
		//------------------------------------------------------------------------------
		void wait() {
		command_buffer = [queue commandBuffer];
		id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder];
		for (const auto &[key, value] : kernel_arguments) {
		[blit synchronizeResource:value];
		}
		[blit endEncoding];

		[command_buffer commit];
		[command_buffer waitUntilCompleted];
		@@ -416,16 +405,13 @@ namespace gpu {
		void copy_to_host(graph::shared_leaf<float, SAFE_MATH> node,
		float *destination) {
		command_buffer = [queue commandBuffer];
		id<MTLBlitCommandEncoder> blit = [command_buffer blitCommandEncoder];
		[blit synchronizeResource:kernel_arguments[node.get()]];
		[blit endEncoding];

		[command_buffer commit];
		[command_buffer waitUntilCompleted];

		memcpy(destination,
		[kernel_arguments[node.get()] contents],
		[kernel_arguments[node.get()] length]);
		kernel_arguments[node.get()].contents,
		kernel_arguments[node.get()].length);
		}

		//------------------------------------------------------------------------------
		@@ -618,7 +604,7 @@ namespace gpu {
		const size_t size) {
		source_buffer << std::endl;
		source_buffer << "kernel void max_reduction(" << std::endl;
		source_buffer << " device float *input [[buffer(0)]]," << std::endl;
		source_buffer << " constant float *input [[buffer(0)]]," << std::endl;
		source_buffer << " device float *result [[buffer(1)]]," << std::endl;
		source_buffer << " uint i [[thread_position_in_grid]]," << std::endl;
		source_buffer << " uint j [[simdgroup_index_in_threadgroup]]," << std::endl;

graph_framework/piecewise.hpp

+17 −20

Original line number	Diff line number	Diff line
		@@ -968,14 +968,17 @@ void compile_index(std::ostringstream &stream,
		const size_t length = leaf_node<T, SAFE_MATH>::caches.backends[data_hash].size();
		const size_t num_rows = length/num_columns;

		#ifdef USE_INDEX_CACHE
		if (indices.find(this->left.get()) == indices.end()) {
		#endif
		shared_leaf<T, SAFE_MATH> x = this->left->compile(stream,
		registers,
		indices,
		usage);
		shared_leaf<T, SAFE_MATH> y = this->right->compile(stream,
		registers,
		indices,
		usage);

		#ifdef USE_INDEX_CACHE
		if (indices.find(x.get()) == indices.end()) {
		indices[x.get()] = jit::to_string('i', x.get());
		stream << " const "
		<< jit::smallest_int_type<T> (num_rows) << " "
		@@ -984,13 +987,7 @@ void compile_index(std::ostringstream &stream,
		x_scale, x_offset);
		x->endline(stream, usage);
		}
		if (indices.find(this->right.get()) == indices.end()) {
		#endif
		shared_leaf<T, SAFE_MATH> y = this->right->compile(stream,
		registers,
		indices,
		usage);
		#ifdef USE_INDEX_CACHE
		if (indices.find(y.get()) == indices.end()) {
		indices[y.get()] = jit::to_string('i', y.get());
		stream << " const "
		<< jit::smallest_int_type<T> (num_columns) << " "
		@@ -1012,9 +1009,9 @@ void compile_index(std::ostringstream &stream,
		stream << " const "
		<< jit::smallest_int_type<T> (length) << " "
		<< indices[temp.get()] << " = "
		<< indices[this->left.get()]
		<< indices[x.get()]
		<< "*" << num_columns << " + "
		<< indices[this->right.get()]
		<< indices[y.get()]
		<< ";" << std::endl;
		}
		}
		@@ -1048,9 +1045,9 @@ void compile_index(std::ostringstream &stream,
		<< jit::smallest_int_type<T> (std::max(num_rows,
		num_columns))
		<< "2("
		<< indices[this->right.get()]
		<< indices[y.get()]
		<< ","
		<< indices[this->left.get()]
		<< indices[x.get()]
		<< ")).r";
		#else
		stream << ".read(uint2(";
		@@ -1065,9 +1062,9 @@ void compile_index(std::ostringstream &stream,
		} else if constexpr (jit::use_cuda()) {
		#ifdef USE_INDEX_CACHE
		stream << ", "
		<< indices[this->right.get()]
		<< indices[y.get()]
		<< ", "
		<< indices[this->left.get()];
		<< indices[x.get()];
		#else
		stream << ", ";
		compile_index<T> (stream, registers[y.get()], num_columns,

graph_framework/workflow.hpp

+3 −3

File changed.

Contains only whitespace changes.