Loading include/dca/linalg/matrix.hpp +5 −25 Original line number Diff line number Diff line Loading @@ -199,22 +199,15 @@ public: template <DeviceType rhs_device_name> void set(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, cudaStream_t stream); void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, const util::CudaStream& stream); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) template <DeviceType rhs_device_name> void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); void setToZero(cudaStream_t stream); #else // Synchronous assignment fallback for SetAsync. template <DeviceType rhs_device_name> void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #endif // DCA_HAVE_CUDA void setToZero(const util::CudaStream& stream); // Prints the values of the matrix elements. void print() const; Loading Loading @@ -429,12 +422,10 @@ void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_na stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, const cudaStream_t stream) { const util::CudaStream& stream) { resizeNoCopy(rhs.size_); util::memoryCopyAsync(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, stream); } Loading @@ -447,21 +438,10 @@ void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_devi } template <typename ScalarType, DeviceType device_name> void Matrix<ScalarType, device_name>::setToZero(cudaStream_t stream) { cudaMemsetAsync(data_, 0, leadingDimension() * nrCols() * sizeof(ScalarType), stream); void Matrix<ScalarType, device_name>::setToZero(const util::CudaStream& stream) { util::Memory<device_name>::setToZeroAsync(data_, leadingDimension() * nrCols(), stream); } #else template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) { set(rhs); } #endif // DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name> void Matrix<ScalarType, device_name>::print() const { if (device_name == GPU) Loading test/unit/linalg/matrix_cpu_gpu_test.cpp +3 −5 Original line number Diff line number Diff line Loading @@ -220,14 +220,12 @@ TEST(MatrixCPUGPUTest, SetAsync) { auto el_value = [](int i, int j) { return 3 * i - 2 * j; }; testing::setMatrixElements(mat, el_value); cudaStream_t stream; cudaStreamCreate(&stream); dca::linalg::util::CudaStream stream; mat_copy.setAsync(mat, stream); mat_copy_copy.setAsync(mat_copy, stream); cudaStreamSynchronize(stream); stream.sync(); EXPECT_EQ(mat, mat_copy_copy); cudaStreamDestroy(stream); } test/unit/linalg/matrix_gpu_test.cpp +2 −5 Original line number Diff line number Diff line Loading @@ -664,16 +664,13 @@ TEST(MatrixGPUTest, setToZero) { auto func = [](int i, int j) { return 10 * i - j; }; testing::setMatrixElements(mat, func); cudaStream_t stream; cudaStreamCreate(&stream); dca::linalg::util::CudaStream stream; mat.setToZero(stream); cudaStreamSynchronize(stream); stream.sync(); dca::linalg::Matrix<long, dca::linalg::CPU> mat_copy(mat); for (int j = 0; j < mat_copy.nrCols(); ++j) for (int i = 0; i < mat_copy.nrRows(); ++i) EXPECT_EQ(0, mat_copy(i, j)); cudaStreamDestroy(stream); } Loading
include/dca/linalg/matrix.hpp +5 −25 Original line number Diff line number Diff line Loading @@ -199,22 +199,15 @@ public: template <DeviceType rhs_device_name> void set(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #ifdef DCA_HAVE_CUDA // Asynchronous assignment. template <DeviceType rhs_device_name> void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, cudaStream_t stream); void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, const util::CudaStream& stream); // Asynchronous assignment (copy with stream = getStream(thread_id, stream_id)) template <DeviceType rhs_device_name> void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); void setToZero(cudaStream_t stream); #else // Synchronous assignment fallback for SetAsync. template <DeviceType rhs_device_name> void setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, int thread_id, int stream_id); #endif // DCA_HAVE_CUDA void setToZero(const util::CudaStream& stream); // Prints the values of the matrix elements. void print() const; Loading Loading @@ -429,12 +422,10 @@ void Matrix<ScalarType, device_name>::set(const Matrix<ScalarType, rhs_device_na stream_id); } #ifdef DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, const cudaStream_t stream) { const util::CudaStream& stream) { resizeNoCopy(rhs.size_); util::memoryCopyAsync(data_, leadingDimension(), rhs.data_, rhs.leadingDimension(), size_, stream); } Loading @@ -447,21 +438,10 @@ void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_devi } template <typename ScalarType, DeviceType device_name> void Matrix<ScalarType, device_name>::setToZero(cudaStream_t stream) { cudaMemsetAsync(data_, 0, leadingDimension() * nrCols() * sizeof(ScalarType), stream); void Matrix<ScalarType, device_name>::setToZero(const util::CudaStream& stream) { util::Memory<device_name>::setToZeroAsync(data_, leadingDimension() * nrCols(), stream); } #else template <typename ScalarType, DeviceType device_name> template <DeviceType rhs_device_name> void Matrix<ScalarType, device_name>::setAsync(const Matrix<ScalarType, rhs_device_name>& rhs, int /*thread_id*/, int /*stream_id*/) { set(rhs); } #endif // DCA_HAVE_CUDA template <typename ScalarType, DeviceType device_name> void Matrix<ScalarType, device_name>::print() const { if (device_name == GPU) Loading
test/unit/linalg/matrix_cpu_gpu_test.cpp +3 −5 Original line number Diff line number Diff line Loading @@ -220,14 +220,12 @@ TEST(MatrixCPUGPUTest, SetAsync) { auto el_value = [](int i, int j) { return 3 * i - 2 * j; }; testing::setMatrixElements(mat, el_value); cudaStream_t stream; cudaStreamCreate(&stream); dca::linalg::util::CudaStream stream; mat_copy.setAsync(mat, stream); mat_copy_copy.setAsync(mat_copy, stream); cudaStreamSynchronize(stream); stream.sync(); EXPECT_EQ(mat, mat_copy_copy); cudaStreamDestroy(stream); }
test/unit/linalg/matrix_gpu_test.cpp +2 −5 Original line number Diff line number Diff line Loading @@ -664,16 +664,13 @@ TEST(MatrixGPUTest, setToZero) { auto func = [](int i, int j) { return 10 * i - j; }; testing::setMatrixElements(mat, func); cudaStream_t stream; cudaStreamCreate(&stream); dca::linalg::util::CudaStream stream; mat.setToZero(stream); cudaStreamSynchronize(stream); stream.sync(); dca::linalg::Matrix<long, dca::linalg::CPU> mat_copy(mat); for (int j = 0; j < mat_copy.nrCols(); ++j) for (int i = 0; i < mat_copy.nrRows(); ++i) EXPECT_EQ(0, mat_copy(i, j)); cudaStreamDestroy(stream); }