include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp  (+6 −6)

@@ -123,10 +123,10 @@ private:
   std::array<MPI_Request, 2> recv_requests_{MPI_REQUEST_NULL, MPI_REQUEST_NULL};
   std::array<MPI_Request, 2> send_requests_{MPI_REQUEST_NULL, MPI_REQUEST_NULL};
 
-#ifndef DCA_WITH_CUDA_AWARE_MPI
+#ifndef DCA_HAVE_CUDA_AWARE_MPI
   std::array<std::vector<Complex>, 2> sendbuffer_;
   std::array<std::vector<Complex>, 2> recvbuffer_;
-#endif  // DCA_WITH_CUDA_AWARE_MPI
+#endif  // DCA_HAVE_CUDA_AWARE_MPI
 };
 
 template <class Parameters>

@@ -366,7 +366,7 @@ void TpAccumulator<Parameters, linalg::GPU, DistType::MPI>::send(const std::arra
   using dca::parallel::MPITypeMap;
   const auto g_size = data[0].size().first * data[0].size().second;
 
-#ifdef DCA_WITH_CUDA_AWARE_MPI
+#ifdef DCA_HAVE_CUDA_AWARE_MPI
   for (int s = 0; s < 2; ++s) {
     MPI_Isend(data[s].ptr(), g_size, MPITypeMap<Complex>::value(), target, thread_id_ + 1,
               MPI_COMM_WORLD, &request[s]);

@@ -381,7 +381,7 @@
     MPI_Isend(sendbuffer_[s].data(), g_size, MPITypeMap<Complex>::value(), target, thread_id_ + 1,
               MPI_COMM_WORLD, &request[s]);
   }
-#endif  // DCA_WITH_CUDA_AWARE_MPI
+#endif  // DCA_HAVE_CUDA_AWARE_MPI
 }
 
 template <class Parameters>

@@ -390,7 +390,7 @@ void TpAccumulator<Parameters, linalg::GPU, DistType::MPI>::receive(
   using dca::parallel::MPITypeMap;
   const auto g_size = data[0].size().first * data[0].size().second;
 
-#ifdef DCA_WITH_CUDA_AWARE_MPI
+#ifdef DCA_HAVE_CUDA_AWARE_MPI
   for (int s = 0; s < 2; ++s) {
     MPI_Irecv(data[s].ptr(), g_size, MPITypeMap<Complex>::value(), source, thread_id_ + 1,
               MPI_COMM_WORLD, &request[s]);

@@ -408,7 +408,7 @@ void TpAccumulator<Parameters, linalg::GPU, DistType::MPI>::receive(
     cudaMemcpy(data[s].ptr(), recvbuffer_[s].data(), g_size * sizeof(Complex),
                cudaMemcpyHostToDevice);
   }
-#endif  // DCA_WITH_CUDA_AWARE_MPI
+#endif  // DCA_HAVE_CUDA_AWARE_MPI
 }
 
 }  // namespace accumulator

test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu_test.cpp  (+1 −1)

@@ -12,7 +12,7 @@
-#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp"
+#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp"
 
 #include <array>
 #include <functional>
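For reference, the macro this diff renames (DCA_WITH_CUDA_AWARE_MPI → DCA_HAVE_CUDA_AWARE_MPI) selects between two transfer strategies: with a CUDA-aware MPI, device pointers are handed to MPI_Isend/MPI_Irecv directly; without it, the data is staged through host buffers around the MPI call. Below is a minimal, self-contained sketch of that pattern, not the DCA++ implementation: the function name send_device_data, the tag parameter, and the use of MPI_CXX_DOUBLE_COMPLEX (standing in for DCA++'s MPITypeMap<Complex>::value()) are illustrative assumptions.

// Sketch of the send pattern guarded by DCA_HAVE_CUDA_AWARE_MPI (illustrative, not DCA++ code).
#include <mpi.h>
#include <cuda_runtime.h>
#include <complex>
#include <vector>

using Complex = std::complex<double>;

// Starts a non-blocking send of `count` complex values living in device memory.
// On the fallback path, `host_staging` must outlive the send, since MPI_Isend
// only initiates the transfer; hence the buffer is passed in by the caller.
void send_device_data(const Complex* device_ptr, int count, int target, int tag,
                      std::vector<Complex>& host_staging, MPI_Request* request) {
#ifdef DCA_HAVE_CUDA_AWARE_MPI
  // A CUDA-aware MPI can read GPU memory directly: no host copy is needed.
  MPI_Isend(device_ptr, count, MPI_CXX_DOUBLE_COMPLEX, target, tag, MPI_COMM_WORLD, request);
#else
  // Plain MPI: copy device -> host first, then send the host buffer.
  host_staging.resize(count);
  cudaMemcpy(host_staging.data(), device_ptr, count * sizeof(Complex),
             cudaMemcpyDeviceToHost);
  MPI_Isend(host_staging.data(), count, MPI_CXX_DOUBLE_COMPLEX, target, tag,
            MPI_COMM_WORLD, request);
#endif  // DCA_HAVE_CUDA_AWARE_MPI
}

The receive side mirrors this: MPI_Irecv into the device pointer directly, or into a host buffer that is copied to the device with cudaMemcpyHostToDevice once the request completes, which is the cudaMemcpy visible in the last hunk of the header diff above.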