Commit c49ae3ef authored by D'azevedo, Ed's avatar D'azevedo, Ed
Browse files

add cudaMemPrefetchAsync

parent 5d11f282
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -4,7 +4,10 @@


#include "cuda.h"
#include "driver_types.h"
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"

#include "magma_types.h"
#include "magma_v2.h"

+42 −0
Original line number Diff line number Diff line
@@ -59,6 +59,16 @@ void *ptr = NULL;

   assert( ierr == cudaSuccess );
   }

   {
     CUdeviceptr devPtr = (CUdeviceptr) ptr;
     size_t count = alloc_size;
     CUmem_advise advice = CU_MEM_ADVISE_SET_PREFERRED_LOCATION;
     CUdevice device =  0;
     CUresult istat = cuMemAdvise( devPtr, count, advice, device );
     assert( istat == CUDA_SUCCESS);
   }

#else
   ptr = (void *) malloc( alloc_size );
#endif
@@ -66,6 +76,38 @@ void *ptr = NULL;
  return (ptr);
}

/*
 * Prefetch a range of managed (unified) memory to the current CUDA device.
 *
 * unified_memory_ptr : start of the range; must be non-NULL when nbytes > 0.
 * nbytes             : length of the range in bytes; 0 makes the call a no-op.
 *
 * In builds without USE_MAGMA this function does nothing.  With USE_MAGMA
 * the prefetch is issued only when the current device reports the
 * concurrentManagedAccess attribute; the function then waits in
 * cudaDeviceSynchronize() before returning.  CUDA return codes are checked
 * with assert().
 */
void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes )
{
        /* An empty range needs no work.  Test this before the pointer check
         * so that an empty buffer (possibly a NULL pointer) is accepted. */
        if (nbytes == 0) { return; }

        assert( unified_memory_ptr != 0 );

#ifdef USE_MAGMA
        int deviceId = 0;
        cudaError_t istat = cudaGetDevice( &deviceId );
        assert( istat == cudaSuccess );

        /* Query only the single attribute that is needed; this is much
         * cheaper than filling a complete cudaDeviceProp on every call. */
        int has_concurrent_managed_access = 0;
        istat = cudaDeviceGetAttribute( &has_concurrent_managed_access,
                                        cudaDevAttrConcurrentManagedAccess,
                                        deviceId );
        assert( istat == cudaSuccess );

        if (has_concurrent_managed_access) {
                cudaStream_t stream = 0;   /* default stream */
                istat = cudaMemPrefetchAsync( unified_memory_ptr, nbytes,
                                              deviceId, stream );
                assert( istat == cudaSuccess );

                /* Block until the device has finished, so the prefetch is
                 * complete when this function returns. */
                istat = cudaDeviceSynchronize();
                assert( istat == cudaSuccess );
        }

        /* Keeps NDEBUG builds free of "set but not used" warnings. */
        (void) istat;
#else
        /* Nothing to prefetch without CUDA; silence unused-parameter
         * warnings when assert() is compiled out. */
        (void) unified_memory_ptr;
#endif

}





void dmrg_free( void *ptr )
{
#ifdef USE_MAGMA
+3 −0
Original line number Diff line number Diff line
@@ -54,6 +54,9 @@ int dmrg_is_managed( const void *ptr );
extern
void *dmrg_malloc( const size_t alloc_size );

/*
 * Prefetch nbytes bytes of managed memory starting at unified_memory_ptr
 * to the current CUDA device.
 *
 * The request is only issued in USE_MAGMA builds and only when the current
 * device reports concurrentManagedAccess; in that case the call synchronizes
 * the device before returning.  Otherwise the call has no effect.
 * A request with nbytes == 0 returns without doing any work.
 */
extern
void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes );

extern
void  dmrg_free( void *a_ptr );

+3 −0
Original line number Diff line number Diff line
@@ -438,6 +438,9 @@ void setup_sparse_batch(

   }; /* end for ipatch */

   dmrg_prefetch_to_device( pAmem, sum_Abatch_sizes);
   dmrg_prefetch_to_device( pBmem, sum_Bbatch_sizes);

  };