Loading src/dmrg_magma.h +3 −0 Original line number Diff line number Diff line Loading @@ -4,7 +4,10 @@ #include "cuda.h" #include "driver_types.h" #include "cuda_runtime.h" #include "cuda_runtime_api.h" #include "magma_types.h" #include "magma_v2.h" Loading src/dmrg_malloc.c +42 −0 Original line number Diff line number Diff line Loading @@ -59,6 +59,16 @@ void *ptr = NULL; assert( ierr == cudaSuccess ); } { CUdeviceptr devPtr = (CUdeviceptr) ptr; size_t count = alloc_size; CUmem_advise advice = CU_MEM_ADVISE_SET_PREFERRED_LOCATION; CUdevice device = 0; CUresult istat = cuMemAdvise( devPtr, count, advice, device ); assert( istat == CUDA_SUCCESS); } #else ptr = (void *) malloc( alloc_size ); #endif Loading @@ -66,6 +76,38 @@ void *ptr = NULL; return (ptr); } void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ) { assert( unified_memory_ptr != 0); if (nbytes <= 0) { return; }; #ifdef USE_MAGMA int deviceId = 0; cudaError_t istat = cudaGetDevice( &deviceId ); assert( istat == cudaSuccess ); struct cudaDeviceProp p; istat = cudaGetDeviceProperties(&p, deviceId ); assert( istat == cudaSuccess ); if (p.concurrentManagedAccess) { cudaStream_t stream = 0; const void * devPtr = unified_memory_ptr; istat = cudaMemPrefetchAsync( devPtr, nbytes, deviceId, stream ); assert( istat == cudaSuccess ); istat = cudaDeviceSynchronize(); assert( istat == cudaSuccess ); } #endif } void dmrg_free( void *ptr ) { #ifdef USE_MAGMA Loading src/dmrg_vbatch.h +3 −0 Original line number Diff line number Diff line Loading @@ -54,6 +54,9 @@ int dmrg_is_managed( const void *ptr ); extern void *dmrg_malloc( const size_t alloc_size ); extern void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ); extern void dmrg_free( void *a_ptr ); Loading src/setup_sparse_batch.c +3 −0 Original line number Diff line number Diff line Loading @@ -438,6 +438,9 @@ void setup_sparse_batch( }; /* end for ipatch */ dmrg_prefetch_to_device( pAmem, sum_Abatch_sizes); 
dmrg_prefetch_to_device( pBmem, sum_Bbatch_sizes); }; Loading Loading
src/dmrg_magma.h +3 −0 Original line number Diff line number Diff line Loading @@ -4,7 +4,10 @@ #include "cuda.h" #include "driver_types.h" #include "cuda_runtime.h" #include "cuda_runtime_api.h" #include "magma_types.h" #include "magma_v2.h" Loading
src/dmrg_malloc.c +42 −0 Original line number Diff line number Diff line Loading @@ -59,6 +59,16 @@ void *ptr = NULL; assert( ierr == cudaSuccess ); } { CUdeviceptr devPtr = (CUdeviceptr) ptr; size_t count = alloc_size; CUmem_advise advice = CU_MEM_ADVISE_SET_PREFERRED_LOCATION; CUdevice device = 0; CUresult istat = cuMemAdvise( devPtr, count, advice, device ); assert( istat == CUDA_SUCCESS); } #else ptr = (void *) malloc( alloc_size ); #endif Loading @@ -66,6 +76,38 @@ void *ptr = NULL; return (ptr); } void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ) { assert( unified_memory_ptr != 0); if (nbytes <= 0) { return; }; #ifdef USE_MAGMA int deviceId = 0; cudaError_t istat = cudaGetDevice( &deviceId ); assert( istat == cudaSuccess ); struct cudaDeviceProp p; istat = cudaGetDeviceProperties(&p, deviceId ); assert( istat == cudaSuccess ); if (p.concurrentManagedAccess) { cudaStream_t stream = 0; const void * devPtr = unified_memory_ptr; istat = cudaMemPrefetchAsync( devPtr, nbytes, deviceId, stream ); assert( istat == cudaSuccess ); istat = cudaDeviceSynchronize(); assert( istat == cudaSuccess ); } #endif } void dmrg_free( void *ptr ) { #ifdef USE_MAGMA Loading
src/dmrg_vbatch.h +3 −0 Original line number Diff line number Diff line Loading @@ -54,6 +54,9 @@ int dmrg_is_managed( const void *ptr ); extern void *dmrg_malloc( const size_t alloc_size ); /* Prefetch nbytes bytes starting at unified_memory_ptr to the current CUDA device and wait for completion; does nothing unless built with USE_MAGMA and the device reports concurrent managed access. */ extern void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ); extern void dmrg_free( void *a_ptr ); Loading
src/setup_sparse_batch.c +3 −0 Original line number Diff line number Diff line Loading @@ -438,6 +438,9 @@ void setup_sparse_batch( }; /* end for ipatch */ dmrg_prefetch_to_device( pAmem, sum_Abatch_sizes); dmrg_prefetch_to_device( pBmem, sum_Bbatch_sizes); }; Loading