src/apply_Htarget_sparse.c  +26 −11

@@ -50,6 +50,10 @@ void apply_Htarget_sparse(
   FpType *X_ = &(Xin_[0]);
   FpType *Y_ = &(Yout_[0]);
+#ifdef USE_MAGMA
+  int need_allocate_X = !dmrg_is_managed( Xin_ );
+  int need_allocate_Y = !dmrg_is_managed( Yout_ );
+#endif

   /*
    * ------------------

@@ -78,23 +82,29 @@ void apply_Htarget_sparse(
     right_patch_size(ipatch) = R2 - R1 + 1;
   };

-  long Xsize = 0;
+#ifdef USE_MAGMA
+  long xy_size = 0;
   for(ipatch=1; ipatch <= npatches; ipatch++) {
-    Xsize += left_patch_size(ipatch) * right_patch_size(ipatch);
+    xy_size += left_patch_size(ipatch) * right_patch_size(ipatch);
   };
+  long xy_size_dim = ialign * ICEIL( xy_size, ialign );

-#ifdef MAGMA
   /*
    * -----------------------
    * allocate unified memory
    * -----------------------
    */
-  X_ = (FpType *) dmrg_malloc( sizeof(FpType) * Xsize );
-  Y_ = (FpType *) dmrg_malloc( sizeof(FpType) * Xsize );
+  if (need_allocate_X) {
+    X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim );
+    assert( X_ != NULL );
+    memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size );
+  };
+  if (need_allocate_Y) {
+    Y_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim );
+    assert( Y_ != NULL );
+  };

-  memcpy( &(X_[0]), &(Xin_[0]), nbytes );
 #endif

@@ -480,9 +490,14 @@ void apply_Htarget_sparse(
    * free unified memory
    * -------------------
    */
-  memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * Xsize );
+  if (need_allocate_X) {
+    dmrg_free( X_ ); X_ = NULL;
+  };
+  if (need_allocate_Y) {
+    memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size );
+    dmrg_free( Y_ ); Y_ = NULL;
+  };
 #endif
 }
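The second hunk now allocates unified memory only when the incoming buffers are not already managed, and it pads the patch-vector length up to a multiple of ialign before calling dmrg_malloc(). Below is a minimal, standalone sketch of that rounding step; the ICEIL definition and the ialign value are assumptions for illustration (neither appears in this diff), and the sketch is not the project's code.

/* Sketch of the padded-allocation arithmetic used in the hunk above.
 * Assumption: ICEIL(n, d) is the usual round-up integer division.
 * Assumption: ialign = 32 is a placeholder alignment, not taken from the source.
 */
#include <stdio.h>

#define ICEIL(n, d) ( ((n) + (d) - 1) / (d) )   /* ceil(n/d) for positive integers */

int main(void)
{
    const long ialign  = 32;      /* placeholder alignment */
    const long xy_size = 1000;    /* e.g. the sum of left_patch_size * right_patch_size */

    /* round the element count up to the next multiple of ialign */
    long xy_size_dim = ialign * ICEIL( xy_size, ialign );

    printf("xy_size = %ld, padded xy_size_dim = %ld\n", xy_size, xy_size_dim);
    /* prints: xy_size = 1000, padded xy_size_dim = 1024 */
    return 0;
}

The need_allocate_X / need_allocate_Y flags computed in the first hunk then decide whether the padded buffers are allocated and copied at all, or whether the caller's already-managed Xin_ / Yout_ buffers are used directly.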
src/dmrg_malloc.c  +16 −0

@@ -6,6 +6,22 @@
 #include "cuda_runtime.h"
 #endif

+int dmrg_is_managed( const void *ptr )
+{
+  const int false = (0 == 1);
+  int is_managed = false;
+#ifdef USE_MAGMA
+  struct cudaPointerAttributes attribute;
+  cudaError_t ierr = cudaPointerGetAttributes( &attribute, ptr );
+  assert( ierr == cudaSuccess );
+
+  is_managed = attribute.isManaged;
+#endif
+  return( is_managed );
+}
+
 void *dmrg_malloc(const size_t alloc_size )
 {
   void *ptr = NULL;
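For reference, here is a standalone sketch of the managed-memory test that dmrg_is_managed() performs, written against the CUDA 11+ runtime, where cudaPointerAttributes exposes a type field rather than the older isManaged flag used above. The helper name is_managed_ptr and the file name are mine, not the project's.

/* Sketch: detecting cudaMallocManaged buffers with cudaPointerGetAttributes().
 * Build (illustrative): nvcc is_managed.cu -o is_managed
 */
#include <assert.h>
#include <stdio.h>
#include <cuda_runtime.h>

/* Returns 1 if ptr was allocated with cudaMallocManaged, 0 otherwise. */
static int is_managed_ptr( const void *ptr )
{
    struct cudaPointerAttributes attr;
    cudaError_t err = cudaPointerGetAttributes( &attr, ptr );
    assert( err == cudaSuccess );
    /* CUDA 11+: plain host pointers report cudaMemoryTypeUnregistered
     * rather than an error, so the assert above holds for them too. */
    return ( attr.type == cudaMemoryTypeManaged );
}

int main(void)
{
    double  host_buf[4] = {0};
    double *managed_buf = NULL;
    cudaError_t err = cudaMallocManaged( (void **) &managed_buf,
                                         4 * sizeof(double), cudaMemAttachGlobal );
    assert( err == cudaSuccess );

    printf("stack buffer managed?            %d\n", is_managed_ptr( host_buf ));    /* 0 */
    printf("cudaMallocManaged buffer managed? %d\n", is_managed_ptr( managed_buf )); /* 1 */

    cudaFree( managed_buf );
    return 0;
}

On pre-11 toolkits, which still expose attribute.isManaged as used in the diff above, cudaPointerGetAttributes() instead returns cudaErrorInvalidValue for ordinary host pointers that were never registered with CUDA, so behaviour around the assert differs between toolkit versions.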
src/dmrg_vbatch.h  +2 −0

@@ -46,6 +46,8 @@
 extern void dmrg_init();

+extern int dmrg_is_managed( const void *ptr );
+
 extern void *dmrg_malloc( const size_t alloc_size );