Loading src/dmrg_malloc.c +47 −0 Original line number Diff line number Diff line Loading @@ -76,6 +76,53 @@ void *ptr = NULL; return (ptr); } void dmrg_lacpy( const char uplo, const int m, const int n, const FpType *src, const int ld_src, FpType *dest, const int ld_dest ) { #ifdef USE_MAGMA const int is_upper = (uplo == 'U') || (uplo == 'u'); const int is_lower = (uplo == 'L') || (uplo == 'l'); const int is_full = (!is_upper) && (!is_lower); int is_block_copy = is_full && (m == ld_src) && (m == ld_dest); if (is_block_copy) { const size_t nbytes = sizeof(FpType) * m * n; dmrg_memcpy( dest, src, nbytes ); } else { const int min_mn = (m <= n) ? m : n; const int ncol = is_full ? n : min_mn; int jcol = 0; for(jcol=0; jcol < ncol; jcol++) { const int irow = jcol; int istart = is_upper ? 0 : is_lower ? irow : 0; int iend = is_upper ? irow : is_lower ? m-1 : m-1; int count = iend - istart + 1; if (count >= 1) { const FpType *psrc = src + jcol * ld_src + istart; FpType *pdest = dest + jcol * ld_dest + istart; const size_t nbytes = count * sizeof(FpType); // dmrg_memcpy( pdest, psrc, nbytes ); memcpy( pdest, psrc, nbytes ); }; }; }; #else Xlacpy_( &uplo, &m, &n, src, &ld_src, dest, &ld_dest ); #endif } void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ) { assert( unified_memory_ptr != 0); Loading src/dmrg_vbatch.h +6 −0 Original line number Diff line number Diff line Loading @@ -54,6 +54,12 @@ int dmrg_is_managed( const void *ptr ); extern void *dmrg_malloc( const size_t alloc_size ); extern void dmrg_lacpy( const char uplo, const int m, const int n, const FpType *src, const int ld_src, FpType *dest, const int ld_dest ); extern void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ); Loading src/setup_sparse_batch.c +9 −6 Original line number Diff line number Diff line Loading @@ -408,9 +408,11 @@ void setup_sparse_batch( int isok = (1 <= m) && (1 <= n) && (m <= ld1) && (m <= ld2); assert( isok ); Xlacpy_( &uplo, &m, &n, Asrc, &ld1, Adest, 
&ld2); // -------------------------------------------------- // Xlacpy_( &uplo, &m, &n, Asrc, &ld1, Adest, &ld2); // -------------------------------------------------- dmrg_lacpy( uplo, m,n, Asrc,ld1, Adest, ld2 ); Adest += (ld2 * n); }; Loading @@ -425,9 +427,10 @@ void setup_sparse_batch( int isok = (1 <= m) && (1 <= n) && (m <= ld1) && (m <= ld2); assert(isok); Xlacpy_( &uplo, &m, &n, Bsrc, &ld1, Bdest, &ld2); // -------------------------------------------------- // Xlacpy_( &uplo, &m, &n, Bsrc, &ld1, Bdest, &ld2); // -------------------------------------------------- dmrg_lacpy( uplo, m,n, Bsrc, ld1, Bdest, ld2 ); Bdest += (ld2 * n); Loading Loading
src/dmrg_malloc.c +47 −0 Original line number Diff line number Diff line Loading @@ -76,6 +76,53 @@ void *ptr = NULL; return (ptr); } void dmrg_lacpy( const char uplo, const int m, const int n, const FpType *src, const int ld_src, FpType *dest, const int ld_dest ) { #ifdef USE_MAGMA const int is_upper = (uplo == 'U') || (uplo == 'u'); const int is_lower = (uplo == 'L') || (uplo == 'l'); const int is_full = (!is_upper) && (!is_lower); int is_block_copy = is_full && (m == ld_src) && (m == ld_dest); if (is_block_copy) { const size_t nbytes = sizeof(FpType) * m * n; dmrg_memcpy( dest, src, nbytes ); } else { const int min_mn = (m <= n) ? m : n; const int ncol = is_full ? n : min_mn; int jcol = 0; for(jcol=0; jcol < ncol; jcol++) { const int irow = jcol; int istart = is_upper ? 0 : is_lower ? irow : 0; int iend = is_upper ? irow : is_lower ? m-1 : m-1; int count = iend - istart + 1; if (count >= 1) { const FpType *psrc = src + jcol * ld_src + istart; FpType *pdest = dest + jcol * ld_dest + istart; const size_t nbytes = count * sizeof(FpType); // dmrg_memcpy( pdest, psrc, nbytes ); memcpy( pdest, psrc, nbytes ); }; }; }; #else Xlacpy_( &uplo, &m, &n, src, &ld_src, dest, &ld_dest ); #endif } void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ) { assert( unified_memory_ptr != 0); Loading
src/dmrg_vbatch.h +6 −0 Original line number Diff line number Diff line Loading @@ -54,6 +54,12 @@ int dmrg_is_managed( const void *ptr ); extern void *dmrg_malloc( const size_t alloc_size );
/*
 * Copy all or a triangular part of an m-by-n matrix from src to dest.
 *   uplo             'U'/'u' selects the upper part, 'L'/'l' the lower part,
 *                    any other character the full matrix.
 *   m, n             number of rows / columns.
 *   src, ld_src      source matrix and its leading dimension
 *                    (element (i,j) is at src[i + j*ld_src]).
 *   dest, ld_dest    destination matrix and its leading dimension.
 * When USE_MAGMA is not defined the definition forwards to Xlacpy_.
 */
extern void dmrg_lacpy( const char uplo, const int m, const int n, const FpType *src, const int ld_src, FpType *dest, const int ld_dest ); extern void dmrg_prefetch_to_device( void *unified_memory_ptr, size_t nbytes ); Loading
src/setup_sparse_batch.c +9 −6 Original line number Diff line number Diff line Loading @@ -408,9 +408,11 @@ void setup_sparse_batch( int isok = (1 <= m) && (1 <= n) && (m <= ld1) && (m <= ld2); assert( isok ); Xlacpy_( &uplo, &m, &n, Asrc, &ld1, Adest, &ld2); // -------------------------------------------------- // Xlacpy_( &uplo, &m, &n, Asrc, &ld1, Adest, &ld2); // -------------------------------------------------- dmrg_lacpy( uplo, m,n, Asrc,ld1, Adest, ld2 ); Adest += (ld2 * n); }; Loading @@ -425,9 +427,10 @@ void setup_sparse_batch( int isok = (1 <= m) && (1 <= n) && (m <= ld1) && (m <= ld2); assert(isok); Xlacpy_( &uplo, &m, &n, Bsrc, &ld1, Bdest, &ld2); // -------------------------------------------------- // Xlacpy_( &uplo, &m, &n, Bsrc, &ld1, Bdest, &ld2); // -------------------------------------------------- dmrg_lacpy( uplo, m,n, Bsrc, ld1, Bdest, ld2 ); Bdest += (ld2 * n); Loading