Commit 4832b0ed authored by D'azevedo, Ed
Browse files

Merge branch 'master' into use_magma_timing

parents a590de66 06a10210
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
#include <stdexcept>

#include "dmrg_types.h"
#include "estimate_work.h"

template<typename T>
class BatchedGemm {
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ OBJS=\
	setup_sparse_batch.o \
        unsetup_sparse_batch.o \
        apply_Htarget_sparse.o \
        estimate_work.o \
        BatchedGemm.o

libdmrgppPluginSc.a: $(OBJS)
+8 −2
Original line number Diff line number Diff line
@@ -103,7 +103,10 @@ void apply_Htarget_sparse(
  if (need_allocate_X) {
    X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim );
    assert( X_ != NULL );
    memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size );
    void *dest = (void *) &(X_[0]);
    void *src = (void *) &(Xin_[0]);
    size_t count = sizeof(FpType) * xy_size;
    dmrg_memcpy( dest, src, count );
    };

  if (need_allocate_Y) {
@@ -501,7 +504,10 @@ void apply_Htarget_sparse(
    dmrg_free( X_ ); X_ = NULL;
    };
 if (need_allocate_Y) {
   memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size );
   void *dest = &(Yout_[0]);
   void *src = &(Y_[0]);
   size_t count = sizeof(FpType) * xy_size;
   dmrg_memcpy( dest, src, count );
   dmrg_free( Y_ ); Y_ = NULL;
   };
#endif
+2 −1
Original line number Diff line number Diff line
@@ -23,7 +23,8 @@ void cal_kron_flops(
    double flops_BX = (2.0*nrowB) * ncolB * ncolX;
    
    int ncolBX = ncolX;
    double flops_BX_At =  (2.0*nrowY) * ncolY * ncolBX;
    int nrowBX = nrowB;
    double flops_BX_At =  (2.0*nrowBX) * ncolBX * nrowA;
    double flops_method1 = flops_BX + flops_BX_At;
    
    /*
+12 −0
Original line number Diff line number Diff line
@@ -20,6 +20,18 @@ int dmrg_is_managed( const void *ptr )
  return( is_managed );
}
  
/*
 * dmrg_memcpy: copy `count` bytes from `src` to `dest`.
 *
 * Without USE_MAGMA this is a plain memcpy().
 * With USE_MAGMA the copy is issued through cudaMemcpy() using
 * cudaMemcpyDefault, i.e. the copy direction is not given explicitly
 * by this routine.  A failing cudaMemcpy() is reported on stderr and
 * then trips the assert below; note that when assert() is compiled
 * out (NDEBUG) the failure is only logged and the routine returns.
 */
void dmrg_memcpy(void *dest, const void *src, size_t count)
{
#ifndef USE_MAGMA
  /* host-only build: ordinary byte copy */
  memcpy( dest, src, count );
#else
  const cudaError_t istat = cudaMemcpy( dest, src, count, cudaMemcpyDefault );

  /* print the CUDA diagnostic before the assert fires */
  if (istat != cudaSuccess) {
    fprintf(stderr,"dmrg_memcpy: %s\n", cudaGetErrorString(istat));
    }

  assert( istat == cudaSuccess );
#endif
}

void *dmrg_malloc(const size_t alloc_size )
{
Loading