Commit 66f63590 authored by D'azevedo, Ed's avatar D'azevedo, Ed
Browse files

Merge branch 'master' of https://code.ornl.gov/e6d/dmrgppPluginSc

parents 3d4fcde4 50ec5d19
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ OBJS=\
	setup_sparse_batch.o \
        unsetup_sparse_batch.o \
        apply_Htarget_sparse.o \
        estimate_work.o \
        BatchedGemm.o

libdmrgppPluginSc.a: $(OBJS)
+8 −2
Original line number Diff line number Diff line
@@ -103,7 +103,10 @@ void apply_Htarget_sparse(
  if (need_allocate_X) {
    X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim );
    assert( X_ != NULL );
    memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size );
    void *dest = (void *) &(X_[0]);
    void *src = (void *) &(Xin_[0]);
    size_t count = sizeof(FpType) * xy_size;
    dmrg_memcpy( dest, src, count );
    };

  if (need_allocate_Y) {
@@ -501,7 +504,10 @@ void apply_Htarget_sparse(
    dmrg_free( X_ ); X_ = NULL;
    };
 if (need_allocate_Y) {
   memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size );
   void *dest = &(Yout_[0]);
   void *src = &(Y_[0]);
   size_t count = sizeof(FpType) * xy_size;
   dmrg_memcpy( dest, src, count );
   dmrg_free( Y_ ); Y_ = NULL;
   };
#endif
+12 −0
Original line number Diff line number Diff line
@@ -20,6 +20,18 @@ int dmrg_is_managed( const void *ptr )
  return( is_managed );
}
  
/*
 * dmrg_memcpy: copy `count` bytes from `src` to `dest`.
 *
 * With USE_MAGMA defined the copy is issued through cudaMemcpy using
 * cudaMemcpyDefault, i.e. the CUDA runtime is asked to work out the
 * transfer direction from the two pointer values.  A failure is reported
 * on stderr with the CUDA error string and then trips the assert.
 * NOTE(review): when NDEBUG is defined the assert is compiled out and a
 * failed copy is only reported, not fatal.
 *
 * Without USE_MAGMA this is a plain host memcpy, so the two regions
 * must not overlap.  A zero-length request returns immediately, which
 * keeps callers that pass NULL together with count == 0 well defined.
 */
void dmrg_memcpy(void *dest, const void *src, size_t count)
{
#ifdef USE_MAGMA
  cudaError_t istat = cudaMemcpy( dest, src, count, cudaMemcpyDefault );
  if (istat != cudaSuccess) {
    fprintf(stderr,"dmrg_memcpy: %s\n", cudaGetErrorString(istat));
    };
  assert( istat == cudaSuccess );
#else
  /* memcpy requires valid pointers even for a zero byte count */
  if (count == 0) {
    return;
    };
  assert( dest != NULL );
  assert( src != NULL );
  memcpy( dest, src, count );
#endif
}

void *dmrg_malloc(const size_t alloc_size )
{
+2 −0
Original line number Diff line number Diff line
@@ -45,6 +45,8 @@ extern "C" {
/* Initialization entry point; what it sets up is not visible from this
   declaration -- see the implementation before relying on it. */
extern
void dmrg_init();

/*
 * Copy n bytes from src to dest.
 * The implementation uses cudaMemcpy with cudaMemcpyDefault when built
 * with USE_MAGMA and plain memcpy otherwise, so the same call is used
 * whether the build targets the CUDA runtime or the host only.
 */
extern
void dmrg_memcpy(void *dest, const void *src, size_t n);

/* Returns an int flag describing ptr.
   NOTE(review): presumably nonzero when ptr refers to CUDA managed
   memory -- confirm against the implementation. */
extern
int dmrg_is_managed( const void *ptr );

src/estimate_work.c

0 → 100644
+76 −0
Original line number Diff line number Diff line
#include "test_vbatch.h"

/*
 * estimate_work: accumulate a work estimate over all patch pairs.
 *
 * Inputs (arrays are accessed with 1-based indices through the macros
 * defined below):
 *   npatches           number of patches; the loops visit every
 *                      (ipatch,jpatch) pair in 1..npatches
 *   left_patch_size_   npatches entries, read as left_patch_size(i)
 *   right_patch_size_  npatches entries, read as right_patch_size(i)
 *   nC_                npatches*npatches entries, read as nC(i,j) with
 *                      ipatch as the fastest varying index; pairs with
 *                      nC(i,j) <= 0 are skipped
 *
 * Outputs (all four pointers must be non-NULL):
 *   *ptotal_gflops     sum of flops_method1 * nC(i,j) as returned by
 *                      cal_kron_flops, divided by 1.0e9
 *   *pgmemA            sum of nC(i,j) * nrowA * ncolA
 *   *pgmemB            sum of nC(i,j) * nrowB * ncolB
 *   *pgmemBX           sum of nC(i,j) * nrowB * ncolX  (ncolX == ncolA)
 *
 * The three gmem outputs are entry counts: no element size and no
 * giga-scaling is applied here.
 * NOTE(review): the "gmem" names suggest the caller converts these to
 * memory sizes -- confirm at the call site.
 */
void estimate_work( int npatches, 
                    int left_patch_size_[],
                    int right_patch_size_[],
                    int nC_[], 
                    double *ptotal_gflops, 
                    double *pgmemA, 
                    double *pgmemB, 
                    double *pgmemBX )
#define nC(ipatch,jpatch) nC_[ ((ipatch)-1) + ((jpatch)-1)*npatches ]
#define left_patch_size(ipatch) left_patch_size_[(ipatch)-1]
#define right_patch_size(ipatch) right_patch_size_[(ipatch)-1]
{
 assert( ptotal_gflops != NULL );
 assert( pgmemA != NULL );
 assert( pgmemB != NULL );
 assert( pgmemBX != NULL );

 /* the input arrays are only dereferenced when there is a patch to visit */
 assert( (npatches <= 0) || (left_patch_size_ != NULL) );
 assert( (npatches <= 0) || (right_patch_size_ != NULL) );
 assert( (npatches <= 0) || (nC_ != NULL) );

 double gmemA = 0.0;
 double gmemB = 0.0;
 double gmemBX = 0.0;
 double total_flops = 0.0;

 int ipatch = 0;
 int jpatch = 0;

 for(jpatch=1; jpatch <= npatches; jpatch++) {
 for(ipatch=1; ipatch <= npatches; ipatch++) {
    const int nop = nC(ipatch,jpatch);
    if (nop <= 0) {
      /* no operators stored for this patch pair */
      continue;
      }

    double flops_total = 0.0;
    double flops_method1 = 0.0;
    double flops_method2 = 0.0;

    /*
     --------------------------------------
     Note: evaluate (B * X ) * transpose(A)
     --------------------------------------
     */
    const int nrowA = left_patch_size(ipatch);
    const int ncolA = left_patch_size(jpatch);
    const int nrowB = right_patch_size(ipatch);
    const int ncolB = right_patch_size(jpatch);
    const int ncolX = ncolA;

    /*
     ------------------------------------------------------------
     Promote to double BEFORE multiplying: a product such as
     nop * nrowA * ncolA evaluated in int overflows for realistic
     patch sizes (e.g. 100 * 5000 * 5000 > INT_MAX).
     Results are unchanged whenever the int product did not overflow.
     ------------------------------------------------------------
     */
    const double dnop = (double) nop;
    gmemA  += dnop * (double) nrowA * (double) ncolA;
    gmemB  += dnop * (double) nrowB * (double) ncolB;
    gmemBX += dnop * (double) nrowB * (double) ncolX;

    /*
     Only flops_method1 enters the total; flops_total and flops_method2
     are filled in by cal_kron_flops but not used here.
     NOTE(review): presumably method1 is the (B*X)*transpose(A)
     ordering noted above -- confirm in cal_kron_flops.
     */
    cal_kron_flops( nrowA, nrowB, ncolA, ncolB, 
            &flops_total, &flops_method1,   &flops_method2);

    total_flops += flops_method1 * dnop;
    }
    }

 *ptotal_gflops = total_flops/(1000.0*1000.0*1000.0);

 *pgmemA = gmemA;
 *pgmemB = gmemB;
 *pgmemBX = gmemBX;

}
Loading