Commit 4852f828 authored by D'azevedo, Ed
Browse files

add estimate_work()

parent c62b2870
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -103,7 +103,10 @@ void apply_Htarget_sparse(
  if (need_allocate_X) {
    X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim );
    assert( X_ != NULL );
    memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size );
    void *dest = (void *) &(X_[0]);
    void *src = (void *) &(Xin_[0]);
    size_t count = sizeof(FpType) * xy_size;
    dmrg_memcpy( dest, src, count );
    };

  if (need_allocate_Y) {
@@ -501,7 +504,10 @@ void apply_Htarget_sparse(
    dmrg_free( X_ ); X_ = NULL;
    };
 if (need_allocate_Y) {
   memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size );
   void *dest = &(Yout_[0]);
   void *src = &(Y_[0]);
   size_t count = sizeof(FpType) * xy_size;
   dmrg_memcpy( dest, src, count );
   dmrg_free( Y_ ); Y_ = NULL;
   };
#endif
+12 −0
Original line number Diff line number Diff line
@@ -20,6 +20,18 @@ int dmrg_is_managed( const void *ptr )
  return( is_managed );
}
  
/*
 * dmrg_memcpy: copy `count` bytes from `src` to `dest`.
 *
 * Without USE_MAGMA this is a plain memcpy.
 * With USE_MAGMA the copy is issued through cudaMemcpy using
 * cudaMemcpyDefault (per the CUDA runtime documentation the transfer
 * direction is then inferred from the pointer values).  A failed copy
 * is reported on stderr and then trapped by assert; note that assert
 * is compiled out when NDEBUG is defined.
 */
void dmrg_memcpy(void *dest, const void *src, size_t count)
{
#ifndef USE_MAGMA
  /* host-only build: ordinary byte copy */
  memcpy( dest, src, count );
#else
  const cudaError_t istat = cudaMemcpy( dest, src, count, cudaMemcpyDefault );

  /* print the CUDA error text before the assert fires */
  if (istat != cudaSuccess) {
    fprintf(stderr,"dmrg_memcpy: %s\n", cudaGetErrorString(istat));
    }

  assert( istat == cudaSuccess );
#endif
}

void *dmrg_malloc(const size_t alloc_size )
{
+2 −0
Original line number Diff line number Diff line
@@ -45,6 +45,8 @@ extern "C" {
extern
void dmrg_init();

/*
 * Copy n bytes from src to dest.  In builds with USE_MAGMA defined the
 * implementation calls cudaMemcpy(..., cudaMemcpyDefault); otherwise it
 * calls memcpy.
 */
extern
void dmrg_memcpy(void *dest, const void *src, size_t n);

extern
int dmrg_is_managed( const void *ptr );
+38 −44
Original line number Diff line number Diff line
@@ -10,7 +10,6 @@
#include <omp.h>
#endif

#include "analysis.h"



@@ -150,41 +149,6 @@ int main(int argc, char *argv[])
     xy_size += (nrowX * ncolX );
     };
 }
/*
 -------------------
 estimate total work
 -------------------
 */
 double total_flops_method1 = 0.0;
 double total_flops_method2 = 0.0;
 double total_flops = 0.0;
 {
 int ipatch = 0;
 int jpatch = 0;

 for(jpatch=1; jpatch <= npatches; jpatch++) {
 for(ipatch=1; ipatch <= npatches; ipatch++) {
    double flops_total = 0.0;
    double flops_method1 = 0.0;
    double flops_method2 = 0.0;
    int nrowA = left_patch_size(ipatch);
    int ncolA = left_patch_size(jpatch);
    int nrowB = right_patch_size(ipatch);
    int ncolB = right_patch_size(jpatch);

    cal_kron_flops( nrowA, nrowB, ncolA, ncolB, 
            &flops_total, &flops_method1,   &flops_method2);

    total_flops += flops_total;
    total_flops_method1 += flops_method1;
    total_flops_method2 += flops_method2;
    };
    };
  };
  
 total_flops *= noperator;
 total_flops_method1 *= noperator;
 total_flops_method2 *= noperator;
    
    

@@ -194,15 +158,7 @@ int main(int argc, char *argv[])
         keep_left_states,    keep_right_states, noperator, (long int) xy_size );

 printf("npatches=%d\n", npatches );
 {
 double total_gflops = total_flops/(1000.0*1000.0*1000.0);
 double total_gflops_method1 = total_flops_method1 / (1000.0*1000.0*1000.0);
 double total_gflops_method2 = total_flops_method2 / (1000.0*1000.0*1000.0);

 printf("total_gflops=%lf, total_gflops_method1=%lf, total_gflops_method2=%lf\n",
         total_gflops,     total_gflops_method1,     total_gflops_method2 );

 };


 if (idebug >= 1) {
@@ -271,6 +227,14 @@ int main(int argc, char *argv[])

   }
 else {
    nC_ = (int *) malloc( sizeof(int) * npatches * npatches );
    assert( nC_ != NULL );

    int i = 0;
    for(i=0; i < npatches*npatches; i++) {
      nC_[i] = noperator;
      };
    
 
    setup_vbatch( noperator, npatches, 
                  &(left_patch_size_[0]), 
@@ -284,6 +248,36 @@ int main(int argc, char *argv[])
                  Amatrix, ld_Amatrix,  Bmatrix, ld_Bmatrix );
    };


 {
 double total_gflops = 0;
 double gmemA = 0;
 double gmemB = 0;
 double gmemBX = 0;
 
 estimate_work( npatches, 
                left_patch_size_, 
                right_patch_size_,
                nC_, 
                &total_gflops, 
                &gmemA, 
                &gmemB, 
                &gmemBX 
              );

 double giga = 1000.0*1000.0*1000.0;
 double gmemA_gbytes = gmemA * sizeof(double)/giga;
 double gmemB_gbytes = gmemB * sizeof(double)/giga;
 double gmemBX_gbytes = gmemBX * sizeof(double)/giga;

 printf("total_gflops=%lf \n", total_gflops );
 printf("memory for Amat=%lf GBytes\n", gmemA_gbytes );
 printf("memory for Bmat=%lf GBytes\n", gmemB_gbytes );
 printf("memory for BXmat=%lf GBytes\n", gmemBX_gbytes );

 }


#define Abatch(i,j)  Abatch_[indx2f(i,j,ld_Abatch)]
#define Bbatch(i,j)  Bbatch_[indx2f(i,j,ld_Bbatch)]

+2 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
#include <math.h>
#include <assert.h>

#include "analysis.h"
#include "dmrg_types.h"
#include "dmrg_vbatch.h"

@@ -12,5 +13,6 @@
#include "setup_sparse_batch.h"
#include "setup_matrix.h"

#include "estimate_work.h"

#endif