Loading src/estimate_work.c +50 −4 Original line number Diff line number Diff line #include "test_vbatch.h" void get_total_memory( int npatches, int left_patch_size_[], int right_patch_size_[], int nC_[], size_t *ptotal_memory_in_nbytes ) { double total_gflops = 0; double gmemA = 0; double gmemB = 0; double gmemBX = 0; double gmemXY = 0; estimate_work( npatches, left_patch_size_, right_patch_size_, nC_, &total_gflops, &gmemA, &gmemB, &gmemBX, &gmemXY ); size_t total_memory_in_bytes = sizeof(FpType) * (gmemA + gmemB + gmemBX + gmemXY); *ptotal_memory_in_nbytes = total_memory_in_bytes; } void estimate_work( int npatches, int left_patch_size_[], int right_patch_size_[], Loading @@ -7,7 +35,8 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX ) double *pgmemBX, double *pgmemXY ) #define nC(ipatch,jpatch) nC_[ ((ipatch)-1) + ((jpatch)-1)*npatches ] #define left_patch_size(ipatch) left_patch_size_[(ipatch)-1] #define right_patch_size(ipatch) right_patch_size_[(ipatch)-1] Loading @@ -17,6 +46,7 @@ void estimate_work( int npatches, estimate total work ------------------- */ const int ialign = 32; const double giga = 1000.0 * 1000.0 * 1000.0; assert( ptotal_gflops != NULL ); Loading @@ -27,12 +57,24 @@ void estimate_work( int npatches, double gmemA = 0.0; double gmemB = 0.0; double gmemBX = 0.0; double gmemXY = 0.0; double total_flops = 0.0; { int ipatch = 0; int jpatch = 0; for(ipatch=1; ipatch <= npatches; ipatch++) { gmemXY += left_patch_size(ipatch) * right_patch_size(ipatch); }; /* * ----------------------- * count both X, Y vectors * ----------------------- */ gmemXY *= 2; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { Loading @@ -56,9 +98,12 @@ void estimate_work( int npatches, int ncolB = right_patch_size(jpatch); int ncolX = ncolA; gmemA += nop * nrowA * ncolA; gmemB += nop * nrowB * ncolB; gmemBX += nop * nrowB * ncolX; int ldA = ialign * (( nrowA + (ialign-1))/ialign ); int ldB = ialign * (( nrowB + (ialign-1))/ialign ); int ldBX = ldB; gmemA += nop * ldA * ncolA; gmemB += nop * ldB * ncolB; gmemBX += nop * ldBX * ncolX; cal_kron_flops( nrowA, nrowB, ncolA, ncolB, &flops_total, &flops_method1, &flops_method2); Loading @@ -74,5 +119,6 @@ void estimate_work( int npatches, *pgmemA = gmemA; *pgmemB = gmemB; *pgmemBX = gmemBX; *pgmemXY = gmemXY; } src/estimate_work.h +10 −1 Original line number Diff line number Diff line Loading @@ -13,8 +13,17 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX double *pgmemBX, double *pgmemXY ); extern void get_total_memory( int npatches, int left_patch_size_[], int right_patch_size_[], int nC_[], size_t *ptotal_memory_in_nbytes ); #ifdef __cplusplus } #endif Loading src/test_vbatch.c +15 −1 Original line number Diff line number Diff line Loading @@ -259,6 +259,7 @@ int main(int argc, char *argv[]) double gmemA = 0; double gmemB = 0; double gmemBX = 0; double gmemXY = 0; estimate_work( npatches, left_patch_size_, Loading @@ -267,18 +268,31 @@ int main(int argc, char *argv[]) &total_gflops, &gmemA, &gmemB, &gmemBX &gmemBX, &gmemXY ); double gmemA_gbytes = gmemA * sizeof(FpType)/giga; double gmemB_gbytes = gmemB * sizeof(FpType)/giga; double gmemBX_gbytes = gmemBX * sizeof(FpType)/giga; double gmemXY_gbytes = gmemXY * sizeof(FpType)/giga; printf("total_gflops=%lf \n", total_gflops ); printf("test_vbatch:estimated memory for Amat=%lf GBytes\n", gmemA_gbytes ); printf("test_vbatch:estimated memory for Bmat=%lf GBytes\n", gmemB_gbytes ); printf("test_vbatch:estimated memory for BXmat=%lf GBytes\n", gmemBX_gbytes ); printf("test_vbatch:estimated memory for X and Y =%lf GBytes\n", gmemXY_gbytes ); size_t total_memory_in_nbytes = 0; get_total_memory( npatches, left_patch_size_, right_patch_size_, nC_, &total_memory_in_nbytes ); printf("total_memory_in_nbytes = %lf GBytes\n", (double) total_memory_in_nbytes/giga ); } Loading Loading
src/estimate_work.c +50 −4 Original line number Diff line number Diff line #include "test_vbatch.h" void get_total_memory( int npatches, int left_patch_size_[], int right_patch_size_[], int nC_[], size_t *ptotal_memory_in_nbytes ) { double total_gflops = 0; double gmemA = 0; double gmemB = 0; double gmemBX = 0; double gmemXY = 0; estimate_work( npatches, left_patch_size_, right_patch_size_, nC_, &total_gflops, &gmemA, &gmemB, &gmemBX, &gmemXY ); size_t total_memory_in_bytes = sizeof(FpType) * (gmemA + gmemB + gmemBX + gmemXY); *ptotal_memory_in_nbytes = total_memory_in_bytes; } void estimate_work( int npatches, int left_patch_size_[], int right_patch_size_[], Loading @@ -7,7 +35,8 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX ) double *pgmemBX, double *pgmemXY ) #define nC(ipatch,jpatch) nC_[ ((ipatch)-1) + ((jpatch)-1)*npatches ] #define left_patch_size(ipatch) left_patch_size_[(ipatch)-1] #define right_patch_size(ipatch) right_patch_size_[(ipatch)-1] Loading @@ -17,6 +46,7 @@ void estimate_work( int npatches, estimate total work ------------------- */ const int ialign = 32; const double giga = 1000.0 * 1000.0 * 1000.0; assert( ptotal_gflops != NULL ); Loading @@ -27,12 +57,24 @@ void estimate_work( int npatches, double gmemA = 0.0; double gmemB = 0.0; double gmemBX = 0.0; double gmemXY = 0.0; double total_flops = 0.0; { int ipatch = 0; int jpatch = 0; for(ipatch=1; ipatch <= npatches; ipatch++) { gmemXY += left_patch_size(ipatch) * right_patch_size(ipatch); }; /* * ----------------------- * count both X, Y vectors * ----------------------- */ gmemXY *= 2; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { Loading @@ -56,9 +98,12 @@ void estimate_work( int npatches, int ncolB = right_patch_size(jpatch); int ncolX = ncolA; gmemA += nop * nrowA * ncolA; gmemB += nop * nrowB * ncolB; gmemBX += nop * nrowB * ncolX; int ldA = ialign * (( nrowA + (ialign-1))/ialign ); int ldB = ialign * (( nrowB + (ialign-1))/ialign ); int ldBX = ldB; gmemA += nop * ldA * ncolA; gmemB += nop * ldB * ncolB; gmemBX += nop * ldBX * ncolX; cal_kron_flops( nrowA, nrowB, ncolA, ncolB, &flops_total, &flops_method1, &flops_method2); Loading @@ -74,5 +119,6 @@ void estimate_work( int npatches, *pgmemA = gmemA; *pgmemB = gmemB; *pgmemBX = gmemBX; *pgmemXY = gmemXY; }
src/estimate_work.h +10 −1 Original line number Diff line number Diff line Loading @@ -13,8 +13,17 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX double *pgmemBX, double *pgmemXY ); extern void get_total_memory( int npatches, int left_patch_size_[], int right_patch_size_[], int nC_[], size_t *ptotal_memory_in_nbytes ); #ifdef __cplusplus } #endif Loading
src/test_vbatch.c +15 −1 Original line number Diff line number Diff line Loading @@ -259,6 +259,7 @@ int main(int argc, char *argv[]) double gmemA = 0; double gmemB = 0; double gmemBX = 0; double gmemXY = 0; estimate_work( npatches, left_patch_size_, Loading @@ -267,18 +268,31 @@ int main(int argc, char *argv[]) &total_gflops, &gmemA, &gmemB, &gmemBX &gmemBX, &gmemXY ); double gmemA_gbytes = gmemA * sizeof(FpType)/giga; double gmemB_gbytes = gmemB * sizeof(FpType)/giga; double gmemBX_gbytes = gmemBX * sizeof(FpType)/giga; double gmemXY_gbytes = gmemXY * sizeof(FpType)/giga; printf("total_gflops=%lf \n", total_gflops ); printf("test_vbatch:estimated memory for Amat=%lf GBytes\n", gmemA_gbytes ); printf("test_vbatch:estimated memory for Bmat=%lf GBytes\n", gmemB_gbytes ); printf("test_vbatch:estimated memory for BXmat=%lf GBytes\n", gmemBX_gbytes ); printf("test_vbatch:estimated memory for X and Y =%lf GBytes\n", gmemXY_gbytes ); size_t total_memory_in_nbytes = 0; get_total_memory( npatches, left_patch_size_, right_patch_size_, nC_, &total_memory_in_nbytes ); printf("total_memory_in_nbytes = %lf GBytes\n", (double) total_memory_in_nbytes/giga ); } Loading