Commit a49ebd61 authored by D'azevedo, Ed's avatar D'azevedo, Ed
Browse files

add extra checks

parent 404eeace
Loading
Loading
Loading
Loading
+119 −4
Original line number Diff line number Diff line
@@ -17,8 +17,14 @@

#include "dmrg_magma.h"

/* Capacity of the per-GPU tables below; also passed to magma_getdevices()
 * in dmrg_init as the maximum number of devices to report. */
#define MAXGPUS 8
   /* queue_array[i] holds the queue created in dmrg_init for device_array[i]. */
   static magma_queue_t queue_array[MAXGPUS];
   /* Device ids filled in by magma_getdevices() in dmrg_init. */
   static magma_device_t device_array[MAXGPUS];
   /* Number of devices reported by magma_getdevices(); 0 until dmrg_init runs. */
   static int ngpu = 0;

   /* Currently selected queue/device pair. dmrg_init leaves these pointing at
    * entry 0 of the tables above; the multi-GPU GEMM path reassigns them while
    * it walks the devices and resets them to entry 0 afterwards. */
   static magma_queue_t queue = 0;
   static int device = 0;

#endif


@@ -28,11 +34,35 @@ void dmrg_init()
     is_initialized = 1;

#ifdef USE_MAGMA
     const int idebug = 1;

     device = 0;
     magma_init();

     magma_getdevices( device_array, MAXGPUS, &ngpu );
     assert( ngpu >= 1 );

      
     if (idebug >= 1) {
         printf("dmrg_init: ngpu = %d \n",ngpu);
     };


     int idev = 0;
     for (idev = 0; idev < ngpu; idev++) {
       device = device_array[idev];

       magma_setdevice( device );
       magma_queue_create( device, &queue );
       assert( queue != 0 );
       queue_array[idev] = queue;
     };

     idev = 0;
     queue = queue_array[idev];
     device = device_array[idev];


#endif
     };

@@ -355,7 +385,7 @@ double elapsed_time = 0;
   FpType beta = beta_vbatch[0];


     
  if (ngpu == 1) { 

   magmablas_Xgemm_vbatched( transA, transB,
           m_vbatch, n_vbatch, k_vbatch,
@@ -365,9 +395,94 @@ double elapsed_time = 0;
           beta,
           c_vbatch, ldc_vbatch,
           batch_size, queue );
  }
  else {
    /*
     * --------------------------------------------
     * simple partitioning of work to multiple GPUs
     * --------------------------------------------
     * The batch is cut into contiguous slices of at most `inc` problems.
     * Slice number idev is submitted on queue_array[idev] with
     * device_array[idev] made current first.
     */
    int inc = (batch_size + (ngpu-1))/ngpu;
    int idev = 0;
    for(idev = 0; idev < ngpu; idev++) {
       int istart = idev * inc;
       int iend = istart+inc-1;
       if (iend >= (batch_size-1)) {
           iend = batch_size-1;
           }
       int isize =  (iend - istart + 1);

       /*
        * Because inc is rounded up, trailing devices may receive no work
        * (istart can even lie beyond batch_size). Stop before forming
        * pointers past the end of the batch arrays; every later slice
        * would be empty as well.
        */
       if (isize < 1) {
           break;
           }

       device = device_array[idev];
       queue = queue_array[idev];
       magma_setdevice( device );


       /* views of the batch arrays that start at this device's slice */
       int *pm_vbatch = &(m_vbatch[istart]);
       int *pn_vbatch = &(n_vbatch[istart]);
       int *pk_vbatch = &(k_vbatch[istart]);

       FpType **pa_vbatch = &(a_vbatch[istart]);
       FpType **pb_vbatch = &(b_vbatch[istart]);
       FpType **pc_vbatch = &(c_vbatch[istart]);


       int *plda_vbatch = &(lda_vbatch[istart]);
       int *pldb_vbatch = &(ldb_vbatch[istart]);
       int *pldc_vbatch = &(ldc_vbatch[istart]);

       /*
        * ------------
        * extra checks
        * ------------
        * Validate every problem in THIS slice: positive dimensions,
        * positive leading dimensions and non-null matrix pointers.
        * NOTE(review): these reads happen on the host, which assumes the
        * batch arrays are host-accessible (e.g. managed memory) - confirm.
        */
       int i = 0;
       for(i=0; i < isize; i++) {
          int mm = pm_vbatch[i];
          int nn = pn_vbatch[i];
          int kk = pk_vbatch[i];

          int lda = plda_vbatch[i];
          int ldb = pldb_vbatch[i];
          int ldc = pldc_vbatch[i];

          /* index through the slice views, not the full arrays, so the
           * pointers checked belong to the same problems as mm/nn/kk */
          FpType *Amat = pa_vbatch[i];
          FpType *Bmat = pb_vbatch[i];
          FpType *Cmat = pc_vbatch[i];


          assert( mm >= 1 );
          assert( nn >= 1 );
          assert( kk >= 1 );

          assert( lda >= 1 );
          assert( ldb >= 1 );
          assert( ldc >= 1 );

          assert( Amat != 0 );
          assert( Bmat != 0 );
          assert( Cmat != 0 );
          }

       int pbatch_size = isize;

       magmablas_Xgemm_vbatched( transA, transB,
           pm_vbatch, pn_vbatch, pk_vbatch,
           alpha,
           (FpType const * const *) pa_vbatch, plda_vbatch, 
           (FpType const * const *) pb_vbatch, pldb_vbatch,
           beta,
           pc_vbatch, pldc_vbatch,
           pbatch_size, queue );

       } /* end for idev */

       /*
        * Restore the default selection (GPU 0). The current device must be
        * switched back too, so that it matches the device `queue` belongs to.
        */
       idev = 0;
       device = device_array[idev];
       queue = queue_array[idev];
       magma_setdevice( device );

     } /* end else (ngpu > 1) */
  };
#else
  {