Commit 08c03d91 authored by Laanait, Nouamane's avatar Laanait, Nouamane
Browse files

minor bugs

parent 821432c9
Loading
Loading
Loading
Loading
+6 −5
Original line number Diff line number Diff line
@@ -467,10 +467,11 @@ class MSAGPU(MSAHybrid):
                                            for Z, yx in zip(Z_arr, yx_arr)])

        # pad sites array to have equal num of element per slice
        shapes = np.array([itm.shape for itm in Zxy_input])
        pads = [(0, shapes.max() - itm.shape) for itm in Zxy_input]
        padded = [np.pad(itm,pad,'constant',constant_values=-1) for pad, itm in zip(pads, Zxy_input)]
        max_shape = np.max(np.array([itm.shape for itm in Zxy_input]))
        pads = [(0, max_shape - itm.shape[0]) for itm in Zxy_input]
        # padded = [np.pad(itm,pad,'constant',constant_values=-1) for pad, itm in zip(pads, Zxy_input)]
        Zxy_input = np.vstack([np.pad(itm, pad, 'constant', constant_values=-1) for pad, itm in zip(pads, Zxy_input)])
        Zxy_input = Zxy_input.astype(np.float32)
        
        # stack atomic potentials of unique elements
        atom_pot_stack = np.array([self.cached_pots[uq_Z] for uq_Z in unique_Z]).astype(np.float32)
+7 −54
Original line number Diff line number Diff line
@@ -2,58 +2,9 @@
#define FULL_MASK 0xffffffff
#include <stdio.h>

    //  __global__ void BuildScatteringPotential(pycuda::complex<float> slice[][{{y_sampling}}][{{x_sampling}}],
    //                               float atom_pot_stack[][{{pot_shape_y}}][{{pot_shape_x}}],
    //                               int sites[][{{sites_size}}],
    //                               float sigma)

    //  {
    //      const int pot_size_y = {{pot_shape_y}}, pot_size_x = {{pot_shape_x}};
    //      const int slice_size_y = {{y_sampling}}, slice_size_x = {{x_sampling}};
    //      const int num_slices = {{num_slices}}, sites_size = {{sites_size}};
    //      int row_idx = blockDim.y * blockIdx.y + threadIdx.y;
    //      int col_idx = blockDim.x * blockIdx.x + threadIdx.x;
    //      int stk_idx = blockDim.z * blockIdx.z + threadIdx.z;
    //     // if (stk_idx == 0  && row_idx == 0  && col_idx == 0)
    //     // {
    //     // #pragma unroll
    //     for (int slice_num=0; slice_num<num_slices; slice_num++)
    //     {
    //          if (stk_idx == slice_num)
    //          {
    //              for(int my_site=0; my_site<sites_size/3; my_site++)
    //              {
    //                  const int Z = sites[stk_idx][3 * my_site];
    //                  const int y_cen = sites[stk_idx][3 * my_site + 1];
    //                  const int x_cen = sites[stk_idx][3 * my_site + 2];
    //                  const int y_start = rintf(y_cen - pot_size_y * 1.f/2);
    //                  const int y_end =  rintf(y_cen + pot_size_y * 1.f /2);
    //                  const int x_start = rintf(x_cen - pot_size_x * 1.f/2);
    //                  const int x_end = rintf(x_cen + pot_size_x * 1.f/2);
    //                  //printf(" slice_num: %d, Z: %d, y_cen:%d, x_cen: %d\\n ", slice_num, Z, y_cen, x_cen);
    //                  if (row_idx >=0 && row_idx < y_end && row_idx < slice_size_y && row_idx >= y_start
    //                      && col_idx >=0 && col_idx < x_end && col_idx < slice_size_x && col_idx >= x_start)
    //                  {
    //                      const int pot_i = row_idx-y_start, pot_j = col_idx-x_start;
    //                      atomicAdd(&slice[stk_idx][row_idx][col_idx]._M_re, atom_pot_stack[Z][pot_i][pot_j]);
    //                  }
    //              }
    //          }
    //      }
    //      __syncthreads();

    //     if (col_idx < slice_size_x && row_idx < slice_size_y && stk_idx < num_slices)
    //     {
    //          slice[stk_idx][row_idx][col_idx] = pycuda::complex<float>(cosf(slice[stk_idx][row_idx][col_idx]._M_re * sigma),
    //                                                                    sinf(slice[stk_idx][row_idx][col_idx]._M_re * sigma));
    //     }

    //  }

     
     __global__ void BuildScatteringPotential(pycuda::complex<float> slice[][{{y_sampling}}][{{x_sampling}}],
                                  float atom_pot_stack[][{{pot_shape_y}}][{{pot_shape_x}}],
                                  int sites[][{{sites_size}}],
                                  float sites[][{{sites_size}}],
                                  float sigma)

     {
@@ -72,9 +23,9 @@
             {
                 for(int my_site=0; my_site<sites_size/3; my_site++)
                 {
                     const int Z = sites[stk_idx][3 * my_site];
                     const int y_cen = sites[stk_idx][3 * my_site + 1];
                     const int x_cen = sites[stk_idx][3 * my_site + 2];
                     const int Z = (int) sites[stk_idx][3 * my_site];
                     const int y_cen = (int) sites[stk_idx][3 * my_site + 1];
                     const int x_cen = (int) sites[stk_idx][3 * my_site + 2];
                     const int y_start = rintf(y_cen - pot_size_y * 1.f/2);
                     const int y_end =  rintf(y_cen + pot_size_y * 1.f /2);
                     const int x_start = rintf(x_cen - pot_size_x * 1.f/2);
@@ -84,11 +35,13 @@
                         && col_idx >=0 && col_idx < x_end && col_idx < slice_size_x && col_idx >= x_start)
                     {
                         const int pot_i = row_idx-y_start, pot_j = col_idx-x_start;
                         if (pot_i < pot_size_y && pot_j < pot_size_x && pot_i >= 0 && pot_j >= 0){
                            atomicAdd(&slice[stk_idx][row_idx][col_idx]._M_re, atom_pot_stack[Z][pot_i][pot_j]);
                         }
                     }
                 }
             }
         }
         __syncthreads();

        if (col_idx < slice_size_x && row_idx < slice_size_y && stk_idx < num_slices)
+1 −1
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ setup(
    author='Numan Laanait',
    author_email='laanaitn@ornl.gov',
    description='',
    install_requires=['scipy', 'pymatgen', 'numpy', 'pycuda', 'scikit-cuda'],
    install_requires=['scipy', 'pymatgen', 'numpy', 'pycuda==2019.1', 'scikit-cuda', 'mpi4py'],
    #install_requires=['numpy', 'scipy', 'pymatgen', 'pybtex'],
    test_suite='tests',
    python_requires='>=3.6',