Commit 6b160b23 authored by Laanait, Nouamane
Browse files

Checking for NaN in CUDA kernels and doubling the size of the CUDA grid in the z-direction

parent 3e6e9340
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -504,9 +504,10 @@ class MSAGPU(MSAHybrid):
        block, grid = self._get_blockgrid([self.sampling[1], self.sampling[0], self.num_slices],
                    mode='3D')
        build_potential = self.pot_kernels['build_potential']
        print("block:%s, grid:%s" %(format(block), format(grid)))
        print("max, row idx:%d, col idx:%d, stk idx:%d" %(block[0]*grid[0], block[1]*grid[1], block[2]*grid[2]))
        print("sites: %s" %format(Zxy_input.shape))
        grid = (grid[0], grid[1], grid[2]*2)
        # print("block:%s, grid:%s" %(format(block), format(grid)))
        # print("max, row idx:%d, col idx:%d, stk idx:%d" %(block[0]*grid[0], block[1]*grid[1], block[2]*grid[2]))
        # print("sites: %s" %format(Zxy_input.shape))
        # build potential
        build_potential(potential_slices_d, atom_pot_stack_d, sites_d,
                        np.float32(self.sigma), block=block, grid=grid)
+1 −1
Original line number Diff line number Diff line
@@ -91,7 +91,7 @@
         }
         __syncthreads();

        if (col_idx < slice_size_x && row_idx < slice_size_y && stk_idx <= num_slices)
        if (col_idx < slice_size_x && row_idx < slice_size_y && stk_idx < num_slices)
        {
             slice[stk_idx][row_idx][col_idx] = pycuda::complex<float>(cosf(slice[stk_idx][row_idx][col_idx]._M_re * sigma),
                                                                       sinf(slice[stk_idx][row_idx][col_idx]._M_re * sigma));