Commit 2eb26832 authored by Mario Morales Hernandez
Browse files

Fix GHOST_CELL_PADDING invariance for values > 1

Three bugs prevented simulation results from being invariant to
GHOST_CELL_PADDING values greater than 1:

1. wet_dry kernel: incorrect last interior row check
  - Original: ix == nrows - 2*GHOST_CELL_PADDING
  - Fixed:    ix == nrows - GHOST_CELL_PADDING - 1
  - For N=2, original checked row nrows-4 instead of nrows-3

2. halo_copy_to_gpu: wrong destination for bottom ghost cells
  - Original temp offset (nrows - 2N) pointed to interior cells
    instead of ghost cells for N > 1
  - Original loop also overwrote interior edge cells with stale
    data (harmless for N=1 since the values were unchanged, but it
    corrupted results for N > 1)
  - Fixed: only process N rows (ghost cells), use correct offset
    (nrows - N), and read from the MPI-received halo rows
    9N to 12N-1

3. compute_extbc_values: hardcoded boundary indices
  - Original assumed GHOST_CELL_PADDING=1 with ix==1, iy==1
  - Fixed to use GHOST_CELL_PADDING for first/last interior row/col

All configurations now produce bitwise-identical results:
- GHOST_CELL_PADDING: 1-5
- MPI ranks: 1, 2, 4, 6
parent ec35bc4b
Loading
Loading
Loading
Loading
+27 −17
Original line number Diff line number Diff line
@@ -727,7 +727,7 @@ namespace Kernels
            }

        }else{
          if(ix==nrows - 2*GHOST_CELL_PADDING && mpi_tasks>1){ //last real row
          if(ix==nrows - GHOST_CELL_PADDING - 1 && mpi_tasks>1){ //last real row
            if ((hij + zij < dem[rx - ncols + iy]) && (h_arr[rx - ncols + iy] < EPS12))                       {
              qy_arr[id] = 0.0;
            }
@@ -816,21 +816,31 @@ namespace Kernels
  {
    triton::parallel_for( AUTO_LABEL() , size , KOKKOS_LAMBDA (int id) {

      int index =  id + 2*ncols*GHOST_CELL_PADDING;
      // After MPI exchange, the halo array contains:
      // - Rows 0 to 3N-1: received top halo (from prev rank's bottom interior edge)
      // - Rows 3N to 9N-1: old data (not updated by exchange)
      // - Rows 9N to 12N-1: received bottom halo (from next rank's top interior edge)
      //
      // We should ONLY copy received data to ghost cells in the main arrays.
      // Do NOT overwrite interior edge cells with old data!

      if(id < ncols*GHOST_CELL_PADDING)
      {
        index = id;
      }
      
        // First half: copy received top halo to top ghost cells only
        int index = id;
        h_arr[id]  = halo[index + 0*GHOST_CELL_PADDING*ncols];
        qx_arr[id] = halo[index + 1*GHOST_CELL_PADDING*ncols];
        qy_arr[id] = halo[index + 2*GHOST_CELL_PADDING*ncols];

      int temp = id + (nrows - 2*GHOST_CELL_PADDING)*ncols;

      h_arr[temp]  = halo[index + 6*GHOST_CELL_PADDING*ncols];
      qx_arr[temp] = halo[index + 7*GHOST_CELL_PADDING*ncols];
      qy_arr[temp] = halo[index + 8*GHOST_CELL_PADDING*ncols];
        // Copy received bottom halo to bottom ghost cells
        // Bottom ghost cells are at rows (nrows-N) to (nrows-1)
        int temp = id + (nrows - GHOST_CELL_PADDING)*ncols;
        h_arr[temp]  = halo[index + 9*GHOST_CELL_PADDING*ncols];
        qx_arr[temp] = halo[index + 10*GHOST_CELL_PADDING*ncols];
        qy_arr[temp] = halo[index + 11*GHOST_CELL_PADDING*ncols];
      }
      // Second half (id >= N*ncols) was incorrectly copying old interior edge data
      // to interior edge positions. This is now skipped.

    });
  }
@@ -1105,10 +1115,10 @@ namespace Kernels
      int iy = (ii % ncols);  //col id

      bool
      is_top = (ix == 1),
      is_btm = (ix == nrows - 2),
      is_lt = (iy == 1),
      is_rt = (iy == ncols - 2);
      is_top = (ix == GHOST_CELL_PADDING),
      is_btm = (ix == nrows - GHOST_CELL_PADDING - 1),
      is_lt = (iy == GHOST_CELL_PADDING),
      is_rt = (iy == ncols - GHOST_CELL_PADDING - 1);
			
			//if (!(rank == 0 && is_top) || !is_lt || !is_rt || !(rank == total_process - 1 && is_btm))
		if (!((rank == 0 && is_top) || is_lt || is_rt || (rank == total_process - 1 && is_btm)))