diff --git a/cmake/upstream/GoogleTest.cmake b/cmake/upstream/GoogleTest.cmake
index 944070ae16e2e37837893881d8be32bbdd2b8dc1..ba09bf6b278fd2cb4527a4fb249d569f9b2a8de3 100644
--- a/cmake/upstream/GoogleTest.cmake
+++ b/cmake/upstream/GoogleTest.cmake
@@ -14,6 +14,7 @@ This module defines functions to help use the Google Test infrastructure.
 
     gtest_add_tests(TARGET target
                     [SOURCES src1...]
+                    [EXEC_WRAPPER wrapper1...]
                     [EXTRA_ARGS arg1...]
                     [WORKING_DIRECTORY dir]
                     [TEST_PREFIX prefix]
@@ -31,6 +32,10 @@ This module defines functions to help use the Google Test infrastructure.
     this option is not given, the :prop_tgt:`SOURCES` property of the
     specified ``target`` will be used to obtain the list of sources.
 
+  ``EXEC_WRAPPER wrapper1...``
+    A wrapper command (with its arguments) to place on the command line before
+    the test executable. This is useful when test cases must run in parallel.
+
   ``EXTRA_ARGS arg1...``
     Any extra arguments to pass on the command line to each test case.
 
@@ -66,6 +71,7 @@ This module defines functions to help use the Google Test infrastructure.
     include(GoogleTest)
     add_executable(FooTest FooUnitTest.cxx)
     gtest_add_tests(TARGET      FooTest
+                    EXEC_WRAPPER mpirun -n 2
                     TEST_SUFFIX .noArgs
                     TEST_LIST   noArgsTests
     )
@@ -119,6 +125,7 @@ function(gtest_add_tests)
   )
   set(multiValueArgs
       SOURCES
+      EXEC_WRAPPER
       EXTRA_ARGS
   )
   set(allKeywords ${options} ${oneValueArgs} ${multiValueArgs})
@@ -182,6 +189,13 @@ function(gtest_add_tests)
         continue()
       endif()
 
+      # Wrap the test executable in another command if necessary
+      if (ARGS_EXEC_WRAPPER)
+        set(ctest_test_command ${ARGS_EXEC_WRAPPER} $<TARGET_FILE:${ARGS_TARGET}>)
+      else()
+        set(ctest_test_command ${ARGS_TARGET})
+      endif()
+
       # Make sure tests disabled in GTest get disabled in CTest
       if(gtest_test_name MATCHES "(^|\\.)DISABLED_")
         # Add the disabled test if CMake is new enough
@@ -198,7 +212,7 @@ function(gtest_add_tests)
           )
           add_test(NAME ${ctest_test_name}
                    ${workDir}
-                   COMMAND ${ARGS_TARGET}
+                   COMMAND ${ctest_test_command}
                      --gtest_also_run_disabled_tests
                      --gtest_filter=${gtest_test_name}
                      ${ARGS_EXTRA_ARGS}
@@ -210,7 +224,7 @@ function(gtest_add_tests)
         set(ctest_test_name ${ARGS_TEST_PREFIX}${gtest_test_name}${ARGS_TEST_SUFFIX})
         add_test(NAME ${ctest_test_name}
                  ${workDir}
-                 COMMAND ${ARGS_TARGET}
+                 COMMAND ${ctest_test_command}
                    --gtest_filter=${gtest_test_name}
                    ${ARGS_EXTRA_ARGS}
         )
diff --git a/testing/adios2/engine/SmallTestData.h b/testing/adios2/engine/SmallTestData.h
index ba6b50244911ca6ea35d09867b87c83ff9882473..550cdf9e8dffc8207956046004a181418fc94db8 100644
--- a/testing/adios2/engine/SmallTestData.h
+++ b/testing/adios2/engine/SmallTestData.h
@@ -6,11 +6,17 @@
 #define TESTING_ADIOS2_ENGINE_SMALLTESTDATA_H_
 
 #include <array>
+#include <limits>
+
+#ifdef WIN32
+#define NOMINMAX
+#endif
 
 // Test data for each type.  Make sure our values exceed the range of the
 // previous size to make sure we all bytes for each element
 struct SmallTestData
 {
+    // TODO: Choose the right initial values for the char array
     std::array<char, 10> I8 = {{0, 1, -2, 3, -4, 5, -6, 7, -8, 9}};
     std::array<signed char, 10> SI8 = {{0, -1, 2, -3, 4, -5, 6, -7, 8, -9}};
     std::array<short, 10> I16 = {
@@ -36,4 +42,70 @@ struct SmallTestData
     std::array<double, 10> R64 = {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}};
 };
 
+// Returns n limited to the closed range [lower, upper]. Written with plain
+// comparisons so that it needs no <algorithm> include and is unaffected by
+// any min/max macros.
+template <typename T>
+T clip(const T &n, const T &lower, const T &upper)
+{
+    const T &capped = (upper < n) ? upper : n;
+    return (lower < capped) ? capped : lower;
+}
+
+// Adds delta to value, pinning the result to the limits of T instead of
+// letting it wrap. The bounds are checked before the addition is performed.
+template <typename T>
+T addSaturated(const T value, const long long delta)
+{
+    const T lowest = std::numeric_limits<T>::lowest();
+    const T highest = std::numeric_limits<T>::max();
+    if (delta > 0 && value > highest - delta)
+    {
+        return highest;
+    }
+    if (delta < 0 && value < lowest - delta)
+    {
+        return lowest;
+    }
+    return static_cast<T>(value + delta);
+}
+
+// Applies addSaturated with the same delta to every element of values.
+template <typename Container>
+void shiftSaturated(Container &values, const long long delta)
+{
+    for (auto &value : values)
+    {
+        value = addSaturated(value, delta);
+    }
+}
+
+// Returns a copy of input with every element of every array shifted by
+// (rank + 1) + step * size, which is a different offset for each (step, rank)
+// pair when 0 <= rank < size. Results that would leave an element type's
+// range are pinned to its limit. Marked inline as it is defined in a header.
+inline SmallTestData generateNewSmallTestData(SmallTestData input, int step,
+                                              int rank, int size)
+{
+    // rank is treated as 1-based, so the offset is at least 1 for rank 0 at
+    // step 0
+    const long long jump = (static_cast<long long>(rank) + 1) +
+                           static_cast<long long>(step) * size;
+
+    shiftSaturated(input.I8, jump);
+    shiftSaturated(input.SI8, jump);
+    shiftSaturated(input.I16, jump);
+    shiftSaturated(input.I32, jump);
+    shiftSaturated(input.I64, jump);
+
+    shiftSaturated(input.U8, jump);
+    shiftSaturated(input.U16, jump);
+    shiftSaturated(input.U32, jump);
+    shiftSaturated(input.U64, jump);
+
+    shiftSaturated(input.R32, jump);
+    shiftSaturated(input.R64, jump);
+
+    return input;
+}
 #endif // TESTING_ADIOS2_ENGINE_SMALLTESTDATA_H_
diff --git a/testing/adios2/engine/adios1/CMakeLists.txt b/testing/adios2/engine/adios1/CMakeLists.txt
index 259a2aed065852814bf0e4d82f88abb14cedd28c..c31059b077e7d11a421775027e9dbabad540a4eb 100644
--- a/testing/adios2/engine/adios1/CMakeLists.txt
+++ b/testing/adios2/engine/adios1/CMakeLists.txt
@@ -3,12 +3,20 @@
 # accompanying file Copyright.txt for details.
 #------------------------------------------------------------------------------#
 
-# MPI versions of the test are not properly implemented at the moment
 if(NOT ADIOS2_HAVE_MPI)
   find_package(ADIOS1 COMPONENTS sequential REQUIRED)
+else()
+  find_package(ADIOS1 REQUIRED)
+endif()
 
-  add_executable(TestADIOS1WriteRead TestADIOS1WriteRead.cpp)
-  target_link_libraries(TestADIOS1WriteRead adios2 gtest adios1::adios)
+add_executable(TestADIOS1WriteRead TestADIOS1WriteRead.cpp)
+target_link_libraries(TestADIOS1WriteRead adios2 gtest adios1::adios)
 
+if(NOT ADIOS2_HAVE_MPI)
   gtest_add_tests(TARGET TestADIOS1WriteRead)
+else()
+  # MPI versions of the test
+  gtest_add_tests(TARGET TestADIOS1WriteRead EXEC_WRAPPER ${MPIEXEC}
+                                                        ${MPIEXEC_NUMPROC_FLAG}
+                                                        ${MPIEXEC_MAX_NUMPROCS})
 endif()
diff --git a/testing/adios2/engine/adios1/TestADIOS1WriteRead.cpp b/testing/adios2/engine/adios1/TestADIOS1WriteRead.cpp
index 19f338e4bd7138527f4a565d78721b9071a02408..7e6e95fb404e1cc8b5d22e05d9ab3e110fb53bad 100644
--- a/testing/adios2/engine/adios1/TestADIOS1WriteRead.cpp
+++ b/testing/adios2/engine/adios1/TestADIOS1WriteRead.cpp
@@ -16,12 +16,16 @@
 
 #include "../SmallTestData.h"
 
+#ifdef ADIOS2_HAVE_MPI
+#include "mpi.h"
+#endif
+
 class ADIOS1WriteReadTest : public ::testing::Test
 {
 public:
     ADIOS1WriteReadTest() = default;
 
-    SmallTestData m_TestData;
+    const SmallTestData m_TestData;
 };
 
 //******************************************************************************
@@ -31,48 +35,66 @@ public:
 // ADIOS2 write, native ADIOS1 read
 TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read1D8)
 {
+    // Each process would write a 1x8 array and all processes would
+    // form a world_size * Nx matrix
     std::string fname = "ADIOS2ADIOS1WriteADIOS1Read1D8.bp";
 
+    int world_rank = 0, world_size = 1;
+    // Number of columns (each process writes one row of Nx elements)
+    const std::size_t Nx = 8;
+
+    // Number of steps
+    const std::size_t NSteps = 3;
+
+#ifdef ADIOS2_HAVE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+#endif
+
     // Write test data using ADIOS2
     {
-        adios_init_noxml(MPI_COMM_WORLD);
-
+#ifdef ADIOS2_HAVE_MPI
+        adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
+#else
         adios2::ADIOS adios(true);
+#endif
         adios2::IO &io = adios.DeclareIO("TestIO");
 
-        // Declare 1D variables
+        // Declare 2D variables (NumOfProcesses * Nx)
+        // The local process' part (start, count) can be defined now or later
+        // before Write().
         {
-            auto &var_i8 =
-                io.DefineVariable<char>("i8", {}, {}, adios2::Dims{8});
-            auto &var_i16 =
-                io.DefineVariable<short>("i16", {}, {}, adios2::Dims{8});
-            auto &var_i32 =
-                io.DefineVariable<int>("i32", {}, {}, adios2::Dims{8});
-            auto &var_i64 =
-                io.DefineVariable<long>("i64", {}, {}, adios2::Dims{8});
-            auto &var_u8 =
-                io.DefineVariable<unsigned char>("u8", {}, {}, adios2::Dims{8});
-            auto &var_u16 = io.DefineVariable<unsigned short>("u16", {}, {},
-                                                              adios2::Dims{8});
-            auto &var_u32 =
-                io.DefineVariable<unsigned int>("u32", {}, {}, adios2::Dims{8});
-            auto &var_u64 = io.DefineVariable<unsigned long>("u64", {}, {},
-                                                             adios2::Dims{8});
-            auto &var_r32 =
-                io.DefineVariable<float>("r32", {}, {}, adios2::Dims{8});
-            auto &var_r64 =
-                io.DefineVariable<double>("r64", {}, {}, adios2::Dims{8});
+            adios2::Dims shape = adios2::Dims{(unsigned int)world_size, Nx};
+            auto &var_i8 = io.DefineVariable<char>("i8", shape);
+            auto &var_i16 = io.DefineVariable<short>("i16", shape);
+            auto &var_i32 = io.DefineVariable<int>("i32", shape);
+            auto &var_i64 = io.DefineVariable<long>("i64", shape);
+            auto &var_u8 = io.DefineVariable<unsigned char>("u8", shape);
+            auto &var_u16 = io.DefineVariable<unsigned short>("u16", shape);
+            auto &var_u32 = io.DefineVariable<unsigned int>("u32", shape);
+            auto &var_u64 = io.DefineVariable<unsigned long>("u64", shape);
+            auto &var_r32 = io.DefineVariable<float>("r32", shape);
+            auto &var_r64 = io.DefineVariable<double>("r64", shape);
         }
 
         // Create the ADIOS 1 Engine
         io.SetEngine("ADIOS1Writer");
-        io.AddTransport("File");
+
+#ifdef ADIOS2_HAVE_MPI
+        io.AddTransport("file", {{"library", "MPI"}});
+#else
+        io.AddTransport("file");
+#endif
 
         auto engine = io.Open(fname, adios2::OpenMode::Write);
         ASSERT_NE(engine.get(), nullptr);
 
-        for (size_t step = 0; step < 3; ++step)
+        for (size_t step = 0; step < NSteps; ++step)
         {
+            // Generate test data for each process uniquely
+            SmallTestData currentTestData = generateNewSmallTestData(
+                m_TestData, step, world_rank, world_size);
+
             // Retrieve the variables that previously went out of scope
             auto &var_i8 = io.GetVariable<char>("i8");
             auto &var_i16 = io.GetVariable<short>("i16");
@@ -85,17 +107,34 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read1D8)
             auto &var_r32 = io.GetVariable<float>("r32");
             auto &var_r64 = io.GetVariable<double>("r64");
 
+            // Make a 2D selection to describe the local dimensions of the
+            // variable we write and its offsets in the global spaces
+            adios2::SelectionBoundingBox sel({(unsigned int)world_rank, 0},
+                                             {1, Nx});
+            var_i8.SetSelection(sel);
+            var_i16.SetSelection(sel);
+            var_i32.SetSelection(sel);
+            var_i64.SetSelection(sel);
+            var_u8.SetSelection(sel);
+            var_u16.SetSelection(sel);
+            var_u32.SetSelection(sel);
+            var_u64.SetSelection(sel);
+            var_r32.SetSelection(sel);
+            var_r64.SetSelection(sel);
+
             // Write each one
-            engine->Write(var_i8, m_TestData.I8.data() + step);
-            engine->Write(var_i16, m_TestData.I16.data() + step);
-            engine->Write(var_i32, m_TestData.I32.data() + step);
-            engine->Write(var_i64, m_TestData.I64.data() + step);
-            engine->Write(var_u8, m_TestData.U8.data() + step);
-            engine->Write(var_u16, m_TestData.U16.data() + step);
-            engine->Write(var_u32, m_TestData.U32.data() + step);
-            engine->Write(var_u64, m_TestData.U64.data() + step);
-            engine->Write(var_r32, m_TestData.R32.data() + step);
-            engine->Write(var_r64, m_TestData.R64.data() + step);
+            // fill in the variable with values from starting index to
+            // starting index + count
+            engine->Write(var_i8, currentTestData.I8.data());
+            engine->Write(var_i16, currentTestData.I16.data());
+            engine->Write(var_i32, currentTestData.I32.data());
+            engine->Write(var_i64, currentTestData.I64.data());
+            engine->Write(var_u8, currentTestData.U8.data());
+            engine->Write(var_u16, currentTestData.U16.data());
+            engine->Write(var_u32, currentTestData.U32.data());
+            engine->Write(var_u64, currentTestData.U64.data());
+            engine->Write(var_r32, currentTestData.R32.data());
+            engine->Write(var_r64, currentTestData.R64.data());
 
             // Advance to the next time step
             engine->Advance();
@@ -107,13 +146,6 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read1D8)
         adios_finalize(0);
     }
 
-// Read test data using ADIOS1
-#ifdef ADIOS2_HAVE_MPI
-    // Read everything from rank 0
-    int rank;
-    MPI_Comm_rank();
-    if (rank == 0)
-#endif
     {
         adios_read_init_method(ADIOS_READ_METHOD_BP, MPI_COMM_WORLD,
                                "verbose=3");
@@ -126,63 +158,96 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read1D8)
         // Check the variables exist
         ADIOS_VARINFO *var_i8 = adios_inq_var(f, "i8");
         ASSERT_NE(var_i8, nullptr);
-        ASSERT_EQ(var_i8->ndim, 1);
-        ASSERT_EQ(var_i8->dims[0], 8);
+        ASSERT_EQ(var_i8->ndim, 2);
+        ASSERT_EQ(var_i8->global, 1);
+        ASSERT_EQ(var_i8->nsteps, NSteps);
+        ASSERT_EQ(var_i8->dims[0], world_size);
+        ASSERT_EQ(var_i8->dims[1], Nx);
         ADIOS_VARINFO *var_i16 = adios_inq_var(f, "i16");
         ASSERT_NE(var_i16, nullptr);
-        ASSERT_EQ(var_i16->ndim, 1);
-        ASSERT_EQ(var_i16->dims[0], 8);
+        ASSERT_EQ(var_i16->ndim, 2);
+        ASSERT_EQ(var_i16->global, 1);
+        ASSERT_EQ(var_i16->nsteps, NSteps);
+        ASSERT_EQ(var_i16->dims[0], world_size);
+        ASSERT_EQ(var_i16->dims[1], Nx);
         ADIOS_VARINFO *var_i32 = adios_inq_var(f, "i32");
         ASSERT_NE(var_i32, nullptr);
-        ASSERT_EQ(var_i32->ndim, 1);
-        ASSERT_EQ(var_i32->dims[0], 8);
+        ASSERT_EQ(var_i32->ndim, 2);
+        ASSERT_EQ(var_i32->global, 1);
+        ASSERT_EQ(var_i32->nsteps, NSteps);
+        ASSERT_EQ(var_i32->dims[0], world_size);
+        ASSERT_EQ(var_i32->dims[1], Nx);
         ADIOS_VARINFO *var_i64 = adios_inq_var(f, "i64");
         ASSERT_NE(var_i64, nullptr);
-        ASSERT_EQ(var_i64->ndim, 1);
-        ASSERT_EQ(var_i64->dims[0], 8);
+        ASSERT_EQ(var_i64->ndim, 2);
+        ASSERT_EQ(var_i64->global, 1);
+        ASSERT_EQ(var_i64->nsteps, NSteps);
+        ASSERT_EQ(var_i64->dims[0], world_size);
+        ASSERT_EQ(var_i64->dims[1], Nx);
         ADIOS_VARINFO *var_u8 = adios_inq_var(f, "u8");
         ASSERT_NE(var_u8, nullptr);
-        ASSERT_EQ(var_u8->ndim, 1);
-        ASSERT_EQ(var_u8->dims[0], 8);
+        ASSERT_EQ(var_u8->ndim, 2);
+        ASSERT_EQ(var_u8->global, 1);
+        ASSERT_EQ(var_u8->nsteps, NSteps);
+        ASSERT_EQ(var_u8->dims[0], world_size);
+        ASSERT_EQ(var_u8->dims[1], Nx);
         ADIOS_VARINFO *var_u16 = adios_inq_var(f, "u16");
         ASSERT_NE(var_u16, nullptr);
-        ASSERT_EQ(var_u16->ndim, 1);
-        ASSERT_EQ(var_u16->dims[0], 8);
+        ASSERT_EQ(var_u16->ndim, 2);
+        ASSERT_EQ(var_u16->global, 1);
+        ASSERT_EQ(var_u16->nsteps, NSteps);
+        ASSERT_EQ(var_u16->dims[0], world_size);
+        ASSERT_EQ(var_u16->dims[1], Nx);
         ADIOS_VARINFO *var_u32 = adios_inq_var(f, "u32");
         ASSERT_NE(var_u32, nullptr);
-        ASSERT_EQ(var_u32->ndim, 1);
-        ASSERT_EQ(var_u32->dims[0], 8);
+        ASSERT_EQ(var_u32->ndim, 2);
+        ASSERT_EQ(var_u32->global, 1);
+        ASSERT_EQ(var_u32->nsteps, NSteps);
+        ASSERT_EQ(var_u32->dims[0], world_size);
+        ASSERT_EQ(var_u32->dims[1], Nx);
         ADIOS_VARINFO *var_u64 = adios_inq_var(f, "u64");
         ASSERT_NE(var_u64, nullptr);
-        ASSERT_EQ(var_u64->ndim, 1);
-        ASSERT_EQ(var_u64->dims[0], 8);
+        ASSERT_EQ(var_u64->ndim, 2);
+        ASSERT_EQ(var_u64->global, 1);
+        ASSERT_EQ(var_u64->nsteps, NSteps);
+        ASSERT_EQ(var_u64->dims[0], world_size);
+        ASSERT_EQ(var_u64->dims[1], Nx);
         ADIOS_VARINFO *var_r32 = adios_inq_var(f, "r32");
         ASSERT_NE(var_r32, nullptr);
-        ASSERT_EQ(var_r32->ndim, 1);
-        ASSERT_EQ(var_r32->dims[0], 8);
+        ASSERT_EQ(var_r32->ndim, 2);
+        ASSERT_EQ(var_r32->global, 1);
+        ASSERT_EQ(var_r32->nsteps, NSteps);
+        ASSERT_EQ(var_r32->dims[0], world_size);
+        ASSERT_EQ(var_r32->dims[1], Nx);
         ADIOS_VARINFO *var_r64 = adios_inq_var(f, "r64");
         ASSERT_NE(var_r64, nullptr);
-        ASSERT_EQ(var_r64->ndim, 1);
-        ASSERT_EQ(var_r64->dims[0], 8);
-
-        std::array<char, 8> I8;
-        std::array<int16_t, 8> I16;
-        std::array<int32_t, 8> I32;
-        std::array<int64_t, 8> I64;
-        std::array<unsigned char, 8> U8;
-        std::array<uint16_t, 8> U16;
-        std::array<uint32_t, 8> U32;
-        std::array<uint64_t, 8> U64;
-        std::array<float, 8> R32;
-        std::array<double, 8> R64;
-
-        uint64_t start[1] = {0};
-        uint64_t count[1] = {8};
-        ADIOS_SELECTION *sel = adios_selection_boundingbox(1, start, count);
+        ASSERT_EQ(var_r64->ndim, 2);
+        ASSERT_EQ(var_r64->global, 1);
+        ASSERT_EQ(var_r64->nsteps, NSteps);
+        ASSERT_EQ(var_r64->dims[0], world_size);
+        ASSERT_EQ(var_r64->dims[1], Nx);
+
+        std::array<char, Nx> I8;
+        std::array<int16_t, Nx> I16;
+        std::array<int32_t, Nx> I32;
+        std::array<int64_t, Nx> I64;
+        std::array<unsigned char, Nx> U8;
+        std::array<uint16_t, Nx> U16;
+        std::array<uint32_t, Nx> U32;
+        std::array<uint64_t, Nx> U64;
+        std::array<float, Nx> R32;
+        std::array<double, Nx> R64;
+
+        uint64_t start[2] = {static_cast<uint64_t>(world_rank), 0};
+        uint64_t count[2] = {1, Nx};
+        ADIOS_SELECTION *sel = adios_selection_boundingbox(2, start, count);
 
         // Read stuff
-        for (size_t t = 0; t < 3; ++t)
+        for (size_t t = 0; t < NSteps; ++t)
         {
+            // Generate test data for each rank uniquely
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, t, world_rank, world_size);
             // Read the current step
             adios_schedule_read_byid(f, sel, var_i8->varid, t, 1, I8.data());
             adios_schedule_read_byid(f, sel, var_i16->varid, t, 1, I16.data());
@@ -197,22 +262,22 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read1D8)
             adios_perform_reads(f, 1);
 
             // Check if it's correct
-            for (size_t i = 0; i < 8; ++i)
+            for (size_t i = 0; i < Nx; ++i)
             {
                 std::stringstream ss;
-                ss << "t=" << t << " i=" << i;
+                ss << "t=" << t << " i=" << i << " rank=" << world_rank;
                 std::string msg = ss.str();
 
-                EXPECT_EQ(I8[i], m_TestData.I8[i + t]) << msg;
-                EXPECT_EQ(I16[i], m_TestData.I16[i + t]) << msg;
-                EXPECT_EQ(I32[i], m_TestData.I32[i + t]) << msg;
-                EXPECT_EQ(I64[i], m_TestData.I64[i + t]) << msg;
-                EXPECT_EQ(U8[i], m_TestData.U8[i + t]) << msg;
-                EXPECT_EQ(U16[i], m_TestData.U16[i + t]) << msg;
-                EXPECT_EQ(U32[i], m_TestData.U32[i + t]) << msg;
-                EXPECT_EQ(U64[i], m_TestData.U64[i + t]) << msg;
-                EXPECT_EQ(R32[i], m_TestData.R32[i + t]) << msg;
-                EXPECT_EQ(R64[i], m_TestData.R64[i + t]) << msg;
+                EXPECT_EQ(I8[i], currentTestData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], currentTestData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], currentTestData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], currentTestData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], currentTestData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], currentTestData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], currentTestData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], currentTestData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], currentTestData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], currentTestData.R64[i]) << msg;
             }
         }
 
@@ -260,48 +325,70 @@ TEST_F(ADIOS1WriteReadTest, DISABLED_ADIOS1WriteADIOS2ADIOS1Read1D8)
 // ADIOS2 write, native ADIOS1 read
 TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D2x4)
 {
+    // Each process would write a 2x4 array and all processes would
+    // form a 2D (world_size*2) * Nx matrix where Nx is 4 here
     std::string fname = "ADIOS2ADIOS1WriteADIOS1Read2D2x4Test.bp";
 
+    int world_rank = 0, world_size = 1;
+    // Number of columns
+    const std::size_t Nx = 4;
+
+    // Number of rows
+    const std::size_t Ny = 2;
+
+    // Number of steps
+    const std::size_t NSteps = 1;
+
+#ifdef ADIOS2_HAVE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+#endif
+
     // Write test data using ADIOS2
     {
-        adios_init_noxml(MPI_COMM_WORLD);
-
+#ifdef ADIOS2_HAVE_MPI
+        adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
+#else
         adios2::ADIOS adios(true);
+#endif
         adios2::IO &io = adios.DeclareIO("TestIO");
 
-        // Declare 1D variables
+        // Declare 2D variables ((NumOfProcesses*2) * Nx)
+        // The local process' part (start, count) can be defined now or later
+        // before Write().
         {
-            auto &var_i8 =
-                io.DefineVariable<char>("i8", {}, {}, adios2::Dims{2, 4});
-            auto &var_i16 =
-                io.DefineVariable<short>("i16", {}, {}, adios2::Dims{2, 4});
-            auto &var_i32 =
-                io.DefineVariable<int>("i32", {}, {}, adios2::Dims{2, 4});
-            auto &var_i64 =
-                io.DefineVariable<long>("i64", {}, {}, adios2::Dims{2, 4});
-            auto &var_u8 = io.DefineVariable<unsigned char>("u8", {}, {},
-                                                            adios2::Dims{2, 4});
-            auto &var_u16 = io.DefineVariable<unsigned short>(
-                "u16", {}, {}, adios2::Dims{2, 4});
-            auto &var_u32 = io.DefineVariable<unsigned int>("u32", {}, {},
-                                                            adios2::Dims{2, 4});
-            auto &var_u64 = io.DefineVariable<unsigned long>(
-                "u64", {}, {}, adios2::Dims{2, 4});
-            auto &var_r32 =
-                io.DefineVariable<float>("r32", {}, {}, adios2::Dims{2, 4});
-            auto &var_r64 =
-                io.DefineVariable<double>("r64", {}, {}, adios2::Dims{2, 4});
+            adios2::Dims shape =
+                adios2::Dims{(unsigned int)Ny * world_size, Nx};
+            auto &var_i8 = io.DefineVariable<char>("i8", shape);
+            auto &var_i16 = io.DefineVariable<short>("i16", shape);
+            auto &var_i32 = io.DefineVariable<int>("i32", shape);
+            auto &var_i64 = io.DefineVariable<long>("i64", shape);
+            auto &var_u8 = io.DefineVariable<unsigned char>("u8", shape);
+            auto &var_u16 = io.DefineVariable<unsigned short>("u16", shape);
+            auto &var_u32 = io.DefineVariable<unsigned int>("u32", shape);
+            auto &var_u64 = io.DefineVariable<unsigned long>("u64", shape);
+            auto &var_r32 = io.DefineVariable<float>("r32", shape);
+            auto &var_r64 = io.DefineVariable<double>("r64", shape);
         }
 
         // Create the ADIOS 1 Engine
         io.SetEngine("ADIOS1Writer");
+
+#ifdef ADIOS2_HAVE_MPI
+        io.AddTransport("file", {{"library", "MPI"}});
+#else
         io.AddTransport("file");
+#endif
 
         auto engine = io.Open(fname, adios2::OpenMode::Write);
         ASSERT_NE(engine.get(), nullptr);
 
-        for (size_t step = 0; step < 3; ++step)
+        for (size_t step = 0; step < NSteps; ++step)
         {
+            // Generate test data for each process uniquely
+            SmallTestData currentTestData = generateNewSmallTestData(
+                m_TestData, step, world_rank, world_size);
+
             // Retrieve the variables that previously went out of scope
             auto &var_i8 = io.GetVariable<char>("i8");
             auto &var_i16 = io.GetVariable<short>("i16");
@@ -314,17 +401,34 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D2x4)
             auto &var_r32 = io.GetVariable<float>("r32");
             auto &var_r64 = io.GetVariable<double>("r64");
 
+            // Make a 2D selection to describe the local dimensions of the
+            // variable we write and its offsets in the global spaces
+            adios2::SelectionBoundingBox sel(
+                {(unsigned int)(world_rank * Ny), 0}, {Ny, Nx});
+            var_i8.SetSelection(sel);
+            var_i16.SetSelection(sel);
+            var_i32.SetSelection(sel);
+            var_i64.SetSelection(sel);
+            var_u8.SetSelection(sel);
+            var_u16.SetSelection(sel);
+            var_u32.SetSelection(sel);
+            var_u64.SetSelection(sel);
+            var_r32.SetSelection(sel);
+            var_r64.SetSelection(sel);
+
             // Write each one
-            engine->Write(var_i8, m_TestData.I8.data() + step);
-            engine->Write(var_i16, m_TestData.I16.data() + step);
-            engine->Write(var_i32, m_TestData.I32.data() + step);
-            engine->Write(var_i64, m_TestData.I64.data() + step);
-            engine->Write(var_u8, m_TestData.U8.data() + step);
-            engine->Write(var_u16, m_TestData.U16.data() + step);
-            engine->Write(var_u32, m_TestData.U32.data() + step);
-            engine->Write(var_u64, m_TestData.U64.data() + step);
-            engine->Write(var_r32, m_TestData.R32.data() + step);
-            engine->Write(var_r64, m_TestData.R64.data() + step);
+            // fill in the variable with values from starting index to
+            // starting index + count
+            engine->Write(var_i8, currentTestData.I8.data());
+            engine->Write(var_i16, currentTestData.I16.data());
+            engine->Write(var_i32, currentTestData.I32.data());
+            engine->Write(var_i64, currentTestData.I64.data());
+            engine->Write(var_u8, currentTestData.U8.data());
+            engine->Write(var_u16, currentTestData.U16.data());
+            engine->Write(var_u32, currentTestData.U32.data());
+            engine->Write(var_u64, currentTestData.U64.data());
+            engine->Write(var_r32, currentTestData.R32.data());
+            engine->Write(var_r64, currentTestData.R64.data());
 
             // Advance to the next time step
             engine->Advance();
@@ -336,13 +440,6 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D2x4)
         adios_finalize(0);
     }
 
-// Read test data using ADIOS1
-#ifdef ADIOS2_HAVE_MPI
-    // Read everything from rank 0
-    int rank;
-    MPI_Comm_rank();
-    if (rank == 0)
-#endif
     {
         adios_read_init_method(ADIOS_READ_METHOD_BP, MPI_COMM_WORLD,
                                "verbose=3");
@@ -356,72 +453,98 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D2x4)
         ADIOS_VARINFO *var_i8 = adios_inq_var(f, "i8");
         ASSERT_NE(var_i8, nullptr);
         ASSERT_EQ(var_i8->ndim, 2);
-        ASSERT_EQ(var_i8->dims[0], 2);
-        ASSERT_EQ(var_i8->dims[1], 4);
+        ASSERT_EQ(var_i8->global, 1);
+        ASSERT_EQ(var_i8->nsteps, NSteps);
+        ASSERT_EQ(var_i8->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i8->dims[1], Nx);
         ADIOS_VARINFO *var_i16 = adios_inq_var(f, "i16");
         ASSERT_NE(var_i16, nullptr);
         ASSERT_EQ(var_i16->ndim, 2);
-        ASSERT_EQ(var_i16->dims[0], 2);
-        ASSERT_EQ(var_i16->dims[1], 4);
+        ASSERT_EQ(var_i16->global, 1);
+        ASSERT_EQ(var_i16->nsteps, NSteps);
+        ASSERT_EQ(var_i16->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i16->dims[1], Nx);
         ADIOS_VARINFO *var_i32 = adios_inq_var(f, "i32");
         ASSERT_NE(var_i32, nullptr);
         ASSERT_EQ(var_i32->ndim, 2);
-        ASSERT_EQ(var_i32->dims[0], 2);
-        ASSERT_EQ(var_i32->dims[1], 4);
+        ASSERT_EQ(var_i32->global, 1);
+        ASSERT_EQ(var_i32->nsteps, NSteps);
+        ASSERT_EQ(var_i32->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i32->dims[1], Nx);
         ADIOS_VARINFO *var_i64 = adios_inq_var(f, "i64");
         ASSERT_NE(var_i64, nullptr);
         ASSERT_EQ(var_i64->ndim, 2);
-        ASSERT_EQ(var_i64->dims[0], 2);
-        ASSERT_EQ(var_i64->dims[1], 4);
+        ASSERT_EQ(var_i64->global, 1);
+        ASSERT_EQ(var_i64->nsteps, NSteps);
+        ASSERT_EQ(var_i64->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i64->dims[1], Nx);
         ADIOS_VARINFO *var_u8 = adios_inq_var(f, "u8");
         ASSERT_NE(var_u8, nullptr);
         ASSERT_EQ(var_u8->ndim, 2);
-        ASSERT_EQ(var_u8->dims[0], 2);
-        ASSERT_EQ(var_u8->dims[1], 4);
+        ASSERT_EQ(var_u8->global, 1);
+        ASSERT_EQ(var_u8->nsteps, NSteps);
+        ASSERT_EQ(var_u8->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u8->dims[1], Nx);
         ADIOS_VARINFO *var_u16 = adios_inq_var(f, "u16");
         ASSERT_NE(var_u16, nullptr);
         ASSERT_EQ(var_u16->ndim, 2);
-        ASSERT_EQ(var_u16->dims[0], 2);
-        ASSERT_EQ(var_u16->dims[1], 4);
+        ASSERT_EQ(var_u16->global, 1);
+        ASSERT_EQ(var_u16->nsteps, NSteps);
+        ASSERT_EQ(var_u16->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u16->dims[1], Nx);
         ADIOS_VARINFO *var_u32 = adios_inq_var(f, "u32");
         ASSERT_NE(var_u32, nullptr);
         ASSERT_EQ(var_u32->ndim, 2);
-        ASSERT_EQ(var_u32->dims[0], 2);
-        ASSERT_EQ(var_u32->dims[1], 4);
+        ASSERT_EQ(var_u32->global, 1);
+        ASSERT_EQ(var_u32->nsteps, NSteps);
+        ASSERT_EQ(var_u32->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u32->dims[1], Nx);
         ADIOS_VARINFO *var_u64 = adios_inq_var(f, "u64");
         ASSERT_NE(var_u64, nullptr);
         ASSERT_EQ(var_u64->ndim, 2);
-        ASSERT_EQ(var_u64->dims[0], 2);
-        ASSERT_EQ(var_u64->dims[1], 4);
+        ASSERT_EQ(var_u64->global, 1);
+        ASSERT_EQ(var_u64->nsteps, NSteps);
+        ASSERT_EQ(var_u64->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u64->dims[1], Nx);
         ADIOS_VARINFO *var_r32 = adios_inq_var(f, "r32");
         ASSERT_NE(var_r32, nullptr);
         ASSERT_EQ(var_r32->ndim, 2);
-        ASSERT_EQ(var_r32->dims[0], 2);
-        ASSERT_EQ(var_r32->dims[1], 4);
+        ASSERT_EQ(var_r32->global, 1);
+        ASSERT_EQ(var_r32->nsteps, NSteps);
+        ASSERT_EQ(var_r32->dims[0], world_size * Ny);
+        ASSERT_EQ(var_r32->dims[1], Nx);
         ADIOS_VARINFO *var_r64 = adios_inq_var(f, "r64");
         ASSERT_NE(var_r64, nullptr);
         ASSERT_EQ(var_r64->ndim, 2);
-        ASSERT_EQ(var_r64->dims[0], 2);
-        ASSERT_EQ(var_r64->dims[1], 4);
-
-        std::array<char, 8> I8;
-        std::array<int16_t, 8> I16;
-        std::array<int32_t, 8> I32;
-        std::array<int64_t, 8> I64;
-        std::array<unsigned char, 8> U8;
-        std::array<uint16_t, 8> U16;
-        std::array<uint32_t, 8> U32;
-        std::array<uint64_t, 8> U64;
-        std::array<float, 8> R32;
-        std::array<double, 8> R64;
-
-        uint64_t start[2] = {0, 0};
-        uint64_t count[2] = {2, 4};
+        ASSERT_EQ(var_r64->global, 1);
+        ASSERT_EQ(var_r64->nsteps, NSteps);
+        ASSERT_EQ(var_r64->dims[0], world_size * Ny);
+        ASSERT_EQ(var_r64->dims[1], Nx);
+
+        // The receive buffers must be at least as large as the selection
+        // being read (Nx * Ny elements); with smaller arrays the values
+        // come back as garbage, most visibly for double and uint64_t.
+        std::array<char, Nx * Ny> I8;
+        std::array<int16_t, Nx * Ny> I16;
+        std::array<int32_t, Nx * Ny> I32;
+        std::array<int64_t, Nx * Ny> I64;
+        std::array<unsigned char, Nx * Ny> U8;
+        std::array<uint16_t, Nx * Ny> U16;
+        std::array<uint32_t, Nx * Ny> U32;
+        std::array<uint64_t, Nx * Ny> U64;
+        std::array<float, Nx * Ny> R32;
+        std::array<double, Nx * Ny> R64;
+
+        uint64_t start[2] = {static_cast<uint64_t>(world_rank * Ny), 0};
+        uint64_t count[2] = {Ny, Nx};
         ADIOS_SELECTION *sel = adios_selection_boundingbox(2, start, count);
 
         // Read stuff
-        for (size_t t = 0; t < 3; ++t)
+        for (size_t t = 0; t < NSteps; ++t)
         {
+            // Generate test data for each rank uniquely
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, t, world_rank, world_size);
             // Read the current step
             adios_schedule_read_byid(f, sel, var_i8->varid, t, 1, I8.data());
             adios_schedule_read_byid(f, sel, var_i16->varid, t, 1, I16.data());
@@ -436,22 +559,22 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D2x4)
             adios_perform_reads(f, 1);
 
             // Check if it's correct
-            for (size_t i = 0; i < 8; ++i)
+            for (size_t i = 0; i < Nx * Ny; ++i)
             {
                 std::stringstream ss;
-                ss << "t=" << t << " i=" << i;
+                ss << "t=" << t << " i=" << i << " rank=" << world_rank;
                 std::string msg = ss.str();
 
-                EXPECT_EQ(I8[i], m_TestData.I8[i + t]) << msg;
-                EXPECT_EQ(I16[i], m_TestData.I16[i + t]) << msg;
-                EXPECT_EQ(I32[i], m_TestData.I32[i + t]) << msg;
-                EXPECT_EQ(I64[i], m_TestData.I64[i + t]) << msg;
-                EXPECT_EQ(U8[i], m_TestData.U8[i + t]) << msg;
-                EXPECT_EQ(U16[i], m_TestData.U16[i + t]) << msg;
-                EXPECT_EQ(U32[i], m_TestData.U32[i + t]) << msg;
-                EXPECT_EQ(U64[i], m_TestData.U64[i + t]) << msg;
-                EXPECT_EQ(R32[i], m_TestData.R32[i + t]) << msg;
-                EXPECT_EQ(R64[i], m_TestData.R64[i + t]) << msg;
+                EXPECT_EQ(I8[i], currentTestData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], currentTestData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], currentTestData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], currentTestData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], currentTestData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], currentTestData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], currentTestData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], currentTestData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], currentTestData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], currentTestData.R64[i]) << msg;
             }
         }
 
@@ -496,51 +619,72 @@ TEST_F(ADIOS1WriteReadTest, DISABLED_ADIOS1WriteADIOS2ADIOS1Read2D2x4)
 // 2D 4x2 test data
 //******************************************************************************
 
-// ADIOS2 write, native ADIOS1 read
-TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D4x2)
+// ADIOS2 write using ADIOS1 Writer, native ADIOS1 read
+TEST_F(ADIOS1WriteReadTest, _ADIOS2ADIOS1WriteADIOS1Read2D4x2)
 {
+    // Each process writes a 4x2 array; together all processes form a
+    // global 2D array of (world_size * 4) x Nx elements, where Nx is 2 here
     std::string fname = "ADIOS2ADIOS1WriteADIOS1Read2D4x2Test.bp";
 
+    int world_rank = 0, world_size = 1;
+    // Number of columns (extent of the second dimension)
+    const std::size_t Nx = 2;
+    // Number of rows written by each process (first dimension)
+    const std::size_t Ny = 4;
+
+    // Number of steps
+    const std::size_t NSteps = 1;
+
+#ifdef ADIOS2_HAVE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+#endif
+
     // Write test data using ADIOS2
     {
-        adios_init_noxml(MPI_COMM_WORLD);
-
+#ifdef ADIOS2_HAVE_MPI
+        adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
+#else
         adios2::ADIOS adios(true);
+#endif
         adios2::IO &io = adios.DeclareIO("TestIO");
 
-        // Declare 1D variables
+        // Declare 2D variables with global shape (world_size * Ny) x Nx.
+        // Each process' local part (start, count) can be defined now or
+        // later, before Write().
         {
-            auto &var_i8 =
-                io.DefineVariable<char>("i8", {}, {}, adios2::Dims{4, 2});
-            auto &var_i16 =
-                io.DefineVariable<short>("i16", {}, {}, adios2::Dims{4, 2});
-            auto &var_i32 =
-                io.DefineVariable<int>("i32", {}, {}, adios2::Dims{4, 2});
-            auto &var_i64 =
-                io.DefineVariable<long>("i64", {}, {}, adios2::Dims{4, 2});
-            auto &var_u8 = io.DefineVariable<unsigned char>("u8", {}, {},
-                                                            adios2::Dims{4, 2});
-            auto &var_u16 = io.DefineVariable<unsigned short>(
-                "u16", {}, {}, adios2::Dims{4, 2});
-            auto &var_u32 = io.DefineVariable<unsigned int>("u32", {}, {},
-                                                            adios2::Dims{4, 2});
-            auto &var_u64 = io.DefineVariable<unsigned long>(
-                "u64", {}, {}, adios2::Dims{4, 2});
-            auto &var_r32 =
-                io.DefineVariable<float>("r32", {}, {}, adios2::Dims{4, 2});
-            auto &var_r64 =
-                io.DefineVariable<double>("r64", {}, {}, adios2::Dims{4, 2});
+            adios2::Dims shape =
+                adios2::Dims{(unsigned int)Ny * world_size, Nx};
+            auto &var_i8 = io.DefineVariable<char>("i8", shape);
+            auto &var_i16 = io.DefineVariable<short>("i16", shape);
+            auto &var_i32 = io.DefineVariable<int>("i32", shape);
+            auto &var_i64 = io.DefineVariable<long>("i64", shape);
+            auto &var_u8 = io.DefineVariable<unsigned char>("u8", shape);
+            auto &var_u16 = io.DefineVariable<unsigned short>("u16", shape);
+            auto &var_u32 = io.DefineVariable<unsigned int>("u32", shape);
+            auto &var_u64 = io.DefineVariable<unsigned long>("u64", shape);
+            auto &var_r32 = io.DefineVariable<float>("r32", shape);
+            auto &var_r64 = io.DefineVariable<double>("r64", shape);
         }
 
         // Create the ADIOS 1 Engine
         io.SetEngine("ADIOS1Writer");
+
+#ifdef ADIOS2_HAVE_MPI
+        io.AddTransport("file", {{"library", "MPI"}});
+#else
         io.AddTransport("file");
+#endif
 
         auto engine = io.Open(fname, adios2::OpenMode::Write);
         ASSERT_NE(engine.get(), nullptr);
 
-        for (size_t step = 0; step < 3; ++step)
+        for (size_t step = 0; step < NSteps; ++step)
         {
+            // Generate test data for each process uniquely
+            SmallTestData currentTestData = generateNewSmallTestData(
+                m_TestData, step, world_rank, world_size);
+
             // Retrieve the variables that previously went out of scope
             auto &var_i8 = io.GetVariable<char>("i8");
             auto &var_i16 = io.GetVariable<short>("i16");
@@ -553,17 +697,34 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D4x2)
             auto &var_r32 = io.GetVariable<float>("r32");
             auto &var_r64 = io.GetVariable<double>("r64");
 
+            // Make a 2D selection describing the local dimensions of the
+            // variable we write and its offset in the global space
+            adios2::SelectionBoundingBox sel(
+                {(unsigned int)(world_rank * Ny), 0}, {Ny, Nx});
+            var_i8.SetSelection(sel);
+            var_i16.SetSelection(sel);
+            var_i32.SetSelection(sel);
+            var_i64.SetSelection(sel);
+            var_u8.SetSelection(sel);
+            var_u16.SetSelection(sel);
+            var_u32.SetSelection(sel);
+            var_u64.SetSelection(sel);
+            var_r32.SetSelection(sel);
+            var_r64.SetSelection(sel);
+
             // Write each one
-            engine->Write(var_i8, m_TestData.I8.data() + step);
-            engine->Write(var_i16, m_TestData.I16.data() + step);
-            engine->Write(var_i32, m_TestData.I32.data() + step);
-            engine->Write(var_i64, m_TestData.I64.data() + step);
-            engine->Write(var_u8, m_TestData.U8.data() + step);
-            engine->Write(var_u16, m_TestData.U16.data() + step);
-            engine->Write(var_u32, m_TestData.U32.data() + step);
-            engine->Write(var_u64, m_TestData.U64.data() + step);
-            engine->Write(var_r32, m_TestData.R32.data() + step);
-            engine->Write(var_r64, m_TestData.R64.data() + step);
+            // Write the per-rank test values into each variable's selected
+            // region (from the start offset through start + count)
+            engine->Write(var_i8, currentTestData.I8.data());
+            engine->Write(var_i16, currentTestData.I16.data());
+            engine->Write(var_i32, currentTestData.I32.data());
+            engine->Write(var_i64, currentTestData.I64.data());
+            engine->Write(var_u8, currentTestData.U8.data());
+            engine->Write(var_u16, currentTestData.U16.data());
+            engine->Write(var_u32, currentTestData.U32.data());
+            engine->Write(var_u64, currentTestData.U64.data());
+            engine->Write(var_r32, currentTestData.R32.data());
+            engine->Write(var_r64, currentTestData.R64.data());
 
             // Advance to the next time step
             engine->Advance();
@@ -575,13 +736,6 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D4x2)
         adios_finalize(0);
     }
 
-// Read test data using ADIOS1
-#ifdef ADIOS2_HAVE_MPI
-    // Read everything from rank 0
-    int rank;
-    MPI_Comm_rank();
-    if (rank == 0)
-#endif
     {
         adios_read_init_method(ADIOS_READ_METHOD_BP, MPI_COMM_WORLD,
                                "verbose=3");
@@ -595,72 +749,98 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D4x2)
         ADIOS_VARINFO *var_i8 = adios_inq_var(f, "i8");
         ASSERT_NE(var_i8, nullptr);
         ASSERT_EQ(var_i8->ndim, 2);
-        ASSERT_EQ(var_i8->dims[0], 4);
-        ASSERT_EQ(var_i8->dims[1], 2);
+        ASSERT_EQ(var_i8->global, 1);
+        ASSERT_EQ(var_i8->nsteps, NSteps);
+        ASSERT_EQ(var_i8->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i8->dims[1], Nx);
         ADIOS_VARINFO *var_i16 = adios_inq_var(f, "i16");
         ASSERT_NE(var_i16, nullptr);
         ASSERT_EQ(var_i16->ndim, 2);
-        ASSERT_EQ(var_i16->dims[0], 4);
-        ASSERT_EQ(var_i16->dims[1], 2);
+        ASSERT_EQ(var_i16->global, 1);
+        ASSERT_EQ(var_i16->nsteps, NSteps);
+        ASSERT_EQ(var_i16->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i16->dims[1], Nx);
         ADIOS_VARINFO *var_i32 = adios_inq_var(f, "i32");
         ASSERT_NE(var_i32, nullptr);
         ASSERT_EQ(var_i32->ndim, 2);
-        ASSERT_EQ(var_i32->dims[0], 4);
-        ASSERT_EQ(var_i32->dims[1], 2);
+        ASSERT_EQ(var_i32->global, 1);
+        ASSERT_EQ(var_i32->nsteps, NSteps);
+        ASSERT_EQ(var_i32->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i32->dims[1], Nx);
         ADIOS_VARINFO *var_i64 = adios_inq_var(f, "i64");
         ASSERT_NE(var_i64, nullptr);
         ASSERT_EQ(var_i64->ndim, 2);
-        ASSERT_EQ(var_i64->dims[0], 4);
-        ASSERT_EQ(var_i64->dims[1], 2);
+        ASSERT_EQ(var_i64->global, 1);
+        ASSERT_EQ(var_i64->nsteps, NSteps);
+        ASSERT_EQ(var_i64->dims[0], world_size * Ny);
+        ASSERT_EQ(var_i64->dims[1], Nx);
         ADIOS_VARINFO *var_u8 = adios_inq_var(f, "u8");
         ASSERT_NE(var_u8, nullptr);
         ASSERT_EQ(var_u8->ndim, 2);
-        ASSERT_EQ(var_u8->dims[0], 4);
-        ASSERT_EQ(var_u8->dims[1], 2);
+        ASSERT_EQ(var_u8->global, 1);
+        ASSERT_EQ(var_u8->nsteps, NSteps);
+        ASSERT_EQ(var_u8->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u8->dims[1], Nx);
         ADIOS_VARINFO *var_u16 = adios_inq_var(f, "u16");
         ASSERT_NE(var_u16, nullptr);
         ASSERT_EQ(var_u16->ndim, 2);
-        ASSERT_EQ(var_u16->dims[0], 4);
-        ASSERT_EQ(var_u16->dims[1], 2);
+        ASSERT_EQ(var_u16->global, 1);
+        ASSERT_EQ(var_u16->nsteps, NSteps);
+        ASSERT_EQ(var_u16->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u16->dims[1], Nx);
         ADIOS_VARINFO *var_u32 = adios_inq_var(f, "u32");
         ASSERT_NE(var_u32, nullptr);
         ASSERT_EQ(var_u32->ndim, 2);
-        ASSERT_EQ(var_u32->dims[0], 4);
-        ASSERT_EQ(var_u32->dims[1], 2);
+        ASSERT_EQ(var_u32->global, 1);
+        ASSERT_EQ(var_u32->nsteps, NSteps);
+        ASSERT_EQ(var_u32->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u32->dims[1], Nx);
         ADIOS_VARINFO *var_u64 = adios_inq_var(f, "u64");
         ASSERT_NE(var_u64, nullptr);
         ASSERT_EQ(var_u64->ndim, 2);
-        ASSERT_EQ(var_u64->dims[0], 4);
-        ASSERT_EQ(var_u64->dims[1], 2);
+        ASSERT_EQ(var_u64->global, 1);
+        ASSERT_EQ(var_u64->nsteps, NSteps);
+        ASSERT_EQ(var_u64->dims[0], world_size * Ny);
+        ASSERT_EQ(var_u64->dims[1], Nx);
         ADIOS_VARINFO *var_r32 = adios_inq_var(f, "r32");
         ASSERT_NE(var_r32, nullptr);
         ASSERT_EQ(var_r32->ndim, 2);
-        ASSERT_EQ(var_r32->dims[0], 4);
-        ASSERT_EQ(var_r32->dims[1], 2);
+        ASSERT_EQ(var_r32->global, 1);
+        ASSERT_EQ(var_r32->nsteps, NSteps);
+        ASSERT_EQ(var_r32->dims[0], world_size * Ny);
+        ASSERT_EQ(var_r32->dims[1], Nx);
         ADIOS_VARINFO *var_r64 = adios_inq_var(f, "r64");
         ASSERT_NE(var_r64, nullptr);
         ASSERT_EQ(var_r64->ndim, 2);
-        ASSERT_EQ(var_r64->dims[0], 4);
-        ASSERT_EQ(var_r64->dims[1], 2);
-
-        std::array<char, 8> I8;
-        std::array<int16_t, 8> I16;
-        std::array<int32_t, 8> I32;
-        std::array<int64_t, 8> I64;
-        std::array<unsigned char, 8> U8;
-        std::array<uint16_t, 8> U16;
-        std::array<uint32_t, 8> U32;
-        std::array<uint64_t, 8> U64;
-        std::array<float, 8> R32;
-        std::array<double, 8> R64;
-
-        uint64_t start[2] = {0, 0};
-        uint64_t count[2] = {4, 2};
+        ASSERT_EQ(var_r64->global, 1);
+        ASSERT_EQ(var_r64->nsteps, NSteps);
+        ASSERT_EQ(var_r64->dims[0], world_size * Ny);
+        ASSERT_EQ(var_r64->dims[1], Nx);
+
+        // The receive buffers must be at least as large as the selection
+        // being read (Nx * Ny elements); with smaller arrays the values
+        // come back as garbage, most visibly for double and uint64_t.
+        std::array<char, Nx * Ny> I8;
+        std::array<int16_t, Nx * Ny> I16;
+        std::array<int32_t, Nx * Ny> I32;
+        std::array<int64_t, Nx * Ny> I64;
+        std::array<unsigned char, Nx * Ny> U8;
+        std::array<uint16_t, Nx * Ny> U16;
+        std::array<uint32_t, Nx * Ny> U32;
+        std::array<uint64_t, Nx * Ny> U64;
+        std::array<float, Nx * Ny> R32;
+        std::array<double, Nx * Ny> R64;
+
+        uint64_t start[2] = {static_cast<uint64_t>(world_rank * Ny), 0};
+        uint64_t count[2] = {Ny, Nx};
         ADIOS_SELECTION *sel = adios_selection_boundingbox(2, start, count);
 
         // Read stuff
-        for (size_t t = 0; t < 3; ++t)
+        for (size_t t = 0; t < NSteps; ++t)
         {
+            // Generate test data for each rank uniquely
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, t, world_rank, world_size);
             // Read the current step
             adios_schedule_read_byid(f, sel, var_i8->varid, t, 1, I8.data());
             adios_schedule_read_byid(f, sel, var_i16->varid, t, 1, I16.data());
@@ -675,22 +855,22 @@ TEST_F(ADIOS1WriteReadTest, ADIOS2ADIOS1WriteADIOS1Read2D4x2)
             adios_perform_reads(f, 1);
 
             // Check if it's correct
-            for (size_t i = 0; i < 8; ++i)
+            for (size_t i = 0; i < Nx * Ny; ++i)
             {
                 std::stringstream ss;
-                ss << "t=" << t << " i=" << i;
+                ss << "t=" << t << " i=" << i << " rank=" << world_rank;
                 std::string msg = ss.str();
 
-                EXPECT_EQ(I8[i], m_TestData.I8[i + t]) << msg;
-                EXPECT_EQ(I16[i], m_TestData.I16[i + t]) << msg;
-                EXPECT_EQ(I32[i], m_TestData.I32[i + t]) << msg;
-                EXPECT_EQ(I64[i], m_TestData.I64[i + t]) << msg;
-                EXPECT_EQ(U8[i], m_TestData.U8[i + t]) << msg;
-                EXPECT_EQ(U16[i], m_TestData.U16[i + t]) << msg;
-                EXPECT_EQ(U32[i], m_TestData.U32[i + t]) << msg;
-                EXPECT_EQ(U64[i], m_TestData.U64[i + t]) << msg;
-                EXPECT_EQ(R32[i], m_TestData.R32[i + t]) << msg;
-                EXPECT_EQ(R64[i], m_TestData.R64[i + t]) << msg;
+                EXPECT_EQ(I8[i], currentTestData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], currentTestData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], currentTestData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], currentTestData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], currentTestData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], currentTestData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], currentTestData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], currentTestData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], currentTestData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], currentTestData.R64[i]) << msg;
             }
         }
 
@@ -739,6 +919,7 @@ int main(int argc, char **argv)
 {
 #ifdef ADIOS2_HAVE_MPI
     MPI_Init(nullptr, nullptr);
+    adios_init_noxml(MPI_COMM_WORLD);
 #endif
 
     ::testing::InitGoogleTest(&argc, argv);