diff --git a/source/adios2/engine/bp/BPFileReader.cpp b/source/adios2/engine/bp/BPFileReader.cpp
index 5ecae315206f31d599484e0ac1c5ab67f017a655..5c70c2a9f5719cd741076beebea385e0145ee57a 100644
--- a/source/adios2/engine/bp/BPFileReader.cpp
+++ b/source/adios2/engine/bp/BPFileReader.cpp
@@ -27,11 +27,39 @@ BPFileReader::BPFileReader(IO &io, const std::string &name, const Mode mode,
 
 StepStatus BPFileReader::BeginStep(StepMode mode, const float timeoutSeconds)
 {
-    if (m_DebugMode && mode != StepMode::NextAvailable)
+    if (m_DebugMode)
+    {
+        if (mode != StepMode::NextAvailable)
+        {
+            throw std::invalid_argument("ERROR: mode is not supported yet, "
+                                        "only NextAvailable is valid for "
+                                        "engine BPFileReader, in call to "
+                                        "BeginStep\n");
+        }
+
+        if (!m_BP3Deserializer.m_PerformedGets)
+        {
+            throw std::invalid_argument(
+                "ERROR: existing variables subscribed with "
+                "GetDeferred, did you forget to call "
+                "PerformGets() or EndStep()?, in call to BeginStep\n");
+        }
+    }
+
+    StepStatus status = StepStatus::OK;
+
+    if (m_FirstStep)
+    {
+        m_FirstStep = false;
+    }
+    else
+    {
+        ++m_CurrentStep;
+    }
+
+    if (m_CurrentStep >= m_BP3Deserializer.m_MetadataSet.StepsCount - 1)
     {
-        throw std::invalid_argument("ERROR: mode is not supported yet for "
-                                    "engine BPFileReader, in call to "
-                                    "BeginStep\n");
+        status = StepStatus::EndOfStream;
     }
 
     const auto &variablesData = m_IO.GetVariablesDataMap();
@@ -50,26 +78,22 @@ StepStatus BPFileReader::BeginStep(StepMode mode, const float timeoutSeconds)
         auto variable = m_IO.InquireVariable<T>(name);                         \
         if (mode == StepMode::NextAvailable)                                   \
         {                                                                      \
-            variable->SetStepSelection({m_CurrentStep + 1, 1});                \
+            variable->SetStepSelection({m_CurrentStep, 1});                    \
         }                                                                      \
     }
         ADIOS2_FOREACH_TYPE_1ARG(declare_type)
 #undef declare_type
     }
 
-    return StepStatus::OK;
+    return status;
 }
 
 void BPFileReader::EndStep()
 {
-    if (m_DebugMode && !m_BP3Deserializer.m_PerformedGets)
+    if (!m_BP3Deserializer.m_PerformedGets)
     {
-        throw std::invalid_argument("ERROR: existing variables subscribed with "
-                                    "GetDeferred, did you forget to call "
-                                    "PerformGets()?, in call to EndStep\n");
+        PerformGets();
     }
-
-    ++m_CurrentStep;
 }
 
 void BPFileReader::PerformGets()
@@ -82,6 +106,11 @@ void BPFileReader::PerformGets()
 
 void BPFileReader::Close(const int transportIndex)
 {
+    if (!m_BP3Deserializer.m_PerformedGets)
+    {
+        PerformGets();
+    }
+
     m_SubFileManager.CloseFiles();
     m_FileManager.CloseFiles();
 }
@@ -141,7 +170,7 @@ void BPFileReader::InitBuffer()
     BroadcastVector(m_BP3Deserializer.m_Metadata.m_Buffer, m_MPIComm);
 
     // fills IO with Variables and Attributes
-    m_BP3Deserializer.ParseMetadata(m_IO);
+    m_BP3Deserializer.ParseMetadata(m_BP3Deserializer.m_Metadata, m_IO);
 }
 
 #define declare_type(T)                                                        \
diff --git a/source/adios2/engine/bp/BPFileReader.h b/source/adios2/engine/bp/BPFileReader.h
index 18f4be8cfbd140a3330e6d6a1e684c7eb9d3606c..1a7e4e4e9bb2664fac338e1b892602332285c767 100644
--- a/source/adios2/engine/bp/BPFileReader.h
+++ b/source/adios2/engine/bp/BPFileReader.h
@@ -56,8 +56,9 @@ private:
     transportman::TransportMan m_FileManager;
     transportman::TransportMan m_SubFileManager;
 
-    /** updates if step=1 in EndStep function, used for per-step reads */
+    /** used for per-step reads, TODO: to be moved to BP3Deserializer */
     size_t m_CurrentStep = 0;
+    bool m_FirstStep = true;
 
     void Init();
     void InitTransports();
diff --git a/source/adios2/engine/bp/BPFileWriter.cpp b/source/adios2/engine/bp/BPFileWriter.cpp
index 1cfc1ae6c4ab758b940c6f1db1692c07357171a3..81241153f28d4d1ab1efc33a85f2fd43889d8835 100644
--- a/source/adios2/engine/bp/BPFileWriter.cpp
+++ b/source/adios2/engine/bp/BPFileWriter.cpp
@@ -56,39 +56,21 @@ void BPFileWriter::PerformPuts()
 
 void BPFileWriter::EndStep()
 {
-    if (m_DebugMode && m_BP3Serializer.m_DeferredVariables.size() > 0)
+    if (m_BP3Serializer.m_DeferredVariables.size() > 0)
     {
-        throw std::invalid_argument("ERROR: existing variables subscribed with "
-                                    "PutDeferred, did you forget to call "
-                                    "PerformPuts()?, in call to EndStep\n");
+        PerformPuts();
     }
 
     m_BP3Serializer.SerializeData(m_IO, true); // true: advances step
 }
 
-// PRIVATE
-void BPFileWriter::Init()
-{
-    InitParameters();
-    InitTransports();
-    InitBPBuffer();
-}
-
-#define declare_type(T)                                                        \
-    void BPFileWriter::DoPutSync(Variable<T> &variable, const T *values)       \
-    {                                                                          \
-        PutSyncCommon(variable, values);                                       \
-    }                                                                          \
-    void BPFileWriter::DoPutDeferred(Variable<T> &variable, const T *values)   \
-    {                                                                          \
-        PutDeferredCommon(variable, values);                                   \
-    }                                                                          \
-    void BPFileWriter::DoPutDeferred(Variable<T> &, const T &value) {}
-ADIOS2_FOREACH_TYPE_1ARG(declare_type)
-#undef declare_type
-
 void BPFileWriter::Close(const int transportIndex)
 {
+    if (m_BP3Serializer.m_DeferredVariables.size() > 0)
+    {
+        PerformPuts();
+    }
+
     // close bp buffer by serializing data and metadata
     m_BP3Serializer.CloseData(m_IO);
     // send data to corresponding transports
@@ -112,6 +94,27 @@ void BPFileWriter::Close(const int transportIndex)
 }
 
 // PRIVATE FUNCTIONS
+// PRIVATE
+void BPFileWriter::Init()
+{
+    InitParameters();
+    InitTransports();
+    InitBPBuffer();
+}
+
+#define declare_type(T)                                                        \
+    void BPFileWriter::DoPutSync(Variable<T> &variable, const T *values)       \
+    {                                                                          \
+        PutSyncCommon(variable, values);                                       \
+    }                                                                          \
+    void BPFileWriter::DoPutDeferred(Variable<T> &variable, const T *values)   \
+    {                                                                          \
+        PutDeferredCommon(variable, values);                                   \
+    }                                                                          \
+    void BPFileWriter::DoPutDeferred(Variable<T> &, const T &value) {}
+ADIOS2_FOREACH_TYPE_1ARG(declare_type)
+#undef declare_type
+
 void BPFileWriter::InitParameters()
 {
     m_BP3Serializer.InitParameters(m_IO.m_Parameters);
diff --git a/source/adios2/toolkit/format/bp3/BP3.h b/source/adios2/toolkit/format/bp3/BP3.h
index 8cf1383f4ea72dd3149590b75fcf49f62df3a2a0..f1df1bb29730305211bb8669bae96dcc5cec301c 100644
--- a/source/adios2/toolkit/format/bp3/BP3.h
+++ b/source/adios2/toolkit/format/bp3/BP3.h
@@ -2,7 +2,7 @@
  * Distributed under the OSI-approved Apache License, Version 2.0.  See
  * accompanying file Copyright.txt for details.
  *
- * BP3.h
+ * BP3.h : public header for the BP3 format
  *
  *  Created on: Apr 4, 2017
  *      Author: William F Godoy godoywf@ornl.gov
diff --git a/source/adios2/toolkit/format/bp3/BP3Base.cpp b/source/adios2/toolkit/format/bp3/BP3Base.cpp
index 655ddb101b84aa7aa7e8cecc7d4611226cb107e1..4b481759792cf1fc806b43b222d3bfed4c953f71 100644
--- a/source/adios2/toolkit/format/bp3/BP3Base.cpp
+++ b/source/adios2/toolkit/format/bp3/BP3Base.cpp
@@ -538,6 +538,21 @@ size_t BP3Base::GetProcessGroupIndexSize(const std::string name,
     return pgSize;
 }
 
+BP3Base::ProcessGroupIndex
+BP3Base::ReadProcessGroupIndexHeader(const std::vector<char> &buffer,
+                                     size_t &position) const noexcept
+{
+    ProcessGroupIndex index;
+    index.Length = ReadValue<uint16_t>(buffer, position);
+    index.Name = ReadBP3String(buffer, position);
+    index.IsFortran = ReadValue<char>(buffer, position);
+    index.ProcessID = ReadValue<int32_t>(buffer, position);
+    index.StepName = ReadBP3String(buffer, position);
+    index.Step = ReadValue<uint32_t>(buffer, position);
+    index.Offset = ReadValue<uint64_t>(buffer, position);
+    return index;
+}
+
 BP3Base::ElementIndexHeader
 BP3Base::ReadElementIndexHeader(const std::vector<char> &buffer,
                                 size_t &position) const noexcept
diff --git a/source/adios2/toolkit/format/bp3/BP3Base.h b/source/adios2/toolkit/format/bp3/BP3Base.h
index 45bf3174e62fa95f2ce23eaf8e1052ce0fd1086a..3b8574f27fcd998154810cf6decc06c144f77526 100644
--- a/source/adios2/toolkit/format/bp3/BP3Base.h
+++ b/source/adios2/toolkit/format/bp3/BP3Base.h
@@ -2,14 +2,14 @@
  * Distributed under the OSI-approved Apache License, Version 2.0.  See
  * accompanying file Copyright.txt for details.
  *
- * BP1Base.h  base class for BP1Writer and BP1Reader
+ * BP3Base.h  base class for BP3Serializer and BP3Deserializer
  *
  *  Created on: Feb 2, 2017
  *      Author: William F Godoy godoywf@ornl.gov
  */
 
-#ifndef ADIOS2_TOOLKIT_FORMAT_BP1_BP1BASE_H_
-#define ADIOS2_TOOLKIT_FORMAT_BP1_BP1BASE_H_
+#ifndef ADIOS2_TOOLKIT_FORMAT_BP3_BP3BASE_H_
+#define ADIOS2_TOOLKIT_FORMAT_BP3_BP3BASE_H_
 
 /// \cond EXCLUDE_FROM_DOXYGEN
 #include <string>
@@ -59,11 +59,12 @@ public:
         }
     };
 
+    /** Single struct containing metadata indices and tracking information */
     struct MetadataSet
     {
         /**
-         * updated with advance step, if append it will be updated to last,
-         * starts with one in ADIOS1
+         * updated with EndStep, if append it will be updated to last,
+         * starts with one in ADIOS1 BP3 format
          */
         uint32_t TimeStep = 1;
 
@@ -92,6 +93,12 @@ public:
         size_t DataPGVarsCountPosition = 0;
         /** true: currently writing to a pg, false: no current pg */
         bool DataPGIsOpen = false;
+
+        /** Used at Read, steps start at zero */
+        size_t StepsStart = 0;
+
+        /** Used at Read, number of total steps */
+        size_t StepsCount = 1;
     };
 
     struct Minifooter
@@ -316,6 +323,17 @@ protected:
         statistic_finite = 6
     };
 
+    struct ProcessGroupIndex // filled by ReadProcessGroupIndexHeader
+    {
+        uint64_t Offset = 0;   // read as uint64_t, last field of the entry
+        uint32_t Step = 0;     // ParsePGIndex uses it to update StepsCount
+        int32_t ProcessID = 0; // read as int32_t
+        uint16_t Length = 0;   // ParsePGIndex advances by Length + 2
+        std::string Name;
+        std::string StepName;
+        char IsFortran = 'n'; // 'y' makes ParsePGIndex clear m_IsRowMajor
+    };
+
     template <class T>
     struct Stats
     {
@@ -414,6 +432,10 @@ protected:
                                     const std::string timeStepName,
                                     const size_t transportsSize) const noexcept;
 
+    ProcessGroupIndex
+    ReadProcessGroupIndexHeader(const std::vector<char> &buffer,
+                                size_t &position) const noexcept;
+
     ElementIndexHeader ReadElementIndexHeader(const std::vector<char> &buffer,
                                               size_t &position) const noexcept;
 
@@ -469,4 +491,4 @@ ADIOS2_FOREACH_TYPE_1ARG(declare_template_instantiation)
 } // end namespace format
 } // end namespace adios2
 
-#endif /* ADIOS2_TOOLKIT_FORMAT_BP1_BP1BASE_H_ */
+#endif /* ADIOS2_TOOLKIT_FORMAT_BP3_BP3BASE_H_ */
diff --git a/source/adios2/toolkit/format/bp3/BP3Base.tcc b/source/adios2/toolkit/format/bp3/BP3Base.tcc
index 3a807d2614805a3aa1743636fcc1a752a045ab6d..0acb8bea42d63ecbdadf45a88919c3ded8d90c4c 100644
--- a/source/adios2/toolkit/format/bp3/BP3Base.tcc
+++ b/source/adios2/toolkit/format/bp3/BP3Base.tcc
@@ -8,8 +8,8 @@
  *      Author: William F Godoy godoywf@ornl.gov
  */
 
-#ifndef ADIOS2_TOOLKIT_FORMAT_BP1_BP3BASE_TCC_
-#define ADIOS2_TOOLKIT_FORMAT_BP1_BP3BASE_TCC_
+#ifndef ADIOS2_TOOLKIT_FORMAT_BP3_BP3BASE_TCC_
+#define ADIOS2_TOOLKIT_FORMAT_BP3_BP3BASE_TCC_
 
 #include "BP3Base.h"
 
@@ -429,4 +429,4 @@ BP3Base::ParseCharacteristics(const std::vector<char> &buffer, size_t &position,
 } // end namespace format
 } // end namespace adios2
 
-#endif /* ADIOS2_TOOLKIT_FORMAT_BP1_BP3Base_TCC_ */
+#endif /* ADIOS2_TOOLKIT_FORMAT_BP3_BP3BASE_TCC_ */
diff --git a/source/adios2/toolkit/format/bp3/BP3Deserializer.cpp b/source/adios2/toolkit/format/bp3/BP3Deserializer.cpp
index edf25648c899194f12f0c2daeea31721e66b6cd5..8ea187ef81a152b9a834a82955138bc67f1ab3ce 100644
--- a/source/adios2/toolkit/format/bp3/BP3Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp3/BP3Deserializer.cpp
@@ -32,12 +32,12 @@ BP3Deserializer::BP3Deserializer(MPI_Comm mpiComm, const bool debugMode)
 {
 }
 
-void BP3Deserializer::ParseMetadata(IO &io)
+void BP3Deserializer::ParseMetadata(const BufferSTL &bufferSTL, IO &io)
 {
-    ParseMinifooter();
-    ParsePGIndex();
-    ParseVariablesIndex(io);
-    ParseAttributesIndex(io);
+    ParseMinifooter(bufferSTL);
+    ParsePGIndex(bufferSTL);
+    ParseVariablesIndex(bufferSTL, io);
+    ParseAttributesIndex(bufferSTL, io);
 }
 
 void BP3Deserializer::ClipContiguousMemory(
@@ -96,7 +96,7 @@ void BP3Deserializer::GetStringFromMetadata(
 }
 
 // PRIVATE
-void BP3Deserializer::ParseMinifooter()
+void BP3Deserializer::ParseMinifooter(const BufferSTL &bufferSTL)
 {
     auto lf_GetEndianness = [](const uint8_t endianness, bool &isLittleEndian) {
 
@@ -111,9 +111,8 @@ void BP3Deserializer::ParseMinifooter()
         }
     };
 
-    const auto &buffer = m_Metadata.m_Buffer;
+    const auto &buffer = bufferSTL.m_Buffer;
     const size_t bufferSize = buffer.size();
-
     size_t position = bufferSize - 4;
     const uint8_t endianess = ReadValue<uint8_t>(buffer, position);
     lf_GetEndianness(endianess, m_Minifooter.IsLittleEndian);
@@ -140,25 +139,35 @@ void BP3Deserializer::ParseMinifooter()
     m_Minifooter.AttributesIndexStart = ReadValue<uint64_t>(buffer, position);
 }
 
-void BP3Deserializer::ParsePGIndex()
+void BP3Deserializer::ParsePGIndex(const BufferSTL &bufferSTL)
 {
-    const auto &buffer = m_Metadata.m_Buffer;
-    auto &position = m_Metadata.m_Position;
-    position = m_Minifooter.PGIndexStart;
+    const auto &buffer = bufferSTL.m_Buffer;
+    size_t position = m_Minifooter.PGIndexStart;
 
     m_MetadataSet.DataPGCount = ReadValue<uint64_t>(buffer, position);
-    position += 10; // skipping lengths
-    const uint16_t nameLength = ReadValue<uint16_t>(buffer, position);
-    position += static_cast<size_t>(nameLength); // skipping name
-    const char isFortran = ReadValue<char>(buffer, position);
+    const size_t length = ReadValue<uint64_t>(buffer, position);
 
-    if (isFortran == 'y')
+    size_t localPosition = 0;
+
+    while (localPosition < length)
     {
-        m_IsRowMajor = false;
+        ProcessGroupIndex index = ReadProcessGroupIndexHeader(buffer, position);
+        if (index.IsFortran == 'y')
+        {
+            m_IsRowMajor = false;
+        }
+
+        const size_t currentStep = static_cast<size_t>(index.Step);
+        if (currentStep > m_MetadataSet.StepsCount)
+        {
+            m_MetadataSet.StepsCount = currentStep;
+        }
+
+        localPosition += index.Length + 2;
     }
 }
 
-void BP3Deserializer::ParseVariablesIndex(IO &io)
+void BP3Deserializer::ParseVariablesIndex(const BufferSTL &bufferSTL, IO &io)
 {
     auto lf_ReadElementIndex = [&](IO &io, const std::vector<char> &buffer,
                                    size_t position) {
@@ -169,7 +178,6 @@ void BP3Deserializer::ParseVariablesIndex(IO &io)
         switch (header.DataType)
         {
 
-        // TODO: string
         case (type_string):
         {
             DefineVariableInIO<std::string>(header, io, buffer, position);
@@ -267,7 +275,7 @@ void BP3Deserializer::ParseVariablesIndex(IO &io)
     };
 
     // STARTS HERE
-    const auto &buffer = m_Metadata.m_Buffer;
+    const auto &buffer = bufferSTL.m_Buffer;
     size_t position = m_Minifooter.VarsIndexStart;
 
     const uint32_t count = ReadValue<uint32_t>(buffer, position);
@@ -317,7 +325,7 @@ void BP3Deserializer::ParseVariablesIndex(IO &io)
     }
 }
 
-void BP3Deserializer::ParseAttributesIndex(IO &io)
+void BP3Deserializer::ParseAttributesIndex(const BufferSTL &bufferSTL, IO &io)
 {
     auto lf_ReadElementIndex = [&](IO &io, const std::vector<char> &buffer,
                                    size_t position) {
@@ -409,7 +417,7 @@ void BP3Deserializer::ParseAttributesIndex(IO &io)
         } // end switch
     };
 
-    const auto &buffer = m_Metadata.m_Buffer;
+    const auto &buffer = bufferSTL.m_Buffer;
     size_t position = m_Minifooter.AttributesIndexStart;
 
     const uint32_t count = ReadValue<uint32_t>(buffer, position);
diff --git a/source/adios2/toolkit/format/bp3/BP3Deserializer.h b/source/adios2/toolkit/format/bp3/BP3Deserializer.h
index 0649a03dfb0eee4768baf5e1751a220523646d0f..369e79e4f3d0df8b854623ec8447b7e412a6d310 100644
--- a/source/adios2/toolkit/format/bp3/BP3Deserializer.h
+++ b/source/adios2/toolkit/format/bp3/BP3Deserializer.h
@@ -8,8 +8,8 @@
  *      Author: William F Godoy godoywf@ornl.gov
  */
 
-#ifndef ADIOS2_TOOLKIT_FORMAT_BP1_BP3DESERIALIZER_H_
-#define ADIOS2_TOOLKIT_FORMAT_BP1_BP3DESERIALIZER_H_
+#ifndef ADIOS2_TOOLKIT_FORMAT_BP3_BP3DESERIALIZER_H_
+#define ADIOS2_TOOLKIT_FORMAT_BP3_BP3DESERIALIZER_H_
 
 #include <mutex>
 #include <set>
@@ -31,7 +31,7 @@ public:
     /** BP Minifooter fields */
     Minifooter m_Minifooter;
 
-    bool m_PerformedGets = false;
+    bool m_PerformedGets = true;
 
     /**
      * Unique constructor
@@ -42,7 +42,7 @@ public:
 
     ~BP3Deserializer() = default;
 
-    void ParseMetadata(IO &io);
+    void ParseMetadata(const BufferSTL &bufferSTL, IO &io);
 
     // Sync functions
     template <class T>
@@ -68,10 +68,10 @@ private:
 
     static std::mutex m_Mutex;
 
-    void ParseMinifooter();
-    void ParsePGIndex();
-    void ParseVariablesIndex(IO &io);
-    void ParseAttributesIndex(IO &io);
+    void ParseMinifooter(const BufferSTL &bufferSTL);
+    void ParsePGIndex(const BufferSTL &bufferSTL);
+    void ParseVariablesIndex(const BufferSTL &bufferSTL, IO &io);
+    void ParseAttributesIndex(const BufferSTL &bufferSTL, IO &io);
 
     /**
      * Reads a variable index element (serialized) and calls IO.DefineVariable
@@ -139,4 +139,4 @@ ADIOS2_FOREACH_TYPE_1ARG(declare_template_instantiation)
 } // end namespace format
 } // end namespace adios2
 
-#endif /* ADIOS2_TOOLKIT_FORMAT_BP1_BP3DESERIALIZER_H_ */
+#endif /* ADIOS2_TOOLKIT_FORMAT_BP3_BP3DESERIALIZER_H_ */
diff --git a/source/adios2/toolkit/format/bp3/BP3Deserializer.tcc b/source/adios2/toolkit/format/bp3/BP3Deserializer.tcc
index 7fb69ef26c6af86023931eaaf4bb1929245e1d8e..9b4fe65c1c1b78bcbe9d81f469eecb3159749168 100644
--- a/source/adios2/toolkit/format/bp3/BP3Deserializer.tcc
+++ b/source/adios2/toolkit/format/bp3/BP3Deserializer.tcc
@@ -220,7 +220,7 @@ BP3Deserializer::GetSubFileInfo(const Variable<T> &variable) const
 
     const auto &buffer = m_Metadata.m_Buffer;
 
-    const size_t stepStart = variable.m_StepsStart;
+    const size_t stepStart = variable.m_StepsStart + 1;
     const size_t stepEnd =
         stepStart + variable.m_StepsCount; // inclusive or exclusive?
 
diff --git a/testing/adios2/engine/bp/TestBPWriteReadADIOS2.cpp b/testing/adios2/engine/bp/TestBPWriteReadADIOS2.cpp
index 4b70b08279be936111f860752bc192a25ad364be..7640cd3bd8a2a15d0e84b5ded2db13ff79bc6d72 100644
--- a/testing/adios2/engine/bp/TestBPWriteReadADIOS2.cpp
+++ b/testing/adios2/engine/bp/TestBPWriteReadADIOS2.cpp
@@ -264,20 +264,18 @@ TEST_F(BPWriteReadTestADIOS2, ADIOS2BPWriteRead1D8)
 
         for (size_t t = 0; t < NSteps; ++t)
         {
-            var_iString->SetStepSelection({t + 1, 1});
+            var_i8->SetStepSelection({t, 1});
+            var_i16->SetStepSelection({t, 1});
+            var_i32->SetStepSelection({t, 1});
+            var_i64->SetStepSelection({t, 1});
 
-            var_i8->SetStepSelection({t + 1, 1});
-            var_i16->SetStepSelection({t + 1, 1});
-            var_i32->SetStepSelection({t + 1, 1});
-            var_i64->SetStepSelection({t + 1, 1});
+            var_u8->SetStepSelection({t, 1});
+            var_u16->SetStepSelection({t, 1});
+            var_u32->SetStepSelection({t, 1});
+            var_u64->SetStepSelection({t, 1});
 
-            var_u8->SetStepSelection({t + 1, 1});
-            var_u16->SetStepSelection({t + 1, 1});
-            var_u32->SetStepSelection({t + 1, 1});
-            var_u64->SetStepSelection({t + 1, 1});
-
-            var_r32->SetStepSelection({t + 1, 1});
-            var_r64->SetStepSelection({t + 1, 1});
+            var_r32->SetStepSelection({t, 1});
+            var_r64->SetStepSelection({t, 1});
 
             // Generate test data for each rank uniquely
             SmallTestData currentTestData = generateNewSmallTestData(
@@ -566,18 +564,18 @@ TEST_F(BPWriteReadTestADIOS2, ADIOS2BPWriteRead2D2x4)
 
         for (size_t t = 0; t < NSteps; ++t)
         {
-            var_i8->SetStepSelection({t + 1, 1});
-            var_i16->SetStepSelection({t + 1, 1});
-            var_i32->SetStepSelection({t + 1, 1});
-            var_i64->SetStepSelection({t + 1, 1});
+            var_i8->SetStepSelection({t, 1});
+            var_i16->SetStepSelection({t, 1});
+            var_i32->SetStepSelection({t, 1});
+            var_i64->SetStepSelection({t, 1});
 
-            var_u8->SetStepSelection({t + 1, 1});
-            var_u16->SetStepSelection({t + 1, 1});
-            var_u32->SetStepSelection({t + 1, 1});
-            var_u64->SetStepSelection({t + 1, 1});
+            var_u8->SetStepSelection({t, 1});
+            var_u16->SetStepSelection({t, 1});
+            var_u32->SetStepSelection({t, 1});
+            var_u64->SetStepSelection({t, 1});
 
-            var_r32->SetStepSelection({t + 1, 1});
-            var_r64->SetStepSelection({t + 1, 1});
+            var_r32->SetStepSelection({t, 1});
+            var_r64->SetStepSelection({t, 1});
 
             bpReader.GetDeferred(*var_iString, IString);
 
@@ -859,18 +857,18 @@ TEST_F(BPWriteReadTestADIOS2, ADIOS2BPWriteRead2D4x2)
 
         for (size_t t = 0; t < NSteps; ++t)
         {
-            var_i8->SetStepSelection({t + 1, 1});
-            var_i16->SetStepSelection({t + 1, 1});
-            var_i32->SetStepSelection({t + 1, 1});
-            var_i64->SetStepSelection({t + 1, 1});
+            var_i8->SetStepSelection({t, 1});
+            var_i16->SetStepSelection({t, 1});
+            var_i32->SetStepSelection({t, 1});
+            var_i64->SetStepSelection({t, 1});
 
-            var_u8->SetStepSelection({t + 1, 1});
-            var_u16->SetStepSelection({t + 1, 1});
-            var_u32->SetStepSelection({t + 1, 1});
-            var_u64->SetStepSelection({t + 1, 1});
+            var_u8->SetStepSelection({t, 1});
+            var_u16->SetStepSelection({t, 1});
+            var_u32->SetStepSelection({t, 1});
+            var_u64->SetStepSelection({t, 1});
 
-            var_r32->SetStepSelection({t + 1, 1});
-            var_r64->SetStepSelection({t + 1, 1});
+            var_r32->SetStepSelection({t, 1});
+            var_r64->SetStepSelection({t, 1});
 
             bpReader.GetDeferred(*var_i8, I8.data());
             bpReader.GetDeferred(*var_i16, I16.data());
@@ -913,389 +911,6 @@ TEST_F(BPWriteReadTestADIOS2, ADIOS2BPWriteRead2D4x2)
     }
 }
 
-TEST_F(BPWriteReadTestADIOS2, ADIOS2BPWriteMissingPerformPuts)
-{
-    // Each process would write a 1x8 array and all processes would
-    // form a mpiSize * Nx 1D array
-    const std::string fname("ADIOS2BPWriteMissingPerformPuts.bp");
-
-    int mpiRank = 0, mpiSize = 1;
-    // Number of rows
-    const size_t Nx = 8;
-
-    // Number of steps
-    const size_t NSteps = 3;
-
-#ifdef ADIOS2_HAVE_MPI
-    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
-#endif
-
-// Write test data using BP
-
-#ifdef ADIOS2_HAVE_MPI
-    adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
-#else
-    adios2::ADIOS adios(true);
-#endif
-    {
-        adios2::IO &io = adios.DeclareIO("TestIO");
-
-        // Declare 1D variables (NumOfProcesses * Nx)
-        // The local process' part (start, count) can be defined now or later
-        // before Write().
-        {
-            const adios2::Dims shape{static_cast<size_t>(Nx * mpiSize)};
-            const adios2::Dims start{static_cast<size_t>(Nx * mpiRank)};
-            const adios2::Dims count{Nx};
-
-            auto &var_iString = io.DefineVariable<std::string>("iString");
-            auto &var_i8 = io.DefineVariable<int8_t>("i8", shape, start, count);
-            auto &var_i16 =
-                io.DefineVariable<int16_t>("i16", shape, start, count);
-            auto &var_i32 =
-                io.DefineVariable<int32_t>("i32", shape, start, count);
-            auto &var_i64 =
-                io.DefineVariable<int64_t>("i64", shape, start, count);
-            auto &var_u8 =
-                io.DefineVariable<uint8_t>("u8", shape, start, count);
-            auto &var_u16 =
-                io.DefineVariable<uint16_t>("u16", shape, start, count);
-            auto &var_u32 =
-                io.DefineVariable<uint32_t>("u32", shape, start, count);
-            auto &var_u64 =
-                io.DefineVariable<uint64_t>("u64", shape, start, count);
-            auto &var_r32 =
-                io.DefineVariable<float>("r32", shape, start, count);
-            auto &var_r64 =
-                io.DefineVariable<double>("r64", shape, start, count);
-        }
-
-        adios2::Engine &bpWriter = io.Open(fname, adios2::Mode::Write);
-
-        for (size_t step = 0; step < NSteps; ++step)
-        {
-            // Generate test data for each process uniquely
-            SmallTestData currentTestData = generateNewSmallTestData(
-                m_TestData, static_cast<int>(step), mpiRank, mpiSize);
-
-            // Retrieve the variables that previously went out of scope
-            auto &var_iString = *io.InquireVariable<std::string>("iString");
-            auto &var_i8 = *io.InquireVariable<int8_t>("i8");
-            auto &var_i16 = *io.InquireVariable<int16_t>("i16");
-            auto &var_i32 = *io.InquireVariable<int32_t>("i32");
-            auto &var_i64 = *io.InquireVariable<int64_t>("i64");
-            auto &var_u8 = *io.InquireVariable<uint8_t>("u8");
-            auto &var_u16 = *io.InquireVariable<uint16_t>("u16");
-            auto &var_u32 = *io.InquireVariable<uint32_t>("u32");
-            auto &var_u64 = *io.InquireVariable<uint64_t>("u64");
-            auto &var_r32 = *io.InquireVariable<float>("r32");
-            auto &var_r64 = *io.InquireVariable<double>("r64");
-
-            // Make a 1D selection to describe the local dimensions of the
-            // variable we write and its offsets in the global spaces
-            adios2::Box<adios2::Dims> sel({mpiRank * Nx}, {Nx});
-
-            EXPECT_THROW(var_iString.SetSelection(sel), std::invalid_argument);
-            var_i8.SetSelection(sel);
-            var_i16.SetSelection(sel);
-            var_i32.SetSelection(sel);
-            var_i64.SetSelection(sel);
-            var_u8.SetSelection(sel);
-            var_u16.SetSelection(sel);
-            var_u32.SetSelection(sel);
-            var_u64.SetSelection(sel);
-            var_r32.SetSelection(sel);
-            var_r64.SetSelection(sel);
-
-            // Write each one
-            // fill in the variable with values from starting index to
-            // starting index + count
-            bpWriter.BeginStep();
-
-            bpWriter.PutDeferred(var_iString, currentTestData.S1);
-            bpWriter.PutDeferred(var_i8, currentTestData.I8.data());
-            bpWriter.PutDeferred(var_i16, currentTestData.I16.data());
-            bpWriter.PutDeferred(var_i32, currentTestData.I32.data());
-            bpWriter.PutDeferred(var_i64, currentTestData.I64.data());
-            bpWriter.PutDeferred(var_u8, currentTestData.U8.data());
-            bpWriter.PutDeferred(var_u16, currentTestData.U16.data());
-            bpWriter.PutDeferred(var_u32, currentTestData.U32.data());
-            bpWriter.PutDeferred(var_u64, currentTestData.U64.data());
-            bpWriter.PutDeferred(var_r32, currentTestData.R32.data());
-            bpWriter.PutDeferred(var_r64, currentTestData.R64.data());
-
-            EXPECT_THROW(bpWriter.EndStep(), std::invalid_argument);
-        }
-
-        // Close the file
-        bpWriter.Close();
-    }
-}
-
-TEST_F(BPWriteReadTestADIOS2, ADIOS2BPReadMissingPerformGets)
-{
-    // Each process would write a 4x2 array and all processes would
-    // form a 2D 4 * (NumberOfProcess * Nx) matrix where Nx is 2 here
-    const std::string fname("ADIOS2BPReadMissingPerformGets.bp");
-
-    int mpiRank = 0, mpiSize = 1;
-    // Number of rows
-    const std::size_t Nx = 2;
-    // Number of cols
-    const std::size_t Ny = 4;
-
-    // Number of steps
-    const std::size_t NSteps = 3;
-
-#ifdef ADIOS2_HAVE_MPI
-    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
-#endif
-
-// Write test data using ADIOS2
-
-#ifdef ADIOS2_HAVE_MPI
-    adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
-#else
-    adios2::ADIOS adios(true);
-#endif
-    {
-        adios2::IO &io = adios.DeclareIO("TestIO");
-
-        // Declare 2D variables (4 * (NumberOfProcess * Nx))
-        // The local process' part (start, count) can be defined now or later
-        // before Write().
-        {
-            adios2::Dims shape{static_cast<unsigned int>(Ny),
-                               static_cast<unsigned int>(mpiSize * Nx)};
-            adios2::Dims start{static_cast<unsigned int>(0),
-                               static_cast<unsigned int>(mpiRank * Nx)};
-            adios2::Dims count{static_cast<unsigned int>(Ny),
-                               static_cast<unsigned int>(Nx)};
-            auto &var_i8 = io.DefineVariable<int8_t>("i8", shape, start, count);
-            auto &var_i16 =
-                io.DefineVariable<int16_t>("i16", shape, start, count);
-            auto &var_i32 =
-                io.DefineVariable<int32_t>("i32", shape, start, count);
-            auto &var_i64 =
-                io.DefineVariable<int64_t>("i64", shape, start, count);
-            auto &var_u8 =
-                io.DefineVariable<uint8_t>("u8", shape, start, count);
-            auto &var_u16 =
-                io.DefineVariable<uint16_t>("u16", shape, start, count);
-            auto &var_u32 =
-                io.DefineVariable<uint32_t>("u32", shape, start, count);
-            auto &var_u64 =
-                io.DefineVariable<uint64_t>("u64", shape, start, count);
-            auto &var_r32 =
-                io.DefineVariable<float>("r32", shape, start, count);
-            auto &var_r64 =
-                io.DefineVariable<double>("r64", shape, start, count);
-        }
-
-        // Create the BP Engine
-        io.SetEngine("BPFileWriter");
-
-        io.AddTransport("file");
-
-        adios2::Engine &bpWriter = io.Open(fname, adios2::Mode::Write);
-
-        for (size_t step = 0; step < NSteps; ++step)
-        {
-            // Generate test data for each process uniquely
-            SmallTestData currentTestData = generateNewSmallTestData(
-                m_TestData, static_cast<int>(step), mpiRank, mpiSize);
-
-            // Retrieve the variables that previously went out of scope
-            auto &var_i8 = *io.InquireVariable<int8_t>("i8");
-            auto &var_i16 = *io.InquireVariable<int16_t>("i16");
-            auto &var_i32 = *io.InquireVariable<int32_t>("i32");
-            auto &var_i64 = *io.InquireVariable<int64_t>("i64");
-            auto &var_u8 = *io.InquireVariable<uint8_t>("u8");
-            auto &var_u16 = *io.InquireVariable<uint16_t>("u16");
-            auto &var_u32 = *io.InquireVariable<uint32_t>("u32");
-            auto &var_u64 = *io.InquireVariable<uint64_t>("u64");
-            auto &var_r32 = *io.InquireVariable<float>("r32");
-            auto &var_r64 = *io.InquireVariable<double>("r64");
-
-            // Make a 2D selection to describe the local dimensions of the
-            // variable we write and its offsets in the global spaces
-            adios2::Box<adios2::Dims> sel(
-                {0, static_cast<unsigned int>(mpiRank * Nx)}, {Ny, Nx});
-            var_i8.SetSelection(sel);
-            var_i16.SetSelection(sel);
-            var_i32.SetSelection(sel);
-            var_i64.SetSelection(sel);
-            var_u8.SetSelection(sel);
-            var_u16.SetSelection(sel);
-            var_u32.SetSelection(sel);
-            var_u64.SetSelection(sel);
-            var_r32.SetSelection(sel);
-            var_r64.SetSelection(sel);
-
-            // Write each one
-            // fill in the variable with values from starting index to
-            // starting index + count
-            bpWriter.BeginStep();
-            bpWriter.PutSync(var_i8, currentTestData.I8.data());
-            bpWriter.PutSync(var_i16, currentTestData.I16.data());
-            bpWriter.PutSync(var_i32, currentTestData.I32.data());
-            bpWriter.PutSync(var_i64, currentTestData.I64.data());
-            bpWriter.PutSync(var_u8, currentTestData.U8.data());
-            bpWriter.PutSync(var_u16, currentTestData.U16.data());
-            bpWriter.PutSync(var_u32, currentTestData.U32.data());
-            bpWriter.PutSync(var_u64, currentTestData.U64.data());
-            bpWriter.PutSync(var_r32, currentTestData.R32.data());
-            bpWriter.PutSync(var_r64, currentTestData.R64.data());
-            bpWriter.EndStep();
-        }
-
-        // Close the file
-        bpWriter.Close();
-    }
-
-    {
-        adios2::IO &io = adios.DeclareIO("ReadIO");
-
-        adios2::Engine &bpReader = io.Open(fname, adios2::Mode::Read);
-
-        auto var_i8 = io.InquireVariable<int8_t>("i8");
-        ASSERT_NE(var_i8, nullptr);
-        ASSERT_EQ(var_i8->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_i8->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_i8->m_Shape[0], Ny);
-        ASSERT_EQ(var_i8->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_i16 = io.InquireVariable<int16_t>("i16");
-        ASSERT_NE(var_i16, nullptr);
-        ASSERT_EQ(var_i16->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_i16->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_i16->m_Shape[0], Ny);
-        ASSERT_EQ(var_i16->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_i32 = io.InquireVariable<int32_t>("i32");
-        ASSERT_NE(var_i32, nullptr);
-        ASSERT_EQ(var_i32->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_i32->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_i32->m_Shape[0], Ny);
-        ASSERT_EQ(var_i32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_i64 = io.InquireVariable<int64_t>("i64");
-        ASSERT_NE(var_i64, nullptr);
-        ASSERT_EQ(var_i64->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_i64->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_i64->m_Shape[0], Ny);
-        ASSERT_EQ(var_i64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_u8 = io.InquireVariable<uint8_t>("u8");
-        ASSERT_NE(var_u8, nullptr);
-        ASSERT_EQ(var_u8->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_u8->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_u8->m_Shape[0], Ny);
-        ASSERT_EQ(var_u8->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_u16 = io.InquireVariable<uint16_t>("u16");
-        ASSERT_NE(var_u16, nullptr);
-        ASSERT_EQ(var_u16->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_u16->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_u16->m_Shape[0], Ny);
-        ASSERT_EQ(var_u16->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_u32 = io.InquireVariable<uint32_t>("u32");
-        ASSERT_NE(var_u32, nullptr);
-        ASSERT_EQ(var_u32->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_u32->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_u32->m_Shape[0], Ny);
-        ASSERT_EQ(var_u32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_u64 = io.InquireVariable<uint64_t>("u64");
-        ASSERT_NE(var_u64, nullptr);
-        ASSERT_EQ(var_u64->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_u64->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_u64->m_Shape[0], Ny);
-        ASSERT_EQ(var_u64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_r32 = io.InquireVariable<float>("r32");
-        ASSERT_NE(var_r32, nullptr);
-        ASSERT_EQ(var_r32->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_r32->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_r32->m_Shape[0], Ny);
-        ASSERT_EQ(var_r32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        auto var_r64 = io.InquireVariable<double>("r64");
-        ASSERT_NE(var_r64, nullptr);
-        ASSERT_EQ(var_r64->m_ShapeID, adios2::ShapeID::GlobalArray);
-        ASSERT_EQ(var_r64->m_AvailableStepsCount, NSteps);
-        ASSERT_EQ(var_r64->m_Shape[0], Ny);
-        ASSERT_EQ(var_r64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
-
-        // If the size of the array is smaller than the data
-        // the result is weird... double and uint64_t would get
-        // completely garbage data
-        std::array<int8_t, Nx * Ny> I8;
-        std::array<int16_t, Nx * Ny> I16;
-        std::array<int32_t, Nx * Ny> I32;
-        std::array<int64_t, Nx * Ny> I64;
-        std::array<uint8_t, Nx * Ny> U8;
-        std::array<uint16_t, Nx * Ny> U16;
-        std::array<uint32_t, Nx * Ny> U32;
-        std::array<uint64_t, Nx * Ny> U64;
-        std::array<float, Nx * Ny> R32;
-        std::array<double, Nx * Ny> R64;
-
-        const adios2::Dims start{0, static_cast<size_t>(mpiRank * Nx)};
-        const adios2::Dims count{Ny, Nx};
-
-        const adios2::Box<adios2::Dims> sel(start, count);
-
-        var_i8->SetSelection(sel);
-        var_i16->SetSelection(sel);
-        var_i32->SetSelection(sel);
-        var_i64->SetSelection(sel);
-
-        var_u8->SetSelection(sel);
-        var_u16->SetSelection(sel);
-        var_u32->SetSelection(sel);
-        var_u64->SetSelection(sel);
-
-        var_r32->SetSelection(sel);
-        var_r64->SetSelection(sel);
-
-        for (size_t t = 0; t < NSteps; ++t)
-        {
-            var_i8->SetStepSelection({t + 1, 1});
-            var_i16->SetStepSelection({t + 1, 1});
-            var_i32->SetStepSelection({t + 1, 1});
-            var_i64->SetStepSelection({t + 1, 1});
-
-            var_u8->SetStepSelection({t + 1, 1});
-            var_u16->SetStepSelection({t + 1, 1});
-            var_u32->SetStepSelection({t + 1, 1});
-            var_u64->SetStepSelection({t + 1, 1});
-
-            var_r32->SetStepSelection({t + 1, 1});
-            var_r64->SetStepSelection({t + 1, 1});
-
-            bpReader.GetDeferred(*var_i8, I8.data());
-            bpReader.GetDeferred(*var_i16, I16.data());
-            bpReader.GetDeferred(*var_i32, I32.data());
-            bpReader.GetDeferred(*var_i64, I64.data());
-
-            bpReader.GetDeferred(*var_u8, U8.data());
-            bpReader.GetDeferred(*var_u16, U16.data());
-            bpReader.GetDeferred(*var_u32, U32.data());
-            bpReader.GetDeferred(*var_u64, U64.data());
-
-            bpReader.GetDeferred(*var_r32, R32.data());
-            bpReader.GetDeferred(*var_r64, R64.data());
-
-            EXPECT_THROW(bpReader.EndStep(), std::invalid_argument);
-        }
-        bpReader.Close();
-    }
-}
-
 //******************************************************************************
 // main
 //******************************************************************************
diff --git a/testing/adios2/engine/bp/TestBPWriteReadAsStreamADIOS2.cpp b/testing/adios2/engine/bp/TestBPWriteReadAsStreamADIOS2.cpp
index 1ee87873a9f0c480889404cf8ddc08a8423c6c44..c8aaf2b11a47ce8329382827799e2110af081a37 100644
--- a/testing/adios2/engine/bp/TestBPWriteReadAsStreamADIOS2.cpp
+++ b/testing/adios2/engine/bp/TestBPWriteReadAsStreamADIOS2.cpp
@@ -217,9 +217,11 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead1D8)
         var_r32->SetSelection(sel);
         var_r64->SetSelection(sel);
 
-        for (size_t t = 0; t < NSteps; ++t)
+        unsigned int t = 0;
+        adios2::StepStatus status = adios2::StepStatus::OK;
+        while (status == adios2::StepStatus::OK)
         {
-            bpReader.BeginStep();
+            status = bpReader.BeginStep();
 
             bpReader.GetDeferred(*var_i8, I8.data());
             bpReader.GetDeferred(*var_i16, I16.data());
@@ -258,7 +260,11 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead1D8)
                 EXPECT_EQ(R32[i], m_OriginalData.R32[i]) << msg;
                 EXPECT_EQ(R64[i], m_OriginalData.R64[i]) << msg;
             }
+            ++t;
         }
+
+        EXPECT_EQ(t, NSteps);
+
         bpReader.Close();
     }
 }
@@ -434,9 +440,6 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D2x4)
         ASSERT_EQ(var_r64->m_Shape[0], Ny);
         ASSERT_EQ(var_r64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
 
-        // If the size of the array is smaller than the data
-        // the result is weird... double and uint64_t would get
-        // completely garbage data
         std::array<int8_t, Nx * Ny> I8;
         std::array<int16_t, Nx * Ny> I16;
         std::array<int32_t, Nx * Ny> I32;
@@ -466,9 +469,11 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D2x4)
         var_r32->SetSelection(sel);
         var_r64->SetSelection(sel);
 
-        for (size_t t = 0; t < NSteps; ++t)
+        unsigned int t = 0;
+        adios2::StepStatus status = adios2::StepStatus::OK;
+        while (status == adios2::StepStatus::OK)
         {
-            bpReader.BeginStep();
+            status = bpReader.BeginStep();
 
             bpReader.GetDeferred(*var_i8, I8.data());
             bpReader.GetDeferred(*var_i16, I16.data());
@@ -507,7 +512,10 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D2x4)
                 EXPECT_EQ(R32[i], m_OriginalData.R32[i]) << msg;
                 EXPECT_EQ(R64[i], m_OriginalData.R64[i]) << msg;
             }
+            ++t;
         }
+        EXPECT_EQ(t, NSteps);
+
         bpReader.Close();
     }
 }
@@ -718,9 +726,11 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D4x2)
         var_r32->SetSelection(sel);
         var_r64->SetSelection(sel);
 
-        for (size_t t = 0; t < NSteps; ++t)
+        unsigned int t = 0;
+        adios2::StepStatus status = adios2::StepStatus::OK;
+        while (status == adios2::StepStatus::OK)
         {
-            bpReader.BeginStep();
+            status = bpReader.BeginStep();
 
             bpReader.GetDeferred(*var_i8, I8.data());
             bpReader.GetDeferred(*var_i16, I16.data());
@@ -736,6 +746,753 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D4x2)
             bpReader.GetDeferred(*var_r64, R64.data());
 
             bpReader.PerformGets();
+
+            bpReader.EndStep();
+
+            // Generate test data for each rank uniquely
+            UpdateSmallTestData(m_OriginalData, static_cast<int>(t), mpiRank,
+                                mpiSize);
+
+            for (size_t i = 0; i < Nx * Ny; ++i)
+            {
+                std::stringstream ss;
+                ss << "t=" << t << " i=" << i << " rank=" << mpiRank;
+                std::string msg = ss.str();
+
+                EXPECT_EQ(I8[i], m_OriginalData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], m_OriginalData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], m_OriginalData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], m_OriginalData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], m_OriginalData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], m_OriginalData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], m_OriginalData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], m_OriginalData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], m_OriginalData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], m_OriginalData.R64[i]) << msg;
+            }
+            ++t;
+        }
+        EXPECT_EQ(t, NSteps);
+        bpReader.Close();
+    }
+}
+
+TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead1D8MissingPerformGets)
+{
+    // Each process would write a 1x8 array and all processes would
+    // form a mpiSize * Nx 1D array
+    const std::string fname(
+        "ADIOS2BPWriteReadAsStream1D8MissingPerformGetsTest.bp");
+
+    int mpiRank = 0, mpiSize = 1;
+    // Number of elements written by each process
+    const size_t Nx = 8;
+
+    // Number of steps
+    const size_t NSteps = 3;
+
+#ifdef ADIOS2_HAVE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+#endif
+
+// Write test data using BP
+
+#ifdef ADIOS2_HAVE_MPI
+    adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
+#else
+    adios2::ADIOS adios(true);
+#endif
+    {
+        adios2::IO &io = adios.DeclareIO("TestIO");
+
+        // Declare 1D variables (NumOfProcesses * Nx)
+        // The local process' part (start, count) can be defined now or later
+        // before Write().
+        {
+            const adios2::Dims shape{static_cast<size_t>(Nx * mpiSize)};
+            const adios2::Dims start{static_cast<size_t>(Nx * mpiRank)};
+            const adios2::Dims count{Nx};
+
+            io.DefineVariable<int8_t>("i8", shape, start, count,
+                                      adios2::ConstantDims,
+                                      m_TestData.I8.data());
+            io.DefineVariable<int16_t>("i16", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I16.data());
+            io.DefineVariable<int32_t>("i32", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I32.data());
+            io.DefineVariable<int64_t>("i64", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I64.data());
+
+            io.DefineVariable<uint8_t>("u8", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.U8.data());
+
+            io.DefineVariable<uint16_t>("u16", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U16.data());
+            io.DefineVariable<uint32_t>("u32", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U32.data());
+            io.DefineVariable<uint64_t>("u64", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U64.data());
+
+            io.DefineVariable<float>("r32", shape, start, count,
+                                     adios2::ConstantDims,
+                                     m_TestData.R32.data());
+            io.DefineVariable<double>("r64", shape, start, count,
+                                      adios2::ConstantDims,
+                                      m_TestData.R64.data());
+        }
+
+        // Create the BP Engine
+        io.SetEngine("BPFileWriter");
+
+        io.AddTransport("file");
+
+        // QUESTION: It seems that BPFileWriter cannot overwrite existing
+        // files
+        // Ex. if you tune Nx and NSteps, the test would fail. But if you clear
+        // the cache in
+        // ${adios2Build}/testing/adios2/engine/bp/ADIOS2BPWriteADIOS1Read1D8.bp.dir,
+        // then it works
+        adios2::Engine &bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        for (size_t step = 0; step < NSteps; ++step)
+        {
+            UpdateSmallTestData(m_TestData, static_cast<int>(step), mpiRank,
+                                mpiSize);
+            bpWriter.WriteStep();
+        }
+
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO &io = adios.DeclareIO("ReadIO");
+
+        adios2::Engine &bpReader = io.Open(fname, adios2::Mode::Read);
+
+        auto var_i8 = io.InquireVariable<int8_t>("i8");
+        ASSERT_NE(var_i8, nullptr);
+        ASSERT_EQ(var_i8->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i8->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i8->m_Shape[0], mpiSize * Nx);
+
+        auto var_i16 = io.InquireVariable<int16_t>("i16");
+        ASSERT_NE(var_i16, nullptr);
+        ASSERT_EQ(var_i16->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i16->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i16->m_Shape[0], mpiSize * Nx);
+
+        auto var_i32 = io.InquireVariable<int32_t>("i32");
+        ASSERT_NE(var_i32, nullptr);
+        ASSERT_EQ(var_i32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i32->m_Shape[0], mpiSize * Nx);
+
+        auto var_i64 = io.InquireVariable<int64_t>("i64");
+        ASSERT_NE(var_i64, nullptr);
+        ASSERT_EQ(var_i64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i64->m_Shape[0], mpiSize * Nx);
+
+        auto var_u8 = io.InquireVariable<uint8_t>("u8");
+        ASSERT_NE(var_u8, nullptr);
+        ASSERT_EQ(var_u8->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u8->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u8->m_Shape[0], mpiSize * Nx);
+
+        auto var_u16 = io.InquireVariable<uint16_t>("u16");
+        ASSERT_NE(var_u16, nullptr);
+        ASSERT_EQ(var_u16->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u16->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u16->m_Shape[0], mpiSize * Nx);
+
+        auto var_u32 = io.InquireVariable<uint32_t>("u32");
+        ASSERT_NE(var_u32, nullptr);
+        ASSERT_EQ(var_u32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u32->m_Shape[0], mpiSize * Nx);
+
+        auto var_u64 = io.InquireVariable<uint64_t>("u64");
+        ASSERT_NE(var_u64, nullptr);
+        ASSERT_EQ(var_u64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u64->m_Shape[0], mpiSize * Nx);
+
+        auto var_r32 = io.InquireVariable<float>("r32");
+        ASSERT_NE(var_r32, nullptr);
+        ASSERT_EQ(var_r32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_r32->m_Shape[0], mpiSize * Nx);
+
+        auto var_r64 = io.InquireVariable<double>("r64");
+        ASSERT_NE(var_r64, nullptr);
+        ASSERT_EQ(var_r64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_r64->m_Shape[0], mpiSize * Nx);
+
+        std::string IString;
+        std::array<int8_t, Nx> I8;
+        std::array<int16_t, Nx> I16;
+        std::array<int32_t, Nx> I32;
+        std::array<int64_t, Nx> I64;
+        std::array<uint8_t, Nx> U8;
+        std::array<uint16_t, Nx> U16;
+        std::array<uint32_t, Nx> U32;
+        std::array<uint64_t, Nx> U64;
+        std::array<float, Nx> R32;
+        std::array<double, Nx> R64;
+
+        const adios2::Dims start{mpiRank * Nx};
+        const adios2::Dims count{Nx};
+
+        const adios2::Box<adios2::Dims> sel(start, count);
+
+        var_i8->SetSelection(sel);
+        var_i16->SetSelection(sel);
+        var_i32->SetSelection(sel);
+        var_i64->SetSelection(sel);
+
+        var_u8->SetSelection(sel);
+        var_u16->SetSelection(sel);
+        var_u32->SetSelection(sel);
+        var_u64->SetSelection(sel);
+
+        var_r32->SetSelection(sel);
+        var_r64->SetSelection(sel);
+
+        unsigned int t = 0;
+        adios2::StepStatus status = adios2::StepStatus::OK;
+        while (status == adios2::StepStatus::OK)
+        {
+            status = bpReader.BeginStep();
+            bpReader.GetDeferred(*var_i8, I8.data());
+            bpReader.GetDeferred(*var_i16, I16.data());
+            bpReader.GetDeferred(*var_i32, I32.data());
+            bpReader.GetDeferred(*var_i64, I64.data());
+
+            bpReader.GetDeferred(*var_u8, U8.data());
+            bpReader.GetDeferred(*var_u16, U16.data());
+            bpReader.GetDeferred(*var_u32, U32.data());
+            bpReader.GetDeferred(*var_u64, U64.data());
+
+            bpReader.GetDeferred(*var_r32, R32.data());
+            bpReader.GetDeferred(*var_r64, R64.data());
+
+            bpReader.EndStep();
+
+            UpdateSmallTestData(m_OriginalData, static_cast<int>(t), mpiRank,
+                                mpiSize);
+
+            for (size_t i = 0; i < Nx; ++i)
+            {
+                std::stringstream ss;
+                ss << "t=" << t << " i=" << i << " rank=" << mpiRank;
+                std::string msg = ss.str();
+
+                EXPECT_EQ(I8[i], m_OriginalData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], m_OriginalData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], m_OriginalData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], m_OriginalData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], m_OriginalData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], m_OriginalData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], m_OriginalData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], m_OriginalData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], m_OriginalData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], m_OriginalData.R64[i]) << msg;
+            }
+            ++t;
+        }
+
+        EXPECT_EQ(t, NSteps);
+
+        bpReader.Close();
+    }
+}
+
+//******************************************************************************
+// 2D 2x4 test data
+//******************************************************************************
+
+// ADIOS2 BP write, ADIOS2 BP read (GetDeferred + EndStep, no PerformGets)
+TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D2x4MissingPerformGets)
+{
+    // Each process would write a 2x4 array and all processes would
+    // form a 2D 2 * (numberOfProcess*Nx) matrix where Nx is 4 here
+    const std::string fname(
+        "ADIOS2BPWriteReadAsStream2D2x4MissingPerformGetsTest.bp");
+
+    int mpiRank = 0, mpiSize = 1;
+    // Number of columns
+    const std::size_t Nx = 4;
+
+    // Number of rows
+    const std::size_t Ny = 2;
+
+    // Number of steps
+    const std::size_t NSteps = 3;
+
+#ifdef ADIOS2_HAVE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+#endif
+
+// Write test data using ADIOS2
+
+#ifdef ADIOS2_HAVE_MPI
+    adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
+#else
+    adios2::ADIOS adios(true);
+#endif
+    {
+        adios2::IO &io = adios.DeclareIO("TestIO");
+
+        // Declare 2D variables (Ny * (NumOfProcesses * Nx))
+        // The local process' part (start, count) can be defined now or later
+        // before Write().
+        {
+            const adios2::Dims shape{Ny, static_cast<size_t>(Nx * mpiSize)};
+            const adios2::Dims start{0, static_cast<size_t>(mpiRank * Nx)};
+            const adios2::Dims count{Ny, Nx};
+
+            io.DefineVariable<int8_t>("i8", shape, start, count,
+                                      adios2::ConstantDims,
+                                      m_TestData.I8.data());
+            io.DefineVariable<int16_t>("i16", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I16.data());
+            io.DefineVariable<int32_t>("i32", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I32.data());
+            io.DefineVariable<int64_t>("i64", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I64.data());
+
+            io.DefineVariable<uint8_t>("u8", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.U8.data());
+
+            io.DefineVariable<uint16_t>("u16", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U16.data());
+            io.DefineVariable<uint32_t>("u32", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U32.data());
+            io.DefineVariable<uint64_t>("u64", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U64.data());
+
+            io.DefineVariable<float>("r32", shape, start, count,
+                                     adios2::ConstantDims,
+                                     m_TestData.R32.data());
+            io.DefineVariable<double>("r64", shape, start, count,
+                                      adios2::ConstantDims,
+                                      m_TestData.R64.data());
+        }
+
+        // Create the BP Engine
+        io.SetEngine("BPFileWriter");
+        io.AddTransport("file");
+
+        adios2::Engine &bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        for (size_t step = 0; step < NSteps; ++step)
+        {
+            UpdateSmallTestData(m_TestData, static_cast<int>(step), mpiRank,
+                                mpiSize);
+            bpWriter.WriteStep();
+        }
+
+        // Close the file
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO &io = adios.DeclareIO("ReadIO");
+
+        adios2::Engine &bpReader = io.Open(fname, adios2::Mode::Read);
+
+        auto var_i8 = io.InquireVariable<int8_t>("i8");
+        ASSERT_NE(var_i8, nullptr);
+        ASSERT_EQ(var_i8->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i8->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i8->m_Shape[0], Ny);
+        ASSERT_EQ(var_i8->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i16 = io.InquireVariable<int16_t>("i16");
+        ASSERT_NE(var_i16, nullptr);
+        ASSERT_EQ(var_i16->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i16->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i16->m_Shape[0], Ny);
+        ASSERT_EQ(var_i16->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i32 = io.InquireVariable<int32_t>("i32");
+        ASSERT_NE(var_i32, nullptr);
+        ASSERT_EQ(var_i32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i32->m_Shape[0], Ny);
+        ASSERT_EQ(var_i32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i64 = io.InquireVariable<int64_t>("i64");
+        ASSERT_NE(var_i64, nullptr);
+        ASSERT_EQ(var_i64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i64->m_Shape[0], Ny);
+        ASSERT_EQ(var_i64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u8 = io.InquireVariable<uint8_t>("u8");
+        ASSERT_NE(var_u8, nullptr);
+        ASSERT_EQ(var_u8->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u8->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u8->m_Shape[0], Ny);
+        ASSERT_EQ(var_u8->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u16 = io.InquireVariable<uint16_t>("u16");
+        ASSERT_NE(var_u16, nullptr);
+        ASSERT_EQ(var_u16->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u16->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u16->m_Shape[0], Ny);
+        ASSERT_EQ(var_u16->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u32 = io.InquireVariable<uint32_t>("u32");
+        ASSERT_NE(var_u32, nullptr);
+        ASSERT_EQ(var_u32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u32->m_Shape[0], Ny);
+        ASSERT_EQ(var_u32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u64 = io.InquireVariable<uint64_t>("u64");
+        ASSERT_NE(var_u64, nullptr);
+        ASSERT_EQ(var_u64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u64->m_Shape[0], Ny);
+        ASSERT_EQ(var_u64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_r32 = io.InquireVariable<float>("r32");
+        ASSERT_NE(var_r32, nullptr);
+        ASSERT_EQ(var_r32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_r32->m_Shape[0], Ny);
+        ASSERT_EQ(var_r32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_r64 = io.InquireVariable<double>("r64");
+        ASSERT_NE(var_r64, nullptr);
+        ASSERT_EQ(var_r64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_r64->m_Shape[0], Ny);
+        ASSERT_EQ(var_r64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        std::array<int8_t, Nx * Ny> I8;
+        std::array<int16_t, Nx * Ny> I16;
+        std::array<int32_t, Nx * Ny> I32;
+        std::array<int64_t, Nx * Ny> I64;
+        std::array<uint8_t, Nx * Ny> U8;
+        std::array<uint16_t, Nx * Ny> U16;
+        std::array<uint32_t, Nx * Ny> U32;
+        std::array<uint64_t, Nx * Ny> U64;
+        std::array<float, Nx * Ny> R32;
+        std::array<double, Nx * Ny> R64;
+
+        const adios2::Dims start{0, static_cast<size_t>(mpiRank * Nx)};
+        const adios2::Dims count{Ny, Nx};
+
+        const adios2::Box<adios2::Dims> sel(start, count);
+
+        var_i8->SetSelection(sel);
+        var_i16->SetSelection(sel);
+        var_i32->SetSelection(sel);
+        var_i64->SetSelection(sel);
+
+        var_u8->SetSelection(sel);
+        var_u16->SetSelection(sel);
+        var_u32->SetSelection(sel);
+        var_u64->SetSelection(sel);
+
+        var_r32->SetSelection(sel);
+        var_r64->SetSelection(sel);
+
+        unsigned int t = 0;
+        adios2::StepStatus status = adios2::StepStatus::OK;
+        while (status == adios2::StepStatus::OK)
+        {
+            status = bpReader.BeginStep();
+
+            bpReader.GetDeferred(*var_i8, I8.data());
+            bpReader.GetDeferred(*var_i16, I16.data());
+            bpReader.GetDeferred(*var_i32, I32.data());
+            bpReader.GetDeferred(*var_i64, I64.data());
+
+            bpReader.GetDeferred(*var_u8, U8.data());
+            bpReader.GetDeferred(*var_u16, U16.data());
+            bpReader.GetDeferred(*var_u32, U32.data());
+            bpReader.GetDeferred(*var_u64, U64.data());
+
+            bpReader.GetDeferred(*var_r32, R32.data());
+            bpReader.GetDeferred(*var_r64, R64.data());
+
+            bpReader.EndStep();
+
+            // Generate test data for each rank uniquely
+            UpdateSmallTestData(m_OriginalData, static_cast<int>(t), mpiRank,
+                                mpiSize);
+
+            for (size_t i = 0; i < Nx * Ny; ++i)
+            {
+                std::stringstream ss;
+                ss << "t=" << t << " i=" << i << " rank=" << mpiRank;
+                std::string msg = ss.str();
+
+                EXPECT_EQ(I8[i], m_OriginalData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], m_OriginalData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], m_OriginalData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], m_OriginalData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], m_OriginalData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], m_OriginalData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], m_OriginalData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], m_OriginalData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], m_OriginalData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], m_OriginalData.R64[i]) << msg;
+            }
+            ++t;
+        }
+        EXPECT_EQ(t, NSteps);
+
+        bpReader.Close();
+    }
+}
+
+//******************************************************************************
+// 2D 4x2 test data
+//******************************************************************************
+
+TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D4x2MissingPerformGets)
+{
+    // Each process writes a 4x2 array; together all processes form a
+    // 2D 4 x (NumberOfProcess * Nx) global array, where Nx is 2 here
+    const std::string fname(
+        "ADIOS2BPWriteReadAsStream2D4x2MissingPerformGetsTest.bp");
+
+    int mpiRank = 0, mpiSize = 1;
+    // Per-process extent of the second (column) dimension
+    const std::size_t Nx = 2;
+    // Extent of the first (row) dimension
+    const std::size_t Ny = 4;
+
+    // Number of steps
+    const std::size_t NSteps = 3;
+
+#ifdef ADIOS2_HAVE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+#endif
+
+// Write test data using ADIOS2
+
+#ifdef ADIOS2_HAVE_MPI
+    adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON);
+#else
+    adios2::ADIOS adios(true);
+#endif
+    {
+        adios2::IO &io = adios.DeclareIO("TestIO");
+
+        // Declare 2D variables (4 * (NumberOfProcess * Nx))
+        // The local process' part (start, count) can be defined now or later
+        // before Write().
+        {
+            adios2::Dims shape{static_cast<unsigned int>(Ny),
+                               static_cast<unsigned int>(mpiSize * Nx)};
+            adios2::Dims start{static_cast<unsigned int>(0),
+                               static_cast<unsigned int>(mpiRank * Nx)};
+            adios2::Dims count{static_cast<unsigned int>(Ny),
+                               static_cast<unsigned int>(Nx)};
+
+            io.DefineVariable<int8_t>("i8", shape, start, count,
+                                      adios2::ConstantDims,
+                                      m_TestData.I8.data());
+            io.DefineVariable<int16_t>("i16", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I16.data());
+            io.DefineVariable<int32_t>("i32", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I32.data());
+            io.DefineVariable<int64_t>("i64", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.I64.data());
+
+            io.DefineVariable<uint8_t>("u8", shape, start, count,
+                                       adios2::ConstantDims,
+                                       m_TestData.U8.data());
+
+            io.DefineVariable<uint16_t>("u16", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U16.data());
+            io.DefineVariable<uint32_t>("u32", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U32.data());
+            io.DefineVariable<uint64_t>("u64", shape, start, count,
+                                        adios2::ConstantDims,
+                                        m_TestData.U64.data());
+
+            io.DefineVariable<float>("r32", shape, start, count,
+                                     adios2::ConstantDims,
+                                     m_TestData.R32.data());
+            io.DefineVariable<double>("r64", shape, start, count,
+                                      adios2::ConstantDims,
+                                      m_TestData.R64.data());
+        }
+
+        // Create the BP Engine
+        io.SetEngine("BPFileWriter");
+
+        io.AddTransport("file");
+
+        adios2::Engine &bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        for (size_t step = 0; step < NSteps; ++step)
+        {
+            // Generate test data for each process uniquely
+            UpdateSmallTestData(m_TestData, static_cast<int>(step), mpiRank,
+                                mpiSize);
+            bpWriter.WriteStep();
+        }
+
+        // Close the file
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO &io = adios.DeclareIO("ReadIO");
+
+        adios2::Engine &bpReader = io.Open(fname, adios2::Mode::Read);
+
+        auto var_i8 = io.InquireVariable<int8_t>("i8");
+        ASSERT_NE(var_i8, nullptr);
+        ASSERT_EQ(var_i8->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i8->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i8->m_Shape[0], Ny);
+        ASSERT_EQ(var_i8->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i16 = io.InquireVariable<int16_t>("i16");
+        ASSERT_NE(var_i16, nullptr);
+        ASSERT_EQ(var_i16->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i16->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i16->m_Shape[0], Ny);
+        ASSERT_EQ(var_i16->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i32 = io.InquireVariable<int32_t>("i32");
+        ASSERT_NE(var_i32, nullptr);
+        ASSERT_EQ(var_i32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i32->m_Shape[0], Ny);
+        ASSERT_EQ(var_i32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i64 = io.InquireVariable<int64_t>("i64");
+        ASSERT_NE(var_i64, nullptr);
+        ASSERT_EQ(var_i64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_i64->m_Shape[0], Ny);
+        ASSERT_EQ(var_i64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u8 = io.InquireVariable<uint8_t>("u8");
+        ASSERT_NE(var_u8, nullptr);
+        ASSERT_EQ(var_u8->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u8->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u8->m_Shape[0], Ny);
+        ASSERT_EQ(var_u8->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u16 = io.InquireVariable<uint16_t>("u16");
+        ASSERT_NE(var_u16, nullptr);
+        ASSERT_EQ(var_u16->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u16->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u16->m_Shape[0], Ny);
+        ASSERT_EQ(var_u16->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u32 = io.InquireVariable<uint32_t>("u32");
+        ASSERT_NE(var_u32, nullptr);
+        ASSERT_EQ(var_u32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u32->m_Shape[0], Ny);
+        ASSERT_EQ(var_u32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u64 = io.InquireVariable<uint64_t>("u64");
+        ASSERT_NE(var_u64, nullptr);
+        ASSERT_EQ(var_u64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_u64->m_Shape[0], Ny);
+        ASSERT_EQ(var_u64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_r32 = io.InquireVariable<float>("r32");
+        ASSERT_NE(var_r32, nullptr);
+        ASSERT_EQ(var_r32->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r32->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_r32->m_Shape[0], Ny);
+        ASSERT_EQ(var_r32->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_r64 = io.InquireVariable<double>("r64");
+        ASSERT_NE(var_r64, nullptr);
+        ASSERT_EQ(var_r64->m_ShapeID, adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r64->m_AvailableStepsCount, NSteps);
+        ASSERT_EQ(var_r64->m_Shape[0], Ny);
+        ASSERT_EQ(var_r64->m_Shape[1], static_cast<size_t>(mpiSize * Nx));
+
+        // Each read buffer must hold at least Nx * Ny elements: if an array
+        // is smaller than the selected data the values read back are wrong,
+        // and double and uint64_t in particular come back as complete garbage
+        std::array<int8_t, Nx * Ny> I8;
+        std::array<int16_t, Nx * Ny> I16;
+        std::array<int32_t, Nx * Ny> I32;
+        std::array<int64_t, Nx * Ny> I64;
+        std::array<uint8_t, Nx * Ny> U8;
+        std::array<uint16_t, Nx * Ny> U16;
+        std::array<uint32_t, Nx * Ny> U32;
+        std::array<uint64_t, Nx * Ny> U64;
+        std::array<float, Nx * Ny> R32;
+        std::array<double, Nx * Ny> R64;
+
+        const adios2::Dims start{0, static_cast<size_t>(mpiRank * Nx)};
+        const adios2::Dims count{Ny, Nx};
+
+        const adios2::Box<adios2::Dims> sel(start, count);
+
+        var_i8->SetSelection(sel);
+        var_i16->SetSelection(sel);
+        var_i32->SetSelection(sel);
+        var_i64->SetSelection(sel);
+
+        var_u8->SetSelection(sel);
+        var_u16->SetSelection(sel);
+        var_u32->SetSelection(sel);
+        var_u64->SetSelection(sel);
+
+        var_r32->SetSelection(sel);
+        var_r64->SetSelection(sel);
+
+        unsigned int t = 0;
+        adios2::StepStatus status = adios2::StepStatus::OK;
+        while (status == adios2::StepStatus::OK)
+        {
+            status = bpReader.BeginStep();
+
+            bpReader.GetDeferred(*var_i8, I8.data());
+            bpReader.GetDeferred(*var_i16, I16.data());
+            bpReader.GetDeferred(*var_i32, I32.data());
+            bpReader.GetDeferred(*var_i64, I64.data());
+
+            bpReader.GetDeferred(*var_u8, U8.data());
+            bpReader.GetDeferred(*var_u16, U16.data());
+            bpReader.GetDeferred(*var_u32, U32.data());
+            bpReader.GetDeferred(*var_u64, U64.data());
+
+            bpReader.GetDeferred(*var_r32, R32.data());
+            bpReader.GetDeferred(*var_r64, R64.data());
+
             bpReader.EndStep();
 
             // Generate test data for each rank uniquely
@@ -759,7 +1516,9 @@ TEST_F(BPWriteReadAsStreamTestADIOS2, ADIOS2BPWriteRead2D4x2)
                 EXPECT_EQ(R32[i], m_OriginalData.R32[i]) << msg;
                 EXPECT_EQ(R64[i], m_OriginalData.R64[i]) << msg;
             }
+            ++t;
         }
+        EXPECT_EQ(t, NSteps);
         bpReader.Close();
     }
 }