From fb938370b5c3db76e69bc74f1aaaa40cda4f11b3 Mon Sep 17 00:00:00 2001
From: Chuck Atkins <chuck.atkins@kitware.com>
Date: Fri, 23 Jun 2017 09:23:39 -0400
Subject: [PATCH] XML: Replace custom parser with pugixml parser

---
 .../runtimeconfig/hello/helloBPWriter.xml     |  28 +-
 source/adios2/CMakeLists.txt                  |   2 +-
 source/adios2/helper/adiosXML.cpp             | 490 ++++--------------
 source/adios2/helper/adiosXML.h               |  29 --
 4 files changed, 108 insertions(+), 441 deletions(-)

diff --git a/examples/experimental/runtimeconfig/hello/helloBPWriter.xml b/examples/experimental/runtimeconfig/hello/helloBPWriter.xml
index bda56e4c1..27a01d5db 100644
--- a/examples/experimental/runtimeconfig/hello/helloBPWriter.xml
+++ b/examples/experimental/runtimeconfig/hello/helloBPWriter.xml
@@ -1,20 +1,26 @@
 <?xml version="1.0"?>
 <adios-config>
-
     <io name="BPFile_N2N">
         <engine type="BPFileWriter">
-            Threads=1; <!-- for vectorized memory operations and asynchronous tasks --> 
-            ProfileUnits=Microseconds; <!-- Microseconds (default), Milliseconds, Seconds, Minutes, Hours -->
-            MaxBufferSize=20Mb;  <!-- XXKb, XXMb, or XXXGb supported, 16Mb (default should depend on system) -->
-            InitialBufferSize=1Mb; <!-- XXKb, XXMb, or XXXGb supported, 16Kb (default should depend on system) -->
-            BufferGrowthFactor=2;  <!-- exponential growth factor > 1,  1.5 (default, e.g. STL default=2), for this case: 1, 2, 4, 8, 16, 20 Mb-->
+            <!-- Number of threads for vectorized memory operations and asynchronous tasks -->
+            <parameter key="Threads" value="1"/>
+
+            <!-- Microseconds (default), Milliseconds, Seconds, Minutes, Hours -->
+            <parameter key="ProfileUnits" value="Microseconds"/>
+
+            <!-- Accepts XXKb, XXMb, or XXXGb; default 16Mb (the default should depend on the system) -->
+            <parameter key="MaxBufferSize" value="20Mb"/>
+
+            <!-- Accepts XXKb, XXMb, or XXXGb; default 16Kb (the default should depend on the system) -->
+            <parameter key="InitialBufferSize" value="1Mb"/>
+
+            <!-- Exponential growth factor, must be > 1; default 1.5 (e.g. STL default is 2). In this case the buffer grows 1, 2, 4, 8, 16, 20 Mb -->
+            <parameter key="BufferGrowthFactor" value="2"/>
         </engine>
 
         <transport type="File">
-            Library=POSIX;
-            ProfileUnits=Milliseconds;
+            <parameter key="Library" value="POSIX"/>
+            <parameter key="ProfileUnits" value="Milliseconds"/>
         </transport>
-
     </io>
-
-</adios-config>
\ No newline at end of file
+</adios-config>
diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt
index 651b7bc59..ce26aa233 100644
--- a/source/adios2/CMakeLists.txt
+++ b/source/adios2/CMakeLists.txt
@@ -48,7 +48,7 @@ target_include_directories(adios2
     $<INSTALL_INTERFACE:include>
   PRIVATE ${ADIOS2_SOURCE_DIR}/source
 )
-target_link_libraries(adios2 PRIVATE adios2sys)
+target_link_libraries(adios2 PRIVATE adios2sys pugixml)
 
 find_package(Threads REQUIRED)
 target_link_libraries(adios2 PUBLIC ${CMAKE_THREAD_LIBS_INIT})
diff --git a/source/adios2/helper/adiosXML.cpp b/source/adios2/helper/adiosXML.cpp
index 79f1c48a3..0045c4de3 100644
--- a/source/adios2/helper/adiosXML.cpp
+++ b/source/adios2/helper/adiosXML.cpp
@@ -11,473 +11,163 @@
 #include "adiosXML.h"
 
 /// \cond EXCLUDE_FROM_DOXYGEN
-#include <sstream>
 #include <stdexcept> //std::invalid_argument
 /// \endcond
 
+#include "adios2/ADIOSMPI.h"
 #include "adios2/ADIOSTypes.h"
 #include "adios2/helper/adiosString.h"
 
+#include <pugixml.hpp>
+
 namespace adios2
 {
 
-void RemoveCommentsXML(std::string &currentContent) noexcept
+Params InitParametersXML(pugi::xml_node node, bool debugMode)
 {
-    std::string::size_type startComment(currentContent.find("<!--"));
-
-    while (startComment != currentContent.npos)
+    Params params; // presumably a map: a repeated key keeps its first value
+    for (pugi::xml_node paramNode : node.children("parameter"))
     {
-        std::string::size_type endComment(currentContent.find("-->"));
-        currentContent.erase(startComment, endComment - startComment + 3);
-        startComment = currentContent.find("<!--");
-    }
-}
-
-TagXML GetTagXML(const std::string tagName, const std::string &content,
-                 std::string::size_type &position)
-{
-    auto lf_SetPositions =
-        [](const std::string input, const std::string &content,
-           std::string::size_type &position) -> std::string::size_type {
-
-        const std::string::size_type inputPosition =
-            GetStringPositionXML(input, content, position);
-
-        if (inputPosition != std::string::npos)
-        {
-            position = inputPosition + input.size();
-        }
-        return inputPosition;
-    };
-
-    TagXML tagXML;
-
-    std::string name(tagName);
-    if (name.back() == ' ')
-    {
-        name.pop_back();
-    }
-
-    auto openingStart = lf_SetPositions("<" + name, content, position);
-    auto openingEnd = lf_SetPositions(">", content, position);
-
-    if (openingStart == std::string::npos || openingEnd == std::string::npos)
-    {
-        tagXML.IsFull = false;
-        return tagXML;
-    }
-
-    tagXML.Header = content.substr(openingStart, openingEnd + 1 - openingStart);
-
-    auto closingStart =
-        GetStringPositionXML("</" + name + ">", content, position);
-
-    if (closingStart == std::string::npos)
-    {
-        throw std::invalid_argument(
-            "ERROR: could not find closing tag </" + name +
-            "> in XML config file, in call to ADIOS constructor\n");
-    }
-    tagXML.IsFull = true;
-    tagXML.Elements =
-        content.substr(openingEnd + 1, closingStart - (openingEnd + 1));
-
-    //    std::cout << "START..." << tagXML.Header << "...";
-    //    std::cout << tagXML.Elements << "...END\n";
-
-    return tagXML;
-}
-
-std::string::size_type
-GetStringPositionXML(const std::string input, const std::string &content,
-                     const std::string::size_type &startPosition) noexcept
-{
-    std::string::size_type foundPosition(content.find(input, startPosition));
-    if (foundPosition == content.npos)
-    {
-        return foundPosition;
-    }
-
-    // check if it is not inside " " or ' '
-    std::string::size_type currentPosition(startPosition);
-
-    while (foundPosition != content.npos)
-    {
-        const std::string::size_type singleQuotePosition(
-            content.find('\'', currentPosition));
-        const std::string::size_type doubleQuotePosition(
-            content.find('\"', currentPosition));
-
-        if ((singleQuotePosition == content.npos &&
-             doubleQuotePosition == content.npos) ||
-            (singleQuotePosition == content.npos &&
-             foundPosition < doubleQuotePosition) ||
-            (doubleQuotePosition == content.npos &&
-             foundPosition < singleQuotePosition) ||
-            (foundPosition < singleQuotePosition &&
-             foundPosition < doubleQuotePosition))
-        {
-            break;
-        }
-        // find the closing corresponding quote
-        std::string::size_type closingQuotePosition;
-
-        if (singleQuotePosition != content.npos &&
-            doubleQuotePosition == content.npos)
-        {
-            currentPosition = singleQuotePosition;
-            closingQuotePosition = content.find('\'', currentPosition + 1);
-        }
-        else if (singleQuotePosition == content.npos &&
-                 doubleQuotePosition != content.npos)
-        {
-            currentPosition = doubleQuotePosition;
-            closingQuotePosition = content.find('\"', currentPosition + 1);
-        }
-        else
+        pugi::xml_attribute attrKey = paramNode.attribute("key");
+        if (!attrKey)
         {
-            if (singleQuotePosition < doubleQuotePosition)
-            {
-                currentPosition = singleQuotePosition;
-                closingQuotePosition = content.find('\'', currentPosition + 1);
-            }
-            else
+            if (debugMode)
             {
-                currentPosition = doubleQuotePosition;
-                closingQuotePosition = content.find('\"', currentPosition + 1);
+                throw std::invalid_argument("ERROR: XML: No \"key\" attribute "
+                                            "found on <parameter> element.");
             }
-        }
-        // if can't find closing it's open until the end
-        if (closingQuotePosition == content.npos)
-        {
-            currentPosition == content.npos;
-            break;
-        }
-
-        currentPosition = closingQuotePosition + 1;
-
-        if (closingQuotePosition < foundPosition)
-        {
             continue;
         }
-        else
-        {
-            // if this point is reached it means it's a value inside " " or ' ',
-            // iterate
-            foundPosition = content.find(input, currentPosition);
-            currentPosition = foundPosition;
-        }
-    }
-
-    return foundPosition;
-}
-
-Params GetTagAttributesXML(const std::string &tagHeader)
-{
-    auto lf_GetQuotedValue = [](const char quote,
-                                const std::string::size_type &quotePosition,
-                                std::string &currentTag) -> std::string {
 
-        currentTag = currentTag.substr(quotePosition + 1);
-        auto nextQuotePosition = currentTag.find(quote);
-
-        if (nextQuotePosition == currentTag.npos)
+        pugi::xml_attribute attrValue = paramNode.attribute("value");
+        if (!attrValue)
         {
-            throw std::invalid_argument(
-                "ERROR: Invalid attribute in..." + currentTag +
-                "...check XML file, in call to ADIOS constructor\n");
-        }
-
-        const std::string value(currentTag.substr(0, nextQuotePosition));
-        currentTag = currentTag.substr(nextQuotePosition + 1);
-        return value;
-    };
-
-    auto lf_GetAttributes = [&](const std::string &tag) -> Params {
-        Params attributes;
-        std::string currentTag(tag.substr(tag.find_first_of(" \t\n")));
-        std::string::size_type currentPosition(0);
-
-        while (currentTag.find('=', currentPosition) !=
-               currentTag.npos) // equalPosition
-        {
-            currentTag = currentTag.substr(
-                currentTag.find_first_not_of(" \t\n", currentPosition));
-            auto equalPosition = currentTag.find('=');
-            if (currentTag.size() <= equalPosition + 1)
-            {
-                throw std::invalid_argument(
-                    "ERROR: tag " + tag +
-                    " is incomplete, check XML config file, "
-                    "in call to ADIOS constructor\n");
-            }
-
-            std::string key(currentTag.substr(0, equalPosition));
-            key.erase(key.find_last_not_of(" \t\n") + 1);
-
-            std::string value;
-
-            auto quotePosition =
-                currentTag.find_first_not_of(" \t\n", equalPosition + 1);
-
-            const char quote = currentTag.at(quotePosition);
-            if (quote == '\'' || quote == '"')
-            {
-                value = lf_GetQuotedValue(quote, quotePosition, currentTag);
-            }
-            else
+            if (debugMode)
             {
-                throw std::invalid_argument(
-                    "ERROR: quote must be \" or ' in XML config tag " + tag +
-                    ", in call to ADIOS constructor");
+                throw std::invalid_argument("ERROR: XML: No \"value\" "
+                                            "attribute found on <parameter> "
+                                            "element.");
             }
-
-            attributes.emplace(key, value);
-            currentPosition = quotePosition + value.size() + 1;
-        }
-        return attributes;
-    };
-
-    // BODY of function starts here
-    Params attributes;
-    // eliminate < >
-    std::string openingTag = tagHeader.substr(1, tagHeader.size() - 2);
-
-    if (tagHeader.back() == '/') // last char is / --> "XML empty tag"
-    {
-        // attributes = lf_GetAttributes(openingTag);
-        // throw exception here, ADIOS2 doesn't allow XML empty tags
-    }
-    else if (tagHeader[0] == '/') // first char is / ---> closing tag
-    {
-        attributes = lf_GetAttributes(openingTag);
-        if (attributes.size() > 0)
-        {
-            throw std::invalid_argument(
-                "ERROR: closing tag " + tagHeader +
-                " can't have attributes, in call to ADIOS constructor\n");
-        }
-    }
-    else // opening tag
-    {
-        attributes = lf_GetAttributes(openingTag);
-    }
-    return attributes;
-}
-
-void InitXML(const std::string configXML, const MPI_Comm mpiComm,
-             const bool debugMode,
-             std::vector<std::shared_ptr<Transform>> &transforms,
-             std::map<std::string, IO> &ios)
-{
-    // independent IO
-    std::string fileContents(FileToString(configXML));
-    if (fileContents.empty())
-    {
-        // issue a warning?
-        return;
-    }
-
-    RemoveCommentsXML(fileContents);
-
-    // adios-config
-    std::string::size_type position(0);
-    const TagXML adiosConfigXML(
-        GetTagXML("adios-config", fileContents, position));
-
-    // process transforms, not yet implemented
-
-    while (position != std::string::npos)
-    {
-        const TagXML transformXML(
-            GetTagXML("transform ", adiosConfigXML.Elements, position));
-
-        if (transformXML.Header.empty())
-        {
-            break;
+            continue; // non-debug mode: skip a <parameter> lacking "value"
         }
-        // InitTransform(transformTag, debugMode, transforms);
-    }
-
-    position = 0;
-    // process IOs
-    while (position != std::string::npos)
-    {
-        // io
-        const TagXML ioXML(GetTagXML("io ", adiosConfigXML.Elements, position));
 
-        if (ioXML.Header.empty()) // no more groups to find
-        {
-            break;
-        }
-        InitIOXML(ioXML, mpiComm, debugMode, transforms, ios);
+        params.emplace(attrKey.value(), attrValue.value());
     }
+    return params;
 }
 
-void InitIOXML(const TagXML &ioXML, const MPI_Comm mpiComm,
+void InitIOXML(const pugi::xml_node ioNode, const MPI_Comm mpiComm,
                const bool debugMode,
                std::vector<std::shared_ptr<Transform>> &transforms,
                std::map<std::string, IO> &ios)
 {
-    const Params ioAttributes(GetTagAttributesXML(ioXML.Header));
-
-    std::string ioName;
-    for (const auto &ioAttribute : ioAttributes)
-    {
-        if (ioAttribute.first == "name")
-        {
-            ioName = ioAttribute.second;
-        }
-    }
-
-    if (debugMode)
+    // <io name="..."> is required: throw in debug mode, else skip this <io>
+    pugi::xml_attribute nameAttr = ioNode.attribute("name");
+    if (!nameAttr)
     {
-        if (ioName.empty())
+        if (debugMode)
         {
             throw std::invalid_argument(
-                "ERROR: io name=\"value\" attribute not found in opening XML "
-                "tag " +
-                ioXML.Header +
-                ", check XML config file, in call to ADIOS constructor\n");
-        }
-
-        if (ios.count(ioName) == 1) // io exists
-        {
-            throw std::invalid_argument("ERROR: io name " + ioName +
-                                        " must be unique in XML config file, "
-                                        "in call to ADIOS constructor\n");
+                "ERROR: XML: No \"name\" attribute found on <io> element.");
         }
+        return;
     }
+    std::string ioName = nameAttr.value();
 
-    // emplace io with inConfigFile argument as true
-    auto itIO = ios.emplace(ioName, IO(ioName, mpiComm, true, debugMode));
-
-    // process engine
-    std::string::size_type position(0);
-
-    TagXML engineXML(GetTagXML("engine ", ioXML.Elements, position));
-    if (!engineXML.Header.empty()) // found first one
-    {
-        InitEngineXML(engineXML, debugMode, itIO.first->second);
-    }
+    // Build the IO (inConfigFile=true); a duplicate name reuses the old IO
+    auto ioIt = ios.emplace(ioName, IO(ioName, mpiComm, true, debugMode));
+    IO &io = ioIt.first->second;
 
-    if (debugMode)
+    // <engine type="..."> is mandatory: these throws ignore debugMode
+    pugi::xml_node engineNode = ioNode.child("engine");
+    if (!engineNode)
     {
-        // try finding a 2nd one from current position
-        TagXML engineXML(GetTagXML("engine ", ioXML.Elements, position));
-        if (!engineXML.Header.empty()) // found first one
-        {
-            throw std::invalid_argument(
-                "ERROR: more than one engine found in <io name=" + ioName +
-                "...>, only one per io tag is allowed in XML "
-                "config file, in call to "
-                "ADIOS constructor\n");
-        }
+        throw std::invalid_argument(
+            "ERROR: XML: No <engine> element found in <io> element.");
     }
-
-    position = 0;
-    // process transports
-    while (position != std::string::npos)
+    pugi::xml_attribute engineTypeAttr = engineNode.attribute("type");
+    if (!engineTypeAttr)
     {
-        TagXML transportXML(GetTagXML("transport", ioXML.Elements, position));
-
-        if (transportXML.Header.empty()) // no more groups to find
-        {
-            break;
-        }
-        InitTransportXML(transportXML, debugMode, itIO.first->second);
+        throw std::invalid_argument(
+            "ERROR: XML: No \"type\" attribute found on <engine> element.");
     }
-}
+    io.SetEngine(engineTypeAttr.value());
 
-void InitEngineXML(const TagXML &engineXML, const bool debugMode, IO &io)
-{
-    const Params attributes = GetTagAttributesXML(engineXML.Header);
+    // Forward the <parameter> children of <engine> to the IO as parameters
+    io.SetParameters(InitParametersXML(engineNode, debugMode));
 
-    std::string type;
-    for (const auto &attribute : attributes)
+    // One AddTransport per <transport>; untyped: throw (debug) or skip
+    for (pugi::xml_node transportNode : ioNode.children("transport"))
     {
-        if (attribute.first == "type")
+        pugi::xml_attribute typeAttr = transportNode.attribute("type");
+        if (!typeAttr)
         {
-            type = attribute.second;
-            break;
+            if (debugMode)
+            {
+                throw std::invalid_argument("ERROR: XML: No \"type\" attribute "
+                                            "found on <transport> element.");
+            }
+            continue;
         }
+        io.AddTransport(typeAttr.value(),
+                        InitParametersXML(transportNode, debugMode));
     }
-
-    if (!type.empty())
-    {
-        io.SetEngine(type);
-    }
-
-    io.SetParameters(ParseParamsXML(engineXML.Elements, debugMode));
 }
 
-void InitTransportXML(const TagXML &transportXML, const bool debugMode, IO &io)
+void InitXML(const std::string configXML, const MPI_Comm mpiComm,
+             const bool debugMode,
+             std::vector<std::shared_ptr<Transform>> &transforms,
+             std::map<std::string, IO> &ios)
 {
-    const Params attributes = GetTagAttributesXML(transportXML.Header);
+    int mpiRank;
+    MPI_Comm_rank(mpiComm, &mpiRank);
+    std::string fileContents;
+    unsigned long long len; // file size in bytes; set on rank 0, then broadcast
 
-    std::string type;
-    for (const auto &attribute : attributes)
+    // Read the file on rank 0 and broadcast it to everybody else
+    if (mpiRank == 0)
     {
-        if (attribute.first == "type")
-        {
-            type = attribute.second;
-            break;
-        }
+        fileContents = FileToString(configXML);
+        len = static_cast<unsigned long long>(fileContents.size());
     }
-
-    if (type.empty())
+    MPI_Bcast(&len, 1, MPI_UNSIGNED_LONG_LONG, 0, mpiComm); // type of len
+    if (mpiRank != 0)
     {
-        throw std::invalid_argument(
-            "ERROR: missing transport type in " + transportXML.Header +
-            ", in XML config file, in call to ADIOS constructor\n");
+        fileContents.resize(len);
     }
+    MPI_Bcast(&fileContents[0], static_cast<int>(len), MPI_CHAR, 0,
+              mpiComm);
 
-    io.AddTransport(type, ParseParamsXML(transportXML.Elements, debugMode));
-}
-
-Params ParseParamsXML(const std::string &tagElements, const bool debugMode)
-{
-    auto start = tagElements.find_first_not_of(" \t\n");
-    auto end = tagElements.find_last_not_of(" \t\n");
-
-    std::string parametersString(tagElements.substr(start, end - start + 1));
-    if (debugMode)
+    pugi::xml_document doc; // parses fileContents in place (buffer is mutated)
+    auto parse_result = doc.load_buffer_inplace(
+        &fileContents[0], fileContents.size());
+    if (!parse_result)
     {
-        if (parametersString.back() != ';')
+        if (debugMode)
         {
             throw std::invalid_argument(
-                "ERROR: parameters in config XML file must end with a ; " +
-                tagElements + ", in call to ADIOS constructor\n");
+                std::string("ERROR: XML: Parse error: ") +
+                parse_result.description());
         }
+        return; // non-debug mode: silently ignore an unparsable file
     }
 
-    std::istringstream parametersSS(parametersString);
-    std::string pair;
-
-    Params parameters;
-
-    while (std::getline(parametersSS, pair, ';'))
+    pugi::xml_node configNode = doc.child("adios-config");
+    if (!configNode)
     {
-        pair = pair.substr(pair.find_first_not_of(" \t\n"));
-        auto equalPosition = pair.find("=");
-
         if (debugMode)
         {
-            if (equalPosition == std::string::npos ||
-                equalPosition == pair.size())
-            {
-                throw std::invalid_argument("ERROR: wrong parameter " + pair +
-                                            " format is "
-                                            "key=value in XML config file, in "
-                                            "call to ADIOS constructor\n");
-            }
+            throw std::invalid_argument(
+                "ERROR: XML: No <adios-config> element found");
         }
+        return;
+    }
 
-        const std::string key(pair.substr(0, equalPosition));
-        const std::string value(pair.substr(equalPosition + 1));
-        parameters.emplace(key, value);
+    ios.clear(); // the config file replaces any previously registered IOs
+    for (pugi::xml_node ioNode : configNode.children("io"))
+    {
+        InitIOXML(ioNode, mpiComm, debugMode, transforms, ios);
     }
-    return parameters;
 }
 
 } // end namespace adios
diff --git a/source/adios2/helper/adiosXML.h b/source/adios2/helper/adiosXML.h
index 33c7d3880..055c14605 100644
--- a/source/adios2/helper/adiosXML.h
+++ b/source/adios2/helper/adiosXML.h
@@ -25,24 +25,6 @@
 namespace adios2
 {
 
-struct TagXML
-{
-    std::string Header;
-    std::string Elements;
-    bool IsFull;
-};
-
-void RemoveCommentsXML(std::string &currentContent) noexcept;
-
-TagXML GetTagXML(const std::string tagName, const std::string &content,
-                 std::string::size_type &position);
-
-std::string::size_type
-GetStringPositionXML(const std::string input, const std::string &content,
-                     const std::string::size_type &startPosition) noexcept;
-
-Params GetTagAttributesXML(const std::string &tagHeader);
-
 /**
  * Called inside the ADIOS XML constructors to get contents from file,
  * broadcast and fill transforms and ios
@@ -56,17 +38,6 @@ void InitXML(const std::string configXML, const MPI_Comm mpiComm,
              const bool debugMode,
              std::vector<std::shared_ptr<Transform>> &transforms,
              std::map<std::string, IO> &ios);
-
-void InitIOXML(const TagXML &ioXML, const MPI_Comm mpiComm,
-               const bool debugMode,
-               std::vector<std::shared_ptr<Transform>> &transforms,
-               std::map<std::string, IO> &ios);
-
-void InitEngineXML(const TagXML &engineXML, const bool debugMode, IO &io);
-
-void InitTransportXML(const TagXML &transportXML, const bool debugMode, IO &io);
-
-Params ParseParamsXML(const std::string &tagContents, const bool debugMode);
 }
 
 #endif /* ADIOS2_HELPER_ADIOSXML_H_ */
-- 
GitLab