From bb9b74ab91a651455573bee0e8ed1c1d00266e79 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Mon, 23 Mar 2020 16:23:54 -0400
Subject: [PATCH] Added Mantid::NeXus::HDF5Descriptor class

Populates Metadata entries in constructor
Internal reference accessible in public function
Added unit test
Address CI issue with POSIX strdup function
---
 Framework/Nexus/CMakeLists.txt                |  13 +-
 .../inc/MantidNexus/NexusHDF5Descriptor.h     |  75 ++++++
 Framework/Nexus/src/NexusHDF5Descriptor.cpp   | 220 ++++++++++++++++++
 .../Nexus/test/NexusHDF5DescriptorTest.h      |  58 +++++
 4 files changed, 362 insertions(+), 4 deletions(-)
 create mode 100644 Framework/Nexus/inc/MantidNexus/NexusHDF5Descriptor.h
 create mode 100644 Framework/Nexus/src/NexusHDF5Descriptor.cpp
 create mode 100644 Framework/Nexus/test/NexusHDF5DescriptorTest.h

diff --git a/Framework/Nexus/CMakeLists.txt b/Framework/Nexus/CMakeLists.txt
index 0a00f9f80e7..e481148ce38 100644
--- a/Framework/Nexus/CMakeLists.txt
+++ b/Framework/Nexus/CMakeLists.txt
@@ -1,12 +1,13 @@
-set(SRC_FILES src/MuonNexusReader.cpp src/NexusClasses.cpp src/NexusFileIO.cpp)
+set(SRC_FILES src/MuonNexusReader.cpp src/NexusClasses.cpp src/NexusFileIO.cpp src/NexusHDF5Descriptor.cpp)
 
 set(INC_FILES
     inc/MantidNexus/MuonNexusReader.h
     inc/MantidNexus/NexusClasses.h
     inc/MantidNexus/NexusFileIO.h
-    inc/MantidNexus/NexusIOHelper.h)
+    inc/MantidNexus/NexusIOHelper.h
+    inc/MantidNexus/NexusHDF5Descriptor.h)
 
-set(TEST_FILES NexusIOHelperTest.h)
+set(TEST_FILES NexusIOHelperTest.h NexusHDF5DescriptorTest.h)
 
 if(COVERALLS)
   foreach(loop_var ${SRC_FILES} ${INC_FILES})
@@ -38,12 +39,16 @@ set_property(TARGET Nexus PROPERTY FOLDER "MantidFramework")
 
 include_directories(inc)
 
+target_include_directories(Nexus
+                           PRIVATE ${HDF5_INCLUDE_DIRS})
+                                   
 target_link_libraries(Nexus
                       LINK_PRIVATE
                       ${TCMALLOC_LIBRARIES_LINKTIME}
                       ${MANTIDLIBS}
                       ${NEXUS_C_LIBRARIES}
-                      ${NEXUS_LIBRARIES})
+                      ${NEXUS_LIBRARIES}
+                      ${HDF5_LIBRARIES})
 
 # Add the unit tests directory
 add_subdirectory(test)
diff --git a/Framework/Nexus/inc/MantidNexus/NexusHDF5Descriptor.h b/Framework/Nexus/inc/MantidNexus/NexusHDF5Descriptor.h
new file mode 100644
index 00000000000..d41db1717c8
--- /dev/null
+++ b/Framework/Nexus/inc/MantidNexus/NexusHDF5Descriptor.h
@@ -0,0 +1,75 @@
+// Mantid Repository : https://github.com/mantidproject/mantid
+//
+// Copyright &copy; 2007 ISIS Rutherford Appleton Laboratory UKRI,
+//   NScD Oak Ridge National Laboratory, European Spallation Source,
+//   Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
+// SPDX - License - Identifier: GPL - 3.0 +
+#pragma once
+
+#include "MantidKernel/System.h"
+
+#include <map>
+#include <set>
+#include <string>
+
+namespace Mantid {
+namespace NeXus {
+
+class DLLExport NexusHDF5Descriptor {
+public:
+  /**
+   * Opens the file and caches the names of all its entries
+   * @param filename input HDF5 Nexus file name
+   * @throws std::invalid_argument if filename can't be opened as HDF5
+   */
+  explicit NexusHDF5Descriptor(const std::string &filename);
+
+  NexusHDF5Descriptor() = delete;
+
+  /**
+   * Using RAII components, no need to deallocate explicitly
+   */
+  ~NexusHDF5Descriptor() = default;
+
+  /**
+   * Returns a copy of the current file name
+   * @return file name passed to the constructor
+   */
+  std::string getFilename() const noexcept;
+
+  /**
+   * Returns a const reference of the internal map holding all entries in the
+   * NeXus HDF5 file
+   * @return map holding all entries by group class
+   * <pre>
+   *   key: group_class (e.g. NXentry, NXlog)
+   *   value: set with absolute entry names for the group_class key
+   *          (e.g. /entry/log)
+   * </pre>
+   */
+  const std::map<std::string, std::set<std::string>> &getAllEntries() const
+      noexcept;
+
+private:
+  /**
+   * Builds the map stored in m_allEntries, called in the constructor
+   * initializer list: m_filename must be declared before m_allEntries
+   */
+  std::map<std::string, std::set<std::string>> initAllEntries();
+
+  /** NeXus HDF5 file name */
+  std::string m_filename;
+
+  /**
+   * All entries metadata
+   * <pre>
+   *   key: group_class (e.g. NXentry, NXlog)
+   *   value: set with absolute entry names for the group_class key
+   *          (e.g. /entry/log)
+   * </pre>
+   */
+  std::map<std::string, std::set<std::string>> m_allEntries;
+};
+
+} // namespace NeXus
+} // namespace Mantid
diff --git a/Framework/Nexus/src/NexusHDF5Descriptor.cpp b/Framework/Nexus/src/NexusHDF5Descriptor.cpp
new file mode 100644
index 00000000000..39e0bbe6b12
--- /dev/null
+++ b/Framework/Nexus/src/NexusHDF5Descriptor.cpp
@@ -0,0 +1,220 @@
+// Mantid Repository : https://github.com/mantidproject/mantid
+//
+// Copyright &copy; 2007 ISIS Rutherford Appleton Laboratory UKRI,
+//   NScD Oak Ridge National Laboratory, European Spallation Source,
+//   Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
+// SPDX - License - Identifier: GPL - 3.0 +
+
+#include "MantidNexus/NexusHDF5Descriptor.h"
+
+#include <hdf5.h>
+
+#include <cstdlib>   // malloc, calloc
+#include <cstring>   // strcpy
+#include <stdexcept> // std::invalid_argument
+
+namespace Mantid {
+namespace NeXus {
+
+/// hdf5 specific functions, stay in anonymous namespace to make hdf5 linking
+/// PRIVATE
+namespace {
+
+/**
+ * Reads an HDF5 string attribute into a newly allocated C string: scalars
+ * as stored, 1D arrays joined with ", ", higher ranks as a placeholder text
+ * @param attr input HDF5 attribute handler
+ * @param data output null-terminated text, the caller releases it with free()
+ * @return 0 on success, -1 if reading the attribute failed
+ */
+herr_t readStringAttribute(hid_t attr, char **data) {
+  herr_t iRet = 0;
+  hsize_t thedims[H5S_MAX_RANK];
+  const hid_t atype = H5Aget_type(attr);
+  const std::size_t sdim = H5Tget_size(atype);
+  const hid_t space = H5Aget_space(attr);
+  const int ndims = H5Sget_simple_extent_dims(space, thedims, nullptr);
+  const bool isVariable = H5Tis_variable_str(atype) > 0;
+
+  if (ndims == 0) {
+    if (isVariable) {
+      hid_t btype = H5Tget_native_type(atype, H5T_DIR_ASCEND);
+      iRet = H5Aread(attr, btype, data);
+      H5Tclose(btype);
+    } else {
+      *data = static_cast<char *>(malloc(sdim + 1));
+      iRet = H5Aread(attr, atype, *data);
+      (*data)[sdim] = '\0';
+    }
+  } else if (ndims == 1) {
+    const hsize_t count = thedims[0];
+    // zero-filled, with a spare slot so strings[0] exists for empty arrays
+    char **strings = static_cast<char **>(calloc(count + 1, sizeof(char *)));
+    if (!isVariable) {
+      // fixed length: strings[i] points into one contiguous block
+      strings[0] = static_cast<char *>(calloc(count * sdim + 1, sizeof(char)));
+      for (hsize_t i = 1; i < count; i++) {
+        strings[i] = strings[0] + i * sdim;
+      }
+    }
+    // variable length data is read into the pointer array itself
+    void *buffer = isVariable ? static_cast<void *>(strings) : strings[0];
+    iRet = H5Aread(attr, atype, buffer);
+    // sdim is just the pointer size for variable length data: measure those
+    std::size_t total = 1;
+    for (hsize_t i = 0; i < count; i++) {
+      const bool measure = isVariable && strings[i] != nullptr;
+      total += 2 + (measure ? strlen(strings[i]) : sdim);
+    }
+    *data = static_cast<char *>(calloc(total, sizeof(char)));
+    const std::size_t limit = isVariable ? total : sdim;
+    for (hsize_t i = 0; i < count; i++) {
+      if (i > 0)
+        strcat(*data, ", ");
+      if (strings[i] != nullptr)
+        strncat(*data, strings[i], limit);
+    }
+    if (isVariable) {
+      H5Dvlen_reclaim(atype, space, H5P_DEFAULT, strings);
+    } else {
+      free(strings[0]);
+    }
+    free(strings);
+  } else {
+    // sizeof counts the terminator, so the copy is already null-terminated
+    static const char placeholder[] = " higher dimensional string array";
+    *data = static_cast<char *>(malloc(sizeof(placeholder)));
+    memcpy(*data, placeholder, sizeof(placeholder));
+  }
+  H5Tclose(atype);
+  H5Sclose(space);
+  return (iRet < 0) ? -1 : 0;
+}
+
+/**
+ * Reads a string attribute into a caller provided buffer, truncating it to fit
+ * @param attr input HDF5 attribute handler
+ * @param data output attribute data, null-terminated, empty if nothing is read
+ * @param maxlen capacity of data in bytes, terminator included (must be > 0)
+ * @return 0 on success, -1 if the attribute could not be read
+ */
+herr_t readStringAttributeN(hid_t attr, char *data, int maxlen) {
+  data[0] = '\0'; // stays empty when nothing can be read
+  char *vdat = nullptr;
+  const herr_t iRet = readStringAttribute(attr, &vdat);
+  if (iRet >= 0 && vdat != nullptr) {
+    strncpy(data, vdat, static_cast<std::size_t>(maxlen));
+    free(vdat);
+  }
+  data[maxlen - 1] = '\0';
+  return iRet;
+}
+
+/**
+ * Recursively scans groupID and records every entry found below it
+ * @param groupID open HDF5 handle of the group to scan
+ * @param allEntries output map, key: NX_class of a group or "SDS" for
+ *        datasets, value: absolute names of the entries with that class
+ */
+void getGroup(hid_t groupID,
+              std::map<std::string, std::set<std::string>> &allEntries) {
+  /**
+   * Return the NX_class attribute associated with objectName group entry,
+   * empty if the entry has none or it can't be read
+   */
+  auto lf_getNxClassAttribute = [](hid_t locationID,
+                                   const char *objectName) -> std::string {
+    hid_t attributeID = H5Aopen_by_name(locationID, objectName, "NX_class",
+                                        H5P_DEFAULT, H5P_DEFAULT);
+    if (attributeID < 0) {
+      // nothing was opened, so there is no handle to close
+      return "";
+    }
+    // zero-filled so it reads as empty if the attribute can't be read
+    char data[128] = {0};
+    readStringAttributeN(attributeID, data, sizeof(data));
+    H5Aclose(attributeID);
+    return data;
+  };
+
+  // using HDF5 C API
+  constexpr std::size_t maxLength = 1024;
+  char groupName[maxLength];
+  char memberName[maxLength];
+  // a non-positive length means no name or an HDF5 failure, otherwise the
+  // buffer holds the null-terminated name, truncated if it does not fit
+  if (H5Iget_name(groupID, groupName, maxLength) <= 0) {
+    return;
+  }
+  const std::string groupNameStr(groupName);
+  hsize_t nObjects = 0;
+  H5Gget_num_objs(groupID, &nObjects);
+
+  const std::string nxClass =
+      (groupNameStr == "/")
+          ? ""
+          : lf_getNxClassAttribute(groupID, groupNameStr.c_str());
+  if (!nxClass.empty()) {
+    allEntries[nxClass].insert(groupNameStr);
+  }
+  // avoids a double slash in the names of datasets stored in the root group
+  const std::string parentPath = (groupNameStr == "/") ? "" : groupNameStr;
+
+  for (hsize_t i = 0; i < nObjects; ++i) {
+    // H5Gget_objtype_by_idx reports H5G_obj_t values, not H5O_type_t ones
+    const H5G_obj_t type = H5Gget_objtype_by_idx(groupID, i);
+    if (H5Gget_objname_by_idx(groupID, i, memberName, maxLength) <= 0) {
+      continue;
+    }
+    if (type == H5G_GROUP) {
+      hid_t subGroupID = H5Gopen2(groupID, memberName, H5P_DEFAULT);
+      if (subGroupID >= 0) {
+        getGroup(subGroupID, allEntries);
+        H5Gclose(subGroupID);
+      }
+    } else if (type == H5G_DATASET) {
+      allEntries["SDS"].insert(parentPath + "/" + memberName);
+    }
+  }
+}
+
+} // namespace
+
+NexusHDF5Descriptor::NexusHDF5Descriptor(const std::string &filename)
+    : m_filename(filename), m_allEntries(initAllEntries()) {}
+
+// PUBLIC accessors, both only expose state cached by the constructor
+std::string NexusHDF5Descriptor::getFilename() const noexcept {
+  return m_filename;
+}
+
+const std::map<std::string, std::set<std::string>> &
+NexusHDF5Descriptor::getAllEntries() const noexcept {
+  return m_allEntries;
+}
+
+// PRIVATE
+std::map<std::string, std::set<std::string>>
+NexusHDF5Descriptor::initAllEntries() {
+  hid_t fileID = H5Fopen(m_filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+  if (fileID < 0) {
+    throw std::invalid_argument(
+        "ERROR: NeXus::HDF5Descriptor couldn't open hdf5 file " + m_filename +
+        "\n");
+  }
+  hid_t groupID = H5Gopen2(fileID, "/", H5P_DEFAULT);
+  std::map<std::string, std::set<std::string>> allEntries;
+  try {
+    getGroup(groupID, allEntries); // recursive scan from root group "/"
+  } catch (...) {
+    H5Gclose(groupID); // release the handles before propagating the error
+    H5Fclose(fileID);
+    throw;
+  }
+  H5Gclose(groupID);
+  H5Fclose(fileID);
+  return allEntries;
+}
+
+} // namespace NeXus
+} // namespace Mantid
diff --git a/Framework/Nexus/test/NexusHDF5DescriptorTest.h b/Framework/Nexus/test/NexusHDF5DescriptorTest.h
new file mode 100644
index 00000000000..0ebf44e369a
--- /dev/null
+++ b/Framework/Nexus/test/NexusHDF5DescriptorTest.h
@@ -0,0 +1,58 @@
+// Mantid Repository : https://github.com/mantidproject/mantid
+//
+// Copyright &copy; 2018 ISIS Rutherford Appleton Laboratory UKRI,
+//   NScD Oak Ridge National Laboratory, European Spallation Source,
+//   Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
+// SPDX - License - Identifier: GPL - 3.0 +
+#pragma once
+
+#include "MantidAPI/FileFinder.h"
+#include "MantidNexus/NexusHDF5Descriptor.h"
+
+#include <cstddef> // std::size_t
+
+#include <cxxtest/TestSuite.h>
+
+class NexusHDF5DescriptorTest : public CxxTest::TestSuite {
+public:
+  // test get functions getFilename and getAllEntries
+  void test_nexus_hdf5_descriptor_get() {
+    const std::string filename =
+        Mantid::API::FileFinder::Instance().getFullPath("EQSANS_89157.nxs.h5");
+
+    Mantid::NeXus::NexusHDF5Descriptor nexusHDF5Descriptor(filename);
+    TS_ASSERT_EQUALS(filename, nexusHDF5Descriptor.getFilename());
+
+    const std::map<std::string, std::set<std::string>> &allEntries =
+        nexusHDF5Descriptor.getAllEntries();
+    TS_ASSERT_EQUALS(allEntries.size(), 12);
+
+    // confirms existence of groupClass key and expectedSize for value set,
+    // returns the size actually found so the total below is a real check
+    auto lf_TestSet = [&](const std::string &groupClass,
+                          const std::size_t expectedSize) -> std::size_t {
+      auto itClass = allEntries.find(groupClass);
+      TS_ASSERT_DIFFERS(itClass, allEntries.end());
+      if (itClass == allEntries.end()) {
+        return 0; // end() must not be dereferenced
+      }
+      TS_ASSERT_EQUALS(itClass->second.size(), expectedSize);
+      return itClass->second.size();
+    };
+
+    std::size_t nEntries = 0;
+    nEntries += lf_TestSet("NXcollection", 39);
+    nEntries += lf_TestSet("NXdetector", 48);
+    nEntries += lf_TestSet("NXdisk_chopper", 4);
+    nEntries += lf_TestSet("NXentry", 1);
+    nEntries += lf_TestSet("NXevent_data", 48);
+    nEntries += lf_TestSet("NXinstrument", 1);
+    nEntries += lf_TestSet("NXlog", 204);
+    nEntries += lf_TestSet("NXmonitor", 3);
+    nEntries += lf_TestSet("NXnote", 1);
+    nEntries += lf_TestSet("NXsample", 1);
+    nEntries += lf_TestSet("NXuser", 6);
+    nEntries += lf_TestSet("SDS", 2567);
+    TS_ASSERT_EQUALS(nEntries, 2923);
+  }
+};
-- 
GitLab