Strings.h

#ifndef MANTID_KERNEL_STRINGS_H
#define MANTID_KERNEL_STRINGS_H

//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidKernel/DllConfig.h"
#include "MantidKernel/MultiThreaded.h"
#include "MantidKernel/StringTokenizer.h"
#include "MantidKernel/System.h"

#ifndef Q_MOC_RUN
#include <boost/lexical_cast.hpp>
#endif
#include <iosfwd>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

namespace Mantid {
namespace Kernel {

/** Holds support functions for strings.

Copyright &copy; 2007-2012 ISIS Rutherford Appleton Laboratory & NScD Oak Ridge
National Laboratory

This file is part of Mantid.

Mantid is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

Mantid is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

File change history is stored at: <https://github.com/mantidproject/mantid>.
Code Documentation is available at: <http://doxygen.mantidproject.org>
 */

namespace Strings {

//------------------------------------------------------------------------------------------------
/** Concatenate the items of an iterator range into a single string, writing
 * the separator between consecutive items.
 * Each item is written with operator<< into a std::ostringstream, so any
 * streamable value type works.
 * An empty range gives an empty string and no separator follows the last
 * item.
 *
 * For example, to join a vector of strings with commas:
 *  out = simpleJoin(v.begin(), v.end(), ", ");
 *
 * This is the plain serial version: it works for every iterator category but
 * is potentially slow for large ranges.
 *
 * @param begin :: iterator at the start
 * @param end :: iterator one past the last item
 * @param separator :: string written between consecutive items
 * @return the joined string
 */
template <typename ITERATOR_TYPE>
DLLExport std::string simpleJoin(ITERATOR_TYPE begin, ITERATOR_TYPE end,
                                 const std::string &separator) {
  std::ostringstream joined;
  bool isFirstItem = true;
  while (begin != end) {
    // The separator goes in front of every item except the first one
    if (!isFirstItem) {
      joined << separator;
    }
    joined << *begin;
    isFirstItem = false;
    ++begin;
  }
  return joined.str();
}

//------------------------------------------------------------------------------------------------
/** Join a set or vector of (something that turns into a string) together
 * into one string, separated by a string.
 * Returns an empty string if the range is null.
 * Does not add the separator after the LAST item.
 *
 * For example, join the items of a set with commas with:
 *  out = join(s.begin(), s.end(), ", ");
 *
 * This overload is selected for iterators that are NOT random access (e.g.
 * those of std::map or std::set); it simply forwards to simpleJoin().
 *
 * The unnamed trailing pointer parameter exists only so that std::enable_if
 * can remove this overload when the iterator category is
 * std::random_access_iterator_tag; callers never pass it.
 *
 * @param begin :: iterator at the start
 * @param end :: iterator at the end
 * @param separator :: string inserted between consecutive items.
 * @return the result of simpleJoin() for the same arguments
 */
template <typename ITERATOR_TYPE>
DLLExport std::string
join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator,
     typename std::enable_if<
         !(std::is_same<
             typename std::iterator_traits<ITERATOR_TYPE>::iterator_category,
             std::random_access_iterator_tag>::value)>::type * = nullptr) {
  return simpleJoin(begin, end, separator);
}

//------------------------------------------------------------------------------------------------
/** Join a set or vector of (something that turns into a string) together
 * into one string, separated by a string.
 * Returns an empty string if the range is null.
 * Does not add the separator after the LAST item.
 *
 * For example, join a vector of strings with commas with:
 *  out = join(v.begin(), v.end(), ", ");
 *
 * This is a faster threaded version of join().
 * It is selected only when the iterators ARE random access (e.g. those of
 * std::vector), because it needs the distance between begin and end and
 * indexes the range with begin + i.
 * It reverts to calling simpleJoin() if the input range holds fewer than
 * 500 items per available thread.
 *
 * The unnamed trailing pointer parameter exists only so that std::enable_if
 * can restrict this overload to std::random_access_iterator_tag; callers
 * never pass it.
 *
 * NOTE(review): the PARALLEL_* macros are not defined in this header; they
 * are presumably the thread-count / thread-id queries provided by
 * MantidKernel/MultiThreaded.h - confirm.
 *
 * @param begin :: iterator at the start
 * @param end :: iterator at the end
 * @param separator :: string inserted between consecutive items.
 * @return the joined string
 */
template <typename ITERATOR_TYPE>
DLLExport std::string
join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator,
     typename std::enable_if<
         (std::is_same<
             typename std::iterator_traits<ITERATOR_TYPE>::iterator_category,
             std::random_access_iterator_tag>::value)>::type * = nullptr) {

  // Get max number of threads
  const int nmaxThreads = static_cast<int>(PARALLEL_GET_MAX_THREADS);

  // Define minimum size for using threading
  const int min_size = 500 * nmaxThreads;

  // Get the distance between beginning and end
  const int dist = static_cast<int>(std::distance(begin, end));

  // If the input array is small, use the simpler function to avoid
  // unnecessary overhead from generating the parallel section
  if (dist < min_size) {
    return simpleJoin(begin, end, separator);
  }

  // One output slot per possible thread; slots of threads that do not take
  // part in the parallel region simply stay empty.
  std::vector<std::string> output(nmaxThreads);
  size_t stream_size = 0;

#pragma omp parallel reduction(+ : stream_size)
  {
    const int idThread = static_cast<int>(PARALLEL_THREAD_NUMBER);

    // Each thread formats its share of the items into a private stream
    std::ostringstream thread_stream;

// To make sure the pieces come out in the right order, we use
// schedule(static).
//
// From the OpenMP documentation:
// "When schedule(static, chunk_size) is specified, iterations are divided
// into chunks of size chunk_size, and the chunks are assigned to the threads
// in the team in a round-robin fashion **in the order of the thread
// number**."
//
// "When no chunk_size is specified, the iteration space is divided into
// chunks that are approximately equal in size, and at most one chunk is
// distributed to each thread."
#pragma omp for schedule(static)
    for (int i = 0; i < dist; i++) {
      // Every item is prefixed by the separator; the very first separator is
      // removed again after the parallel region.
      thread_stream << separator << *(begin + i);
    }
    output[idThread] = thread_stream.str();
    stream_size += output[idThread].length();
  }

  // Reserve the final size up front so the concatenation below never has to
  // reallocate.
  std::string master_string;
  master_string.reserve(stream_size - separator.length());

  // Drop the separator that precedes the very first item
  output.front().erase(0, separator.length());

  // Concatenate every slot in thread order. Unused slots are empty strings,
  // so there is no need to record the actual team size in a shared variable
  // from inside the parallel region (every thread writing it is a data race).
  for (const auto &piece : output) {
    master_string += piece;
  }

  return master_string;
}

//------------------------------------------------------------------------------------------------
/** Join a set or vector of (something that turns into a string) together
 * into one string, separated by a separator,
 * adjacent items that are precisely 1 away from each other
 * will be compressed into a list syntax e.g. 1-5.
 * Returns an empty string if the range is null.
 * Does not add the separator after the LAST item.
 *
 * For example, with the default separators
 *  out = joinCompress(v.begin(), v.end());
 * turns the values 1, 2, 3, 5 into "1-3,5".
 *
 * An item continues a run when it compares equal to (previous item + 1), so
 * the value type must support operator+ with an int, operator== and
 * operator<<. The range is traversed with two iterators (current and
 * previous), so the iterators must allow multi-pass use.
 *
 * @param begin :: iterator at the start
 * @param end :: iterator at the end
 * @param separator :: string to append between items.
 * @param listSeparator :: string to append between the first and last item
 * of a compressed run.
 * @return A string with contiguous values compressed using the list syntax
 */
template <typename ITERATOR_TYPE>
DLLExport std::string joinCompress(ITERATOR_TYPE begin, ITERATOR_TYPE end,
                                   const std::string &separator = ",",
                                   const std::string &listSeparator = "-") {

  if (begin == end) {
    return "";
  }
  std::ostringstream result;

  // Always include the first value
  result << *begin;

  ITERATOR_TYPE previous = begin;
  ITERATOR_TYPE current = begin;
  ++current;

  // True while the items seen since the last written value form a run of
  // consecutive values. This is tracked explicitly rather than by comparing
  // separator strings, which gave wrong output whenever separator and
  // listSeparator were equal.
  bool inRun = false;
  for (; current != end; ++current) {
    if (*current == (*previous + 1)) {
      // one higher than the last value: extend the run, write nothing yet
      inRun = true;
    } else {
      if (inRun) {
        // close the run with its final value
        result << listSeparator << *previous;
        inRun = false;
      }
      // add the current value
      result << separator << *current;
    }
    previous = current;
  }
  // if the range ended in the middle of a run, output its last value
  if (inRun) {
    result << listSeparator << *previous;
  }
  return result.str();
}
// The functions below are only declared in this header; their definitions are
// not visible here, so the descriptions restate the intent given by the
// original author.

/// Converts long strings into "start ... end"
MANTID_KERNEL_DLL std::string shorten(const std::string &input,
                                      const size_t max_length);

/// Return a string with all matching occurrence-strings (find_what) replaced -
/// presumably by replace_with; confirm against the definition
MANTID_KERNEL_DLL std::string replace(const std::string &input,
                                      const std::string &find_what,
                                      const std::string &replace_with);
/// Return a string with all occurrences of the characters in the input replaced
/// by the replace string
MANTID_KERNEL_DLL std::string replaceAll(const std::string &input,
                                         const std::string &charStr,
                                         const std::string &substitute);

/// Determine if a character group exists in a string
MANTID_KERNEL_DLL int confirmStr(const std::string &S,
                                 const std::string &fullPhrase);
/// Get a word from a string
MANTID_KERNEL_DLL int extractWord(std::string &Line, const std::string &Word,
                                  const int cnt = 4);
/// Get an int from the end of a word
MANTID_KERNEL_DLL int endsWithInt(const std::string &word);

/// Strip all spaces
MANTID_KERNEL_DLL std::string removeSpace(const std::string &CLine);
/// Strip pre/post spaces
MANTID_KERNEL_DLL std::string fullBlock(const std::string &A);
/// Strip pre/post spaces
MANTID_KERNEL_DLL std::string strip(const std::string &A);
/// Strip trailing comments (modifies the string in place)
MANTID_KERNEL_DLL void stripComment(std::string &A);
/// Determines if a string is only spaces
MANTID_KERNEL_DLL int isEmpty(const std::string &A);
/// Determines if a string starts with a #
MANTID_KERNEL_DLL bool skipLine(const std::string &line);
/// Get a line and strip comments
/// Use only for a single call
MANTID_KERNEL_DLL std::string getLine(std::istream &fh);
/// Get a line and strip comments
/// Use within a loop
MANTID_KERNEL_DLL void getLine(std::istream &fh, std::string &Line);
/// Peek at a line without extracting it from the stream
MANTID_KERNEL_DLL std::string peekLine(std::istream &fh);
/// Get a part of a long line
MANTID_KERNEL_DLL int getPartLine(std::istream &fh, std::string &Out,
                                  std::string &Excess, const int spc = 256);

// The templates below are only declared here; their definitions (and any
// explicit instantiations) are not visible in this header.

/// Takes a character string and evaluates the first [typename T] object
template <typename T> int convPartNum(const std::string &A, T &out);

/// Convert a string into a number
template <typename T> int convert(const std::string &A, T &out);
/// Convert a char* into a number
template <typename T> int convert(const char *A, T &out);

/// Convert a number to a string
template <typename T> std::string toString(const T &value);

/// Convert a vector to a string
template <typename T> std::string toString(const std::vector<T> &value);

/// Convert a set to a string
template <typename T> std::string toString(const std::set<T> &value);

/// NOTE(review): undocumented in the original. Presumably converts the items
/// of Line selected by Index and stores them in Out - confirm against the
/// definition.
template <typename T>
int setValues(const std::string &Line, const std::vector<int> &Index,
              std::vector<T> &Out);

/// Convert and cut a string
template <typename T> int sectPartNum(std::string &A, T &out);

/// Convert and cut a string
template <typename T> int section(std::string &A, T &out);
/// Convert and cut a char*
template <typename T> int section(char *cA, T &out);

/// Convert and cut a string for MCNPX
template <typename T> int sectionMCNPX(std::string &A, T &out);

/// Write file in standard MCNPX input form
MANTID_KERNEL_DLL void writeMCNPX(const std::string &Line, std::ostream &OX);

/// Split string into space-delimited components
MANTID_KERNEL_DLL std::vector<std::string> StrParts(const std::string &Ln);

/// Splits a string into key value pairs; by default "=" separates a key from
/// its value and "," separates the pairs
MANTID_KERNEL_DLL std::map<std::string, std::string>
splitToKeyValues(const std::string &input, const std::string &keyValSep = "=",
                 const std::string &listSep = ",");

/// Write a set of containers to a file
/// (three overloads: a step value plus Y, X plus Y, and X plus Y plus Err)
template <template <typename T, typename A> class V, typename T, typename A>
int writeFile(const std::string &Fname, const T &step, const V<T, A> &Y);
template <template <typename T, typename A> class V, typename T, typename A>
int writeFile(const std::string &Fname, const V<T, A> &X, const V<T, A> &Y);
template <template <typename T, typename A> class V, typename T, typename A>
int writeFile(const std::string &Fname, const V<T, A> &X, const V<T, A> &Y,
              const V<T, A> &Err);

/// Convert a VAX number to x86 little endian
/// NOTE(review): unlike the neighbouring declarations this one carries no
/// MANTID_KERNEL_DLL export macro - confirm that is intentional.
float getVAXnum(const float A);

/// Eat everything from the stream until the next EOL
MANTID_KERNEL_DLL void readToEndOfLine(std::istream &in, bool ConsumeEOL);
/// Returns the next word in the stream
MANTID_KERNEL_DLL std::string getWord(std::istream &in, bool consumeEOL);
/// Parses a path, found in the input string "path", and returns the vector of
/// the folders contributed into the path
MANTID_KERNEL_DLL size_t split_path(const std::string &path,
                                    std::vector<std::string> &path_components);

/// Loads the entire contents of a text file into a string
MANTID_KERNEL_DLL std::string loadFile(const std::string &filename);

/// Checks if the candidate is the member of the group
MANTID_KERNEL_DLL int isMember(const std::vector<std::string> &group,
                               const std::string &candidate);

/// Parses a number range, e.g. "1,4-9,54-111,3,10", to the vector containing
/// all the elements within the range
MANTID_KERNEL_DLL std::vector<int>
parseRange(const std::string &str, const std::string &elemSep = ",",
           const std::string &rangeSep = "-");

/// Parses unsigned integer groups, e.g. "1+2,4-7,9,11" to a nested vector
/// structure.
template <typename Integer>
std::vector<std::vector<Integer>> parseGroups(const std::string &str) {
  std::vector<std::vector<Integer>> groups;

  // Local helper functions.
  auto translateAdd = [&groups](const std::string &str) {
    const auto tokens =
        Kernel::StringTokenizer(str, "+",
                                Kernel::StringTokenizer::TOK_TRIM |
                                    Kernel::StringTokenizer::TOK_IGNORE_EMPTY);
    std::vector<Integer> group;
    group.reserve(tokens.count());
    for (const auto &t : tokens) {
      // add this number to the group we're about to add
      group.emplace_back(boost::lexical_cast<Integer>(t));
    }
    groups.emplace_back(std::move(group));
  };

  auto translateSumRange = [&groups](const std::string &str) {
    // add a group with the numbers in the range
    const auto tokens =
        Kernel::StringTokenizer(str, "-",
                                Kernel::StringTokenizer::TOK_TRIM |
                                    Kernel::StringTokenizer::TOK_IGNORE_EMPTY);
    if (tokens.count() != 2)
      throw std::runtime_error("Malformed range (-) operation.");
    Integer first = boost::lexical_cast<Integer>(tokens[0]);
    Integer last = boost::lexical_cast<Integer>(tokens[1]);
    if (first > last)
      std::swap(first, last);
    // add all the numbers in the range to the output group
    std::vector<Integer> group;
    group.reserve(last - first + 1);
    for (Integer i = first; i <= last; ++i)
      group.emplace_back(i);
    if (!group.empty())
      groups.emplace_back(std::move(group));
  };

  auto translateRange = [&groups](const std::string &str) {
    // add a group per number
    const auto tokens =
        Kernel::StringTokenizer(str, ":",
                                Kernel::StringTokenizer::TOK_TRIM |
                                    Kernel::StringTokenizer::TOK_IGNORE_EMPTY);
    if (tokens.count() != 2)
      throw std::runtime_error("Malformed range (:) operation.");
    Integer first = boost::lexical_cast<Integer>(tokens[0]);
    Integer last = boost::lexical_cast<Integer>(tokens[1]);
    if (first > last)
      std::swap(first, last);
    // add all the numbers in the range to separate output groups
    for (Integer i = first; i <= last; ++i) {
      groups.emplace_back(1, i);
    }
  };

  try {
    // split into comma separated groups, each group potentially containing
    // an operation (+-:) that produces even more groups.
    const auto tokens = StringTokenizer(str, ",",
                                        StringTokenizer::TOK_TRIM |
                                            StringTokenizer::TOK_IGNORE_EMPTY);
    for (const auto &token : tokens) {
      // Look for the various operators in the string. If one is found then
      // do the necessary translation into groupings.
      if (token.find('+') != std::string::npos) {
        translateAdd(token);
      } else if (token.find('-') != std::string::npos) {
        translateSumRange(token);
      } else if (token.find(':') != std::string::npos) {
        translateRange(token);
      } else if (!token.empty()) {
        // contains a single number, just add it as a new group
        groups.emplace_back(1, boost::lexical_cast<Integer>(token));
      }
    }
  } catch (boost::bad_lexical_cast &) {
    throw std::runtime_error("Cannot parse numbers from string: '" + str + "'");
  }

  return groups;
}

/// Extract a line from input stream, discarding any EOL characters encountered
/// @param is :: the stream to read from
/// @param str :: string passed by reference; presumably receives the
/// extracted line - confirm against the definition
/// @return a stream reference, presumably `is` itself to allow chaining -
/// confirm against the definition
MANTID_KERNEL_DLL std::istream &extractToEOL(std::istream &is,
                                             std::string &str);

} // NAMESPACE Strings

} // NAMESPACE Kernel

} // NAMESPACE Mantid

#endif // MANTID_KERNEL_STRINGS_H