diff --git a/Framework/Kernel/inc/MantidKernel/Strings.h b/Framework/Kernel/inc/MantidKernel/Strings.h index 814b4a14fe7f07d8c18022eab68c3c1c691e5ca4..ac78084eaf2e35326c2eac94146a1e4158454ad5 100644 --- a/Framework/Kernel/inc/MantidKernel/Strings.h +++ b/Framework/Kernel/inc/MantidKernel/Strings.h @@ -55,7 +55,8 @@ namespace Strings { * For example, join a vector of strings with commas with: * out = join(v.begin(), v.end(), ", "); * - * This version is used for random access iterators (e.g. map, set). + * This is a simple default version that works in all cases but is potentially + * slow for large arrays. * * @param begin :: iterator at the start * @param end :: iterator at the end @@ -63,11 +64,8 @@ namespace Strings { * @return */ template <typename ITERATOR_TYPE> -DLLExport std::string -join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator, - typename std::enable_if<!(std::is_same< - typename std::iterator_traits<ITERATOR_TYPE>::iterator_category, - std::random_access_iterator_tag>::value)>::type * = nullptr) { +DLLExport std::string simpleJoin(ITERATOR_TYPE begin, ITERATOR_TYPE end, + const std::string &separator) { std::ostringstream output; ITERATOR_TYPE it; for (it = begin; it != end;) { @@ -79,6 +77,33 @@ join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator, return output.str(); } +//------------------------------------------------------------------------------------------------ +/** Join a set or vector of (something that turns into a string) together + * into one string, separated by a string. + * Returns an empty string if the range is null. + * Does not add the separator after the LAST item. + * + * For example, join a vector of strings with commas with: + * out = join(v.begin(), v.end(), ", "); + * + * This version is used for random access iterators (e.g. map, set), and + * it calls simpleJoin(). + * + * @param begin :: iterator at the start + * @param end :: iterator at the end + * @param separator :: string to append. + * @return + */ +template <typename ITERATOR_TYPE> +DLLExport std::string +join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator, + typename std::enable_if< + !(std::is_same< + typename std::iterator_traits<ITERATOR_TYPE>::iterator_category, + std::random_access_iterator_tag>::value)>::type * = nullptr) { + return simpleJoin(begin, end, separator); +} + //------------------------------------------------------------------------------------------------ /** Join a set or vector of (something that turns into a string) together * into one string, separated by a string. @@ -91,68 +116,85 @@ join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator, * This is a faster threaded version of the join() function above. * It is used only if the iterators are not random access (e.g. vector), as it * needs to be able to determine the distance between begin and end. + * It reverts to calling simpleJoin() if the input array is small. * * @param begin :: iterator at the start * @param end :: iterator at the end * @param separator :: string to append. * @return */ + template <typename ITERATOR_TYPE> DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator, - typename std::enable_if<(std::is_same< - typename std::iterator_traits<ITERATOR_TYPE>::iterator_category, - std::random_access_iterator_tag>::value)>::type * = nullptr) { + typename std::enable_if< + (std::is_same< + typename std::iterator_traits<ITERATOR_TYPE>::iterator_category, + std::random_access_iterator_tag>::value)>::type * = nullptr) { + + // Get max number of threads + int nmaxThreads = static_cast<int>(PARALLEL_GET_MAX_THREADS); + + // Define minimum size for using threading + int min_size = 500 * nmaxThreads; // Get the distance between begining and end int dist = static_cast<int>(std::distance(begin, end)); - // Get max number of threads and allocate vector speace - int nmaxThreads = static_cast<int>(PARALLEL_GET_MAX_THREADS); - std::vector<std::ostringstream> output(nmaxThreads); + if (dist < min_size) { + + // If the input array is small, use the simpler function to avoid + // unnecessary overhead from generating the parallel section + return simpleJoin(begin, end, separator); + + } else { + + // Allocate vector space + std::vector<std::ostringstream> output(nmaxThreads); - // Actual number of threads in the current region - int nThreads = 1; + // Actual number of threads in the current region + int nThreads = 1; #pragma omp parallel - { - nThreads = static_cast<int>(PARALLEL_NUMBER_OF_THREADS); - int idThread = static_cast<int>(PARALLEL_THREAD_NUMBER); - ITERATOR_TYPE it; + { + nThreads = static_cast<int>(PARALLEL_NUMBER_OF_THREADS); + int idThread = static_cast<int>(PARALLEL_THREAD_NUMBER); + ITERATOR_TYPE it; #pragma omp for - for (int i = 0; i < dist; i++) { - // Write to stringstream - output[idThread] << separator << *(begin + i); + for (int i = 0; i < dist; i++) { + // Write to stringstream + output[idThread] << separator << *(begin + i); + } } - } - // Flush all buffers into the first one - for (int i = 1; i < nThreads; i++) { - output[0] << output[i].str(); - } + // Flush all buffers into the first one + for (int i = 1; i < nThreads; i++) { + output[0] << output[i].str(); + } - // Return the stringstream converted to a string, minus the separator at - // the start of the string. - return output[0].str().erase(0, separator.length()); + // Return the stringstream converted to a string, minus the separator at + // the start of the string. + return output[0].str().erase(0, separator.length()); + } } //------------------------------------------------------------------------------------------------ /** Join a set or vector of (something that turns into a string) together -* into one string, separated by a separator, -* adjacent items that are precisely 1 away from each other -* will be compressed into a list syntax e.g. 1-5. -* Returns an empty string if the range is null. -* Does not add the separator after the LAST item. -* -* For example, join a vector of strings with commas with: -* out = join(v.begin(), v.end(), ", "); -* -* @param begin :: iterator at the start -* @param end :: iterator at the end -* @param separator :: string to append between items. -* @param listSeparator :: string to append between list items. -* @return A string with contiguous values compressed using the list syntax -*/ + * into one string, separated by a separator, + * adjacent items that are precisely 1 away from each other + * will be compressed into a list syntax e.g. 1-5. + * Returns an empty string if the range is null. + * Does not add the separator after the LAST item. + * + * For example, join a vector of strings with commas with: + * out = join(v.begin(), v.end(), ", "); + * + * @param begin :: iterator at the start + * @param end :: iterator at the end + * @param separator :: string to append between items. + * @param listSeparator :: string to append between list items. + * @return A string with contiguous values compressed using the list syntax + */ template <typename ITERATOR_TYPE> DLLExport std::string joinCompress(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator = ",", @@ -303,8 +345,8 @@ MANTID_KERNEL_DLL void readToEndOfLine(std::istream &in, bool ConsumeEOL); MANTID_KERNEL_DLL std::string getWord(std::istream &in, bool consumeEOL); /// function parses a path, found in input string "path" and returns vector of /// the folders contributed into the path */ -MANTID_KERNEL_DLL size_t -split_path(const std::string &path, std::vector<std::string> &path_components); +MANTID_KERNEL_DLL size_t split_path(const std::string &path, + std::vector<std::string> &path_components); /// Loads the entire contents of a text file into a string MANTID_KERNEL_DLL std::string loadFile(const std::string &filename); @@ -327,9 +369,10 @@ std::vector<std::vector<Integer>> parseGroups(const std::string &str) { // Local helper functions. auto translateAdd = [&groups](const std::string &str) { - const auto tokens = Kernel::StringTokenizer( - str, "+", Kernel::StringTokenizer::TOK_TRIM | - Kernel::StringTokenizer::TOK_IGNORE_EMPTY); + const auto tokens = + Kernel::StringTokenizer(str, "+", + Kernel::StringTokenizer::TOK_TRIM | + Kernel::StringTokenizer::TOK_IGNORE_EMPTY); std::vector<Integer> group; group.reserve(tokens.count()); for (const auto &t : tokens) { @@ -341,9 +384,10 @@ std::vector<std::vector<Integer>> parseGroups(const std::string &str) { auto translateSumRange = [&groups](const std::string &str) { // add a group with the numbers in the range - const auto tokens = Kernel::StringTokenizer( - str, "-", Kernel::StringTokenizer::TOK_TRIM | - Kernel::StringTokenizer::TOK_IGNORE_EMPTY); + const auto tokens = + Kernel::StringTokenizer(str, "-", + Kernel::StringTokenizer::TOK_TRIM | + Kernel::StringTokenizer::TOK_IGNORE_EMPTY); if (tokens.count() != 2) throw std::runtime_error("Malformed range (-) operation."); Integer first = boost::lexical_cast<Integer>(tokens[0]); @@ -361,9 +405,10 @@ std::vector<std::vector<Integer>> parseGroups(const std::string &str) { auto translateRange = [&groups](const std::string &str) { // add a group per number - const auto tokens = Kernel::StringTokenizer( - str, ":", Kernel::StringTokenizer::TOK_TRIM | - Kernel::StringTokenizer::TOK_IGNORE_EMPTY); + const auto tokens = + Kernel::StringTokenizer(str, ":", + Kernel::StringTokenizer::TOK_TRIM | + Kernel::StringTokenizer::TOK_IGNORE_EMPTY); if (tokens.count() != 2) throw std::runtime_error("Malformed range (:) operation."); Integer first = boost::lexical_cast<Integer>(tokens[0]); @@ -379,9 +424,9 @@ std::vector<std::vector<Integer>> parseGroups(const std::string &str) { try { // split into comma separated groups, each group potentially containing // an operation (+-:) that produces even more groups. - const auto tokens = - StringTokenizer(str, ",", StringTokenizer::TOK_TRIM | - StringTokenizer::TOK_IGNORE_EMPTY); + const auto tokens = StringTokenizer(str, ",", + StringTokenizer::TOK_TRIM | + StringTokenizer::TOK_IGNORE_EMPTY); for (const auto &token : tokens) { // Look for the various operators in the string. If one is found then // do the necessary translation into groupings.