diff --git a/Framework/Kernel/inc/MantidKernel/Strings.h b/Framework/Kernel/inc/MantidKernel/Strings.h index 8ae51d40e152b1146ed674024fce08669d1d6b22..a98fc5e178ba860d692e13e22a4cfa9ef662ef47 100644 --- a/Framework/Kernel/inc/MantidKernel/Strings.h +++ b/Framework/Kernel/inc/MantidKernel/Strings.h @@ -5,6 +5,7 @@ // Includes //---------------------------------------------------------------------- #include "MantidKernel/DllConfig.h" +#include "MantidKernel/MultiThreaded.h" #include "MantidKernel/StringTokenizer.h" #include "MantidKernel/System.h" @@ -56,14 +57,17 @@ namespace Strings { * For example, join a vector of strings with commas with: * out = join(v.begin(), v.end(), ", "); * + * This is a simple default version that works in all cases but is potentially + * slow. + * * @param begin :: iterator at the start * @param end :: iterator at the end * @param separator :: string to append. * @return */ template <typename ITERATOR_TYPE> -DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end, - const std::string &separator) { +DLLExport std::string simpleJoin(ITERATOR_TYPE begin, ITERATOR_TYPE end, + const std::string &separator) { std::ostringstream output; ITERATOR_TYPE it; for (it = begin; it != end;) { @@ -75,6 +79,124 @@ DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end, return output.str(); } +//------------------------------------------------------------------------------------------------ +/** Join a set or vector of (something that turns into a string) together + * into one string, separated by a string. + * Returns an empty string if the range is null. + * Does not add the separator after the LAST item. + * + * For example, join a vector of strings with commas with: + * out = join(v.begin(), v.end(), ", "); + * + * This version is used for random access iterators (e.g. map, set), and + * it calls simpleJoin(). + * + * @param begin :: iterator at the start + * @param end :: iterator at the end + * @param separator :: string to append. + * @return + */ +template <typename ITERATOR_TYPE> +DLLExport std::string +join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator, + typename std::enable_if< + !(std::is_same< + typename std::iterator_traits<ITERATOR_TYPE>::iterator_category, + std::random_access_iterator_tag>::value)>::type * = nullptr) { + return simpleJoin(begin, end, separator); +} + +//------------------------------------------------------------------------------------------------ +/** Join a set or vector of (something that turns into a string) together + * into one string, separated by a string. + * Returns an empty string if the range is null. + * Does not add the separator after the LAST item. + * + * For example, join a vector of strings with commas with: + * out = join(v.begin(), v.end(), ", "); + * + * This is a faster threaded version of the join() function above. + * It is used only if the iterators are not random access (e.g. vector), as it + * needs to be able to determine the distance between begin and end. + * It reverts to calling simpleJoin() if the input array is small. + * + * @param begin :: iterator at the start + * @param end :: iterator at the end + * @param separator :: string to append. + * @return + */ +template <typename ITERATOR_TYPE> +DLLExport std::string +join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator, + typename std::enable_if< + (std::is_same< + typename std::iterator_traits<ITERATOR_TYPE>::iterator_category, + std::random_access_iterator_tag>::value)>::type * = nullptr) { + + // Get max number of threads + int nmaxThreads = static_cast<int>(PARALLEL_GET_MAX_THREADS); + + // Define minimum size for using threading + int min_size = 500 * nmaxThreads; + + // Get the distance between begining and end + int dist = static_cast<int>(std::distance(begin, end)); + + if (dist < min_size) { + + // If the input array is small, use the simpler function to avoid + // unnecessary overhead from generating the parallel section + return simpleJoin(begin, end, separator); + + } else { + + // Allocate vector space + std::vector<std::string> output(nmaxThreads); + size_t stream_size = 0; + + // Actual number of threads in the current region + int nThreads = 1; +#pragma omp parallel reduction(+ : stream_size) + { + nThreads = static_cast<int>(PARALLEL_NUMBER_OF_THREADS); + int idThread = static_cast<int>(PARALLEL_THREAD_NUMBER); + ITERATOR_TYPE it; + + // Initialise ostringstream + std::ostringstream thread_stream; + +/* To make sure the loop is done in the right order, we use schedule(static). + + From the OpenMP documentation: + "When schedule(static, chunk_size) is specified, iterations are divided into + chunks of size chunk_size, and the chunks are assigned to the threads in the + team in a round-robin fashion **in the order of the thread number**." + + "When no chunk_size is specified, the iteration space is divided into chunks + that are approximately equal in size, and at most one chunk is distributed + to each thread." +*/ +#pragma omp for schedule(static) + for (int i = 0; i < dist; i++) { + thread_stream << separator << *(begin + i); + } + output[idThread] = thread_stream.str(); + stream_size += output[idThread].length(); + } + + // Reserve space in memory for output string + std::string master_string = output[0].erase(0, separator.length()); + master_string.reserve(stream_size - separator.length()); + + // Concatenate the contributions from the remaning threads + for (int i = 1; i < nThreads; i++) { + master_string += output[i]; + } + + return master_string; + } +} + //------------------------------------------------------------------------------------------------ /** Join a set or vector of (something that turns into a string) together * into one string, separated by a separator, diff --git a/Framework/Kernel/test/StringsTest.h b/Framework/Kernel/test/StringsTest.h index f0805ab7127c49d7aea9aacc083e05bc76060851..e65782ae72eb1faaf2fbc04f4846605987818543 100644 --- a/Framework/Kernel/test/StringsTest.h +++ b/Framework/Kernel/test/StringsTest.h @@ -267,6 +267,44 @@ public: TS_ASSERT_EQUALS(out, "Help,Me,I'm,Stuck,Inside,A,Test"); } + void test_joinSet() { + std::set<std::string> v; + std::string out; + + out = join(v.begin(), v.end(), ","); + TS_ASSERT_EQUALS(out, ""); + + v.insert("Help"); + v.insert("Me"); + v.insert("I'm"); + v.insert("Stuck"); + v.insert("Inside"); + v.insert("A"); + v.insert("Test"); + + out = join(v.begin(), v.end(), ","); + TS_ASSERT_EQUALS(out, "A,Help,I'm,Inside,Me,Stuck,Test"); + } + + void test_joinLong() { + std::vector<std::string> v; + std::string out; + std::string ans; + + out = join(v.begin(), v.end(), ","); + TS_ASSERT_EQUALS(out, ""); + + int n = 100000; + for (int i = 0; i < n; i++) { + v.emplace_back(std::to_string(i)); + ans += std::to_string(i) + ","; + } + + out = join(v.begin(), v.end(), ","); + ans.pop_back(); + TS_ASSERT_EQUALS(out, ans); + } + void test_joinCompress() { std::vector<std::vector<int>> inputList{ @@ -590,4 +628,20 @@ public: } }; +class StringsTestPerformance : public CxxTest::TestSuite { +public: + static StringsTestPerformance *createSuite() { + return new StringsTestPerformance(); + } + static void destroySuite(StringsTestPerformance *suite) { delete suite; } + void setUp() override { input = std::vector<double>(50000000, 0.123456); } + void test_join_double() { + auto result = join(input.begin(), input.end(), separator); + } + +private: + std::vector<double> input; + std::string separator{","}; +}; + #endif // MANTID_SUPPORTTEST_H_