Skip to content
Snippets Groups Projects
Commit 1d1e5b7c authored by Neil Vaytet's avatar Neil Vaytet
Browse files

Refs #20443 : Threading of Strings::join()

This uses openmp threads to speed up the `Strings::join()` function which was causing CreateWorkspace to be excessively slow when writing to the algorithm history.
The speedup is not fantastic as it still takes about 8s to create a 1000x10000 workspace (using 8 threads), but it is an improvement on the 60s yielded by the original function.

A new version of the `join()` function was added to `Strings.h`, and is used when the iterators passed as arguments are random access in nature. If this is not the case, then the original function is used.

A test was added to `StringsTest.h` to make sure both `join()` functions are being tested.
parent 534dfa97
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
// Includes // Includes
//---------------------------------------------------------------------- //----------------------------------------------------------------------
#include "MantidKernel/DllConfig.h" #include "MantidKernel/DllConfig.h"
#include "MantidKernel/MultiThreaded.h"
#include "MantidKernel/StringTokenizer.h" #include "MantidKernel/StringTokenizer.h"
#include "MantidKernel/System.h" #include "MantidKernel/System.h"
...@@ -54,14 +55,20 @@ namespace Strings { ...@@ -54,14 +55,20 @@ namespace Strings {
* For example, join a vector of strings with commas with: * For example, join a vector of strings with commas with:
* out = join(v.begin(), v.end(), ", "); * out = join(v.begin(), v.end(), ", ");
* *
* This version is used for iterators that are not random access (e.g. those of map, set).
*
* @param begin :: iterator at the start * @param begin :: iterator at the start
* @param end :: iterator at the end * @param end :: iterator at the end
* @param separator :: string to append. * @param separator :: string to append.
* @return * @return
*/ */
template <typename ITERATOR_TYPE> template <typename ITERATOR_TYPE>
DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end, DLLExport std::string
const std::string &separator) { join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator,
typename std::enable_if<
!(std::is_same<
typename std::iterator_traits<ITERATOR_TYPE>::iterator_category,
std::random_access_iterator_tag>::value)>::type * = nullptr) {
std::ostringstream output; std::ostringstream output;
ITERATOR_TYPE it; ITERATOR_TYPE it;
for (it = begin; it != end;) { for (it = begin; it != end;) {
...@@ -73,6 +80,64 @@ DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end, ...@@ -73,6 +80,64 @@ DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end,
return output.str(); return output.str();
} }
//------------------------------------------------------------------------------------------------
/** Join a set or vector of (something that turns into a string) together
 * into one string, separated by a string.
 * Returns an empty string if the range is null.
 * Does not add the separator after the LAST item.
 *
 * For example, join a vector of strings with commas with:
 *  out = join(v.begin(), v.end(), ", ");
 *
 * This is a faster threaded version of the join() function above.
 * It is selected only when the iterators are random access (e.g. those of a
 * vector), because it needs to compute the distance between begin and end and
 * to reach an element directly with begin + i.
 *
 * Each thread writes its share of the elements into its own stream; the
 * per-thread streams are then concatenated in thread order.
 *
 * @param begin :: iterator at the start
 * @param end :: iterator at the end
 * @param separator :: string to append.
 * @return the joined string, or an empty string if the range is empty
 */
template <typename ITERATOR_TYPE>
DLLExport std::string
join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator,
     typename std::enable_if<
         (std::is_same<
             typename std::iterator_traits<ITERATOR_TYPE>::iterator_category,
             std::random_access_iterator_tag>::value)>::type * = nullptr) {
  // Get the distance between beginning and end
  const long int dist = static_cast<long int>(std::distance(begin, end));
  // Nothing to join: avoid setting up the per-thread streams at all
  if (dist <= 0) {
    return std::string();
  }
  // Get max number of threads and allocate one stream per potential thread
  const int nmax_threads = PARALLEL_GET_MAX_THREADS;
  std::vector<std::ostringstream> output(nmax_threads);
#pragma omp parallel
  {
    // Each thread only ever touches its own stream, so no locking is needed
    const int idThread = PARALLEL_THREAD_NUMBER;
    std::ostringstream &threadOutput = output[idThread];
// A static schedule without a chunk size hands every thread one contiguous
// slice of the iterations, in thread-number order. This is what makes the
// in-order concatenation below reproduce the original element order; the
// default schedule is implementation-defined and gives no such guarantee.
#pragma omp for schedule(static)
    for (long int i = 0; i < dist; i++) {
      // Every element is prefixed with the separator; the very first one is
      // stripped again before returning
      threadOutput << separator << *(begin + i);
    }
  }
  // Concatenate all buffers in thread order. Streams of threads that did not
  // take part are empty, so the team size does not have to be read back out
  // of the parallel region (which previously was an unsynchronised write to a
  // shared variable from every thread).
  std::string joined;
  for (const auto &buffer : output) {
    joined += buffer.str();
  }
  // Remove the separator at the start of the string
  joined.erase(0, separator.length());
  return joined;
}
//------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------
/** Join a set or vector of (something that turns into a string) together /** Join a set or vector of (something that turns into a string) together
* into one string, separated by a separator, * into one string, separated by a separator,
......
...@@ -267,6 +267,25 @@ public: ...@@ -267,6 +267,25 @@ public:
TS_ASSERT_EQUALS(out, "Help,Me,I'm,Stuck,Inside,A,Test"); TS_ASSERT_EQUALS(out, "Help,Me,I'm,Stuck,Inside,A,Test");
} }
/// Checks join() on std::set iterators, which are not random access.
void test_joinSet() {
  std::set<std::string> words;

  // An empty range must give an empty string
  const std::string joinedEmpty = join(words.begin(), words.end(), ",");
  TS_ASSERT_EQUALS(joinedEmpty, "");

  words.insert({"Help", "Me", "I'm", "Stuck", "Inside", "A", "Test"});

  // The set iterates in sorted order, so the expected output is sorted
  const std::string joinedWords = join(words.begin(), words.end(), ",");
  TS_ASSERT_EQUALS(joinedWords, "A,Help,I'm,Inside,Me,Stuck,Test");
}
void test_joinCompress() { void test_joinCompress() {
std::vector<std::vector<int>> inputList{ std::vector<std::vector<int>> inputList{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment