diff --git a/Framework/Kernel/inc/MantidKernel/Strings.h b/Framework/Kernel/inc/MantidKernel/Strings.h
index 8ae51d40e152b1146ed674024fce08669d1d6b22..a98fc5e178ba860d692e13e22a4cfa9ef662ef47 100644
--- a/Framework/Kernel/inc/MantidKernel/Strings.h
+++ b/Framework/Kernel/inc/MantidKernel/Strings.h
@@ -5,6 +5,7 @@
 // Includes
 //----------------------------------------------------------------------
 #include "MantidKernel/DllConfig.h"
+#include "MantidKernel/MultiThreaded.h"
 #include "MantidKernel/StringTokenizer.h"
 #include "MantidKernel/System.h"
 
@@ -56,14 +57,17 @@ namespace Strings {
  * For example, join a vector of strings with commas with:
  *  out = join(v.begin(), v.end(), ", ");
  *
+ * This is a simple default version that works in all cases but is potentially
+ * slow.
+ *
  * @param begin :: iterator at the start
  * @param end :: iterator at the end
  * @param separator :: string to append.
  * @return
  */
 template <typename ITERATOR_TYPE>
-DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end,
-                           const std::string &separator) {
+DLLExport std::string simpleJoin(ITERATOR_TYPE begin, ITERATOR_TYPE end,
+                                 const std::string &separator) {
   std::ostringstream output;
   ITERATOR_TYPE it;
   for (it = begin; it != end;) {
@@ -75,6 +79,124 @@ DLLExport std::string join(ITERATOR_TYPE begin, ITERATOR_TYPE end,
   return output.str();
 }
 
+//------------------------------------------------------------------------------------------------
+/** Concatenate the string form of every element of a range into a single
+ * string, putting a separator between consecutive elements.
+ * An empty range gives an empty string.
+ * No separator is written after the final element.
+ *
+ * Example, joining a set of strings with commas:
+ *  out = join(s.begin(), s.end(), ", ");
+ *
+ * This overload is selected for iterators whose category is anything other
+ * than random access (e.g. those of map or set); it forwards to simpleJoin().
+ *
+ * @param begin :: iterator at the start
+ * @param end :: iterator at the end
+ * @param separator :: string placed between consecutive elements.
+ * @return the result of simpleJoin() for the same arguments
+ */
+template <typename ITERATOR_TYPE>
+DLLExport std::string
+join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator,
+     typename std::enable_if<!std::is_same<
+         typename std::iterator_traits<ITERATOR_TYPE>::iterator_category,
+         std::random_access_iterator_tag>::value>::type * = nullptr) {
+  // The range length is not known up front here, so join serially
+  return simpleJoin<ITERATOR_TYPE>(begin, end, separator);
+}
+
+//------------------------------------------------------------------------------------------------
+/** Join a vector (or other random access range) of (something that turns
+ * into a string) together into one string, separated by a string.
+ * Returns an empty string if the range is null.
+ * Does not add the separator after the LAST item.
+ *
+ * For example, join a vector of strings with commas with:
+ *  out = join(v.begin(), v.end(), ", ");
+ *
+ * This is a threaded version of the join() function above.
+ * It is used only if the iterators are random access (e.g. vector), as it
+ * needs to be able to determine the distance between begin and end.
+ * It reverts to calling simpleJoin() if the input array is small.
+ *
+ * @param begin :: iterator at the start
+ * @param end :: iterator at the end
+ * @param separator :: string inserted between consecutive items.
+ * @return the joined string
+ */
+template <typename ITERATOR_TYPE>
+DLLExport std::string
+join(ITERATOR_TYPE begin, ITERATOR_TYPE end, const std::string &separator,
+     typename std::enable_if<
+         (std::is_same<
+             typename std::iterator_traits<ITERATOR_TYPE>::iterator_category,
+             std::random_access_iterator_tag>::value)>::type * = nullptr) {
+
+  // Use the iterator's own difference type so that very long ranges are not
+  // truncated by a narrowing cast to int
+  using DIFF_TYPE =
+      typename std::iterator_traits<ITERATOR_TYPE>::difference_type;
+
+  // Get max number of threads
+  const int nmaxThreads = static_cast<int>(PARALLEL_GET_MAX_THREADS);
+
+  // Define minimum size for using threading
+  const int min_size = 500 * nmaxThreads;
+
+  // Get the distance between beginning and end
+  const DIFF_TYPE dist = std::distance(begin, end);
+
+  if (dist < min_size) {
+    // If the input array is small, use the simpler function to avoid
+    // unnecessary overhead from generating the parallel section
+    return simpleJoin(begin, end, separator);
+  } else {
+    // One slot per thread that could take part; the slots of threads that do
+    // not take part stay empty and add nothing to the result
+    std::vector<std::string> output(nmaxThreads);
+    size_t stream_size = 0;
+
+#pragma omp parallel reduction(+ : stream_size)
+    {
+      // Apart from its own slot of output, each thread writes only to
+      // variables that are private to it, so no synchronisation is needed
+      const int idThread = static_cast<int>(PARALLEL_THREAD_NUMBER);
+      std::ostringstream thread_stream;
+
+/* To make sure the loop is done in the right order, we use schedule(static).
+
+   From the OpenMP documentation:
+   "When schedule(static, chunk_size) is specified, iterations are divided into
+   chunks of size chunk_size, and the chunks are assigned to the threads in the
+   team in a round-robin fashion **in the order of the thread number**."
+
+   "When no chunk_size is specified, the iteration space is divided into chunks
+   that are approximately equal in size, and at most one chunk is distributed
+   to each thread."
+*/
+#pragma omp for schedule(static)
+      for (DIFF_TYPE i = 0; i < dist; i++) {
+        thread_stream << separator << *(begin + i);
+      }
+      output[idThread] = thread_stream.str();
+      stream_size += output[idThread].length();
+    }
+
+    // Reserve space in memory for output string, then add the first chunk
+    // without the separator that precedes the very first item
+    std::string master_string;
+    master_string.reserve(stream_size - separator.length());
+    master_string += output[0].erase(0, separator.length());
+
+    // Concatenate the contributions from the remaining threads
+    for (size_t i = 1; i < output.size(); i++) {
+      master_string += output[i];
+    }
+    return master_string;
+  }
+}
+
 //------------------------------------------------------------------------------------------------
 /** Join a set or vector of (something that turns into a string) together
  * into one string, separated by a separator,
diff --git a/Framework/Kernel/test/StringsTest.h b/Framework/Kernel/test/StringsTest.h
index f0805ab7127c49d7aea9aacc083e05bc76060851..e65782ae72eb1faaf2fbc04f4846605987818543 100644
--- a/Framework/Kernel/test/StringsTest.h
+++ b/Framework/Kernel/test/StringsTest.h
@@ -267,6 +267,44 @@ public:
     TS_ASSERT_EQUALS(out, "Help,Me,I'm,Stuck,Inside,A,Test");
   }
 
+  void test_joinSet() {
+    std::set<std::string> words;
+
+    // Nothing to join yet: the result must be empty
+    const std::string fromEmpty = join(words.begin(), words.end(), ",");
+    TS_ASSERT_EQUALS(fromEmpty, "");
+
+    // The words go in out of order; the expected string below lists them in
+    // the set's sorted iteration order
+    for (const char *word :
+         {"Help", "Me", "I'm", "Stuck", "Inside", "A", "Test"}) {
+      words.insert(word);
+    }
+
+    // Join the populated set
+    const std::string joined = join(words.begin(), words.end(), ",");
+    TS_ASSERT_EQUALS(joined, "A,Help,I'm,Inside,Me,Stuck,Test");
+  }
+
+  void test_joinLong() {
+    // Empty input gives an empty string
+    std::vector<std::string> numbers;
+    TS_ASSERT_EQUALS(join(numbers.begin(), numbers.end(), ","), "");
+
+    // Build the input together with the string it is expected to join to
+    const int count = 100000;
+    std::string expected;
+    for (int value = 0; value < count; ++value) {
+      const std::string text = std::to_string(value);
+      numbers.emplace_back(text);
+      expected += text + ",";
+    }
+    expected.pop_back(); // no separator after the last item
+
+    const std::string joined = join(numbers.begin(), numbers.end(), ",");
+    TS_ASSERT_EQUALS(joined, expected);
+  }
+
   void test_joinCompress() {
 
     std::vector<std::vector<int>> inputList{
@@ -590,4 +628,20 @@ public:
   }
 };
 
+class StringsTestPerformance : public CxxTest::TestSuite {
+  std::vector<double> m_values;
+  std::string m_sep{","};
+public:
+  static StringsTestPerformance *createSuite() {
+    return new StringsTestPerformance();
+  }
+  static void destroySuite(StringsTestPerformance *suite) { delete suite; }
+  // Fill the input with 50 million copies of the same value
+  void setUp() override { m_values.assign(50000000, 0.123456); }
+  // Join the whole input; the resulting string is not inspected
+  void test_join_double() {
+    auto joined = join(m_values.begin(), m_values.end(), m_sep);
+  }
+};
+
 #endif // MANTID_SUPPORTTEST_H_