diff --git a/Framework/Kernel/inc/MantidKernel/MultiThreaded.h b/Framework/Kernel/inc/MantidKernel/MultiThreaded.h
index 6c6c5334510abce3c167fc8f9047ca6a8720460f..559214a637336fc106092a910743c8531e1870a5 100644
--- a/Framework/Kernel/inc/MantidKernel/MultiThreaded.h
+++ b/Framework/Kernel/inc/MantidKernel/MultiThreaded.h
@@ -3,6 +3,7 @@
 
 #include "MantidKernel/DataItem.h"
 
+#include <atomic>
 #include <mutex>
 
 namespace Mantid {
@@ -58,6 +59,15 @@ threadSafe(const Arg &workspace, Args &&... others) {
   return workspace.threadSafe() && threadSafe(std::forward<Args>(others)...);
 }
 
+template <typename T, typename BinaryOp>
+void AtomicOp(std::atomic<T> &f, T d, BinaryOp op) {
+  T old = f.load();
+  T desired;
+  do {
+    desired = op(old, d);
+  } while (!f.compare_exchange_weak(old, desired));
+}
+
 } // namespace Kernel
 } // namespace Mantid
 
diff --git a/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h b/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h
index 09414c9b161118e729decf67f7415c3c15857d14..0de81b108752920c5b332ca704bf506f4c33e8a8 100644
--- a/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h
+++ b/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h
@@ -60,8 +60,8 @@ private:
   void calculateNormalization(const std::vector<coord_t> &otherValues,
                               const Kernel::Matrix<coord_t> &affineTrans);
 
-  std::vector<Kernel::VMD> calculateIntersections(const double theta,
-                                                  const double phi);
+  void calculateIntersections(std::vector<std::array<double, 4>> &intersections,
+                              const double theta, const double phi);
 
   /// Normalization workspace
   DataObjects::MDHistoWorkspace_sptr m_normWS;
diff --git a/Framework/MDAlgorithms/src/MDNormDirectSC.cpp b/Framework/MDAlgorithms/src/MDNormDirectSC.cpp
index 1d819e0014da9aff95873abf6528b3828cc0ee8a..4073469fbbb6838abbc928c6468573f1eb14eade 100644
--- a/Framework/MDAlgorithms/src/MDNormDirectSC.cpp
+++ b/Framework/MDAlgorithms/src/MDNormDirectSC.cpp
@@ -27,8 +27,8 @@ using namespace Mantid::Kernel;
 
 namespace {
 // function to  compare two intersections (h,k,l,Momentum) by Momentum
-bool compareMomentum(const Mantid::Kernel::VMD &v1,
-                     const Mantid::Kernel::VMD &v2) {
+bool compareMomentum(const std::array<double, 4> &v1,
+                     const std::array<double, 4> &v2) {
   return (v1[3] < v2[3]);
 }
 }
@@ -456,88 +456,89 @@ void MDNormDirectSC::calculateNormalization(
     solidAngDetToIdx = solidAngleWS->getDetectorIDToWorkspaceIndexMap();
   }
 
+  const size_t vmdDims = 4;
+  std::vector<std::atomic<signal_t>> signalArray(m_normWS->getNPoints());
+  std::vector<std::array<double, 4>> intersections;
+  std::vector<coord_t> pos, posNew;
   auto prog = make_unique<API::Progress>(this, 0.3, 1.0, ndets);
-  PARALLEL_FOR_NO_WSP_CHECK()
-  for (int64_t i = 0; i < ndets; i++) {
-    PARALLEL_START_INTERUPT_REGION
+PRAGMA_OMP(parallel for private(intersections, pos, posNew))
+for (int64_t i = 0; i < ndets; i++) {
+  PARALLEL_START_INTERUPT_REGION
 
-    if (!spectrumInfo.hasDetectors(i) || spectrumInfo.isMonitor(i) ||
-        spectrumInfo.isMasked(i)) {
-      continue;
-    }
-    const auto &detector = spectrumInfo.detector(i);
-    double theta = detector.getTwoTheta(m_samplePos, m_beamDir);
-    double phi = detector.getPhi();
-    // If the dtefctor is a group, this should be the ID of the first detector
-    const auto detID = detector.getID();
-
-    // Intersections
-    auto intersections = calculateIntersections(theta, phi);
-    if (intersections.empty())
+  if (!spectrumInfo.hasDetectors(i) || spectrumInfo.isMonitor(i) ||
+      spectrumInfo.isMasked(i)) {
+    continue;
+  }
+  const auto &detector = spectrumInfo.detector(i);
+  double theta = detector.getTwoTheta(m_samplePos, m_beamDir);
+  double phi = detector.getPhi();
+  // If the detector is a group, this should be the ID of the first detector
+  const auto detID = detector.getID();
+
+  // Intersections
+  this->calculateIntersections(intersections, theta, phi);
+  if (intersections.empty())
+    continue;
+
+  // Get solid angle for this contribution
+  double solid = protonCharge;
+  if (haveSA) {
+    solid =
+        solidAngleWS->y(solidAngDetToIdx.find(detID)->second)[0] * protonCharge;
+  }
+  // Compute final position in HKL
+  // pre-allocate for efficiency and copy non-hkl dim values into place
+  pos.resize(vmdDims + otherValues.size() + 1);
+  std::copy(otherValues.begin(), otherValues.end(), pos.begin() + vmdDims);
+  pos.push_back(1.);
+  auto intersectionsBegin = intersections.begin();
+  for (auto it = intersectionsBegin + 1; it != intersections.end(); ++it) {
+    const auto &curIntSec = *it;
+    const auto &prevIntSec = *(it - 1);
+    // the full vector isn't used so compute only what is necessary
+    double delta =
+        (curIntSec[3] * curIntSec[3] - prevIntSec[3] * prevIntSec[3]) /
+        energyToK;
+    if (delta < 1e-10)
+      continue; // Assume zero contribution if difference is small
+
+    // Average between two intersections for final position
+    std::transform(curIntSec.data(), curIntSec.data() + vmdDims,
+                   prevIntSec.data(), pos.begin(),
+                   VectorHelper::SimpleAverage<coord_t>());
+
+    // transform kf to energy transfer
+    pos[3] = static_cast<coord_t>(m_Ei - pos[3] * pos[3] / energyToK);
+    affineTrans.multiplyPoint(pos, posNew);
+    size_t linIndex = m_normWS->getLinearIndexAtCoord(posNew.data());
+    if (linIndex == size_t(-1))
       continue;
 
-    // Get solid angle for this contribution
-    double solid = protonCharge;
-    if (haveSA) {
-      solid = solidAngleWS->y(solidAngDetToIdx.find(detID)->second)[0] *
-              protonCharge;
-    }
-    // Compute final position in HKL
-    const size_t vmdDims = intersections.front().size();
-    // pre-allocate for efficiency and copy non-hkl dim values into place
-    std::vector<coord_t> pos(vmdDims + otherValues.size() + 1);
-    std::copy(otherValues.begin(), otherValues.end(), pos.begin() + vmdDims);
-    pos.push_back(1.);
-    auto intersectionsBegin = intersections.begin();
-    for (auto it = intersectionsBegin + 1; it != intersections.end(); ++it) {
-      const auto &curIntSec = *it;
-      const auto &prevIntSec = *(it - 1);
-      // the full vector isn't used so compute only what is necessary
-      double delta =
-          (curIntSec[3] * curIntSec[3] - prevIntSec[3] * prevIntSec[3]) /
-          energyToK;
-      if (delta < 1e-10)
-        continue; // Assume zero contribution if difference is small
-
-      // Average between two intersections for final position
-      std::transform(curIntSec.getBareArray(),
-                     curIntSec.getBareArray() + vmdDims,
-                     prevIntSec.getBareArray(), pos.begin(),
-                     VectorHelper::SimpleAverage<coord_t>());
-
-      // transform kf to energy transfer
-      pos[3] = static_cast<coord_t>(m_Ei - pos[3] * pos[3] / energyToK);
-      std::vector<coord_t> posNew = affineTrans * pos;
-      size_t linIndex = m_normWS->getLinearIndexAtCoord(posNew.data());
-      if (linIndex == size_t(-1))
-        continue;
-
-      // signal = integral between two consecutive intersections *solid angle
-      // *PC
-      double signal = solid * delta;
-
-      PARALLEL_CRITICAL(updateMD) {
-        signal += m_normWS->getSignalAt(linIndex);
-        m_normWS->setSignalAt(linIndex, signal);
-      }
-    }
-    prog->report();
-
-    PARALLEL_END_INTERUPT_REGION
+    // signal = integral between two consecutive intersections *solid angle
+    // *PC
+    double signal = solid * delta;
+    Mantid::Kernel::AtomicOp(signalArray[linIndex], signal,
+                             std::plus<signal_t>());
   }
-  PARALLEL_CHECK_INTERUPT_REGION
+  prog->report();
+
+  PARALLEL_END_INTERUPT_REGION
+}
+PARALLEL_CHECK_INTERUPT_REGION
+std::copy(signalArray.cbegin(), signalArray.cend(), m_normWS->getSignalArray());
 }
 
 /**
  * Calculate the points of intersection for the given detector with cuboid
  * surrounding the
  * detector position in HKL
- * @param theta Polar angle withd detector
+ * @param intersections A list of intersections in HKL space
+ * @param theta Polar angle with detector
  * @param phi Azimuthal angle with detector
- * @return A list of intersections in HKL space
  */
-std::vector<Kernel::VMD>
-MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
+void MDNormDirectSC::calculateIntersections(
+    std::vector<std::array<double, 4>> &intersections, const double theta,
+    const double phi) {
   V3D qout(sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta)),
       qin(0., 0., m_ki);
 
@@ -558,7 +559,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
   auto kNBins = m_kX.size();
   auto lNBins = m_lX.size();
   auto eNBins = m_eX.size();
-  std::vector<Kernel::VMD> intersections;
+  intersections.clear();
   intersections.reserve(hNBins + kNBins + lNBins + eNBins +
                         8); // 8 is 3*(min,max for each Q component)+kfmin+kfmax
 
@@ -580,7 +581,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
           if ((ki >= m_kmin) && (ki <= m_kmax) && (li >= m_lmin) &&
               (li <= m_lmax)) {
             double momi = fmom * (hi - hStart) + m_kfmin;
-            intersections.emplace_back(hi, ki, li, momi);
+            intersections.push_back({{hi, ki, li, momi}});
           }
         }
       }
@@ -593,7 +594,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
       double lhmin = fl * (m_hmin - hStart) + lStart;
       if ((khmin >= m_kmin) && (khmin <= m_kmax) && (lhmin >= m_lmin) &&
           (lhmin <= m_lmax)) {
-        intersections.emplace_back(m_hmin, khmin, lhmin, momhMin);
+        intersections.push_back({{m_hmin, khmin, lhmin, momhMin}});
       }
     }
     double momhMax = fmom * (m_hmax - hStart) + m_kfmin;
@@ -603,7 +604,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
       double lhmax = fl * (m_hmax - hStart) + lStart;
       if ((khmax >= m_kmin) && (khmax <= m_kmax) && (lhmax >= m_lmin) &&
           (lhmax <= m_lmax)) {
-        intersections.emplace_back(m_hmax, khmax, lhmax, momhMax);
+        intersections.push_back({{m_hmax, khmax, lhmax, momhMax}});
       }
     }
   }
@@ -626,7 +627,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
           if ((hi >= m_hmin) && (hi <= m_hmax) && (li >= m_lmin) &&
               (li <= m_lmax)) {
             double momi = fmom * (ki - kStart) + m_kfmin;
-            intersections.emplace_back(hi, ki, li, momi);
+            intersections.push_back({{hi, ki, li, momi}});
           }
         }
       }
@@ -638,7 +639,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
       double lkmin = fl * (m_kmin - kStart) + lStart;
       if ((hkmin >= m_hmin) && (hkmin <= m_hmax) && (lkmin >= m_lmin) &&
           (lkmin <= m_lmax)) {
-        intersections.emplace_back(hkmin, m_kmin, lkmin, momkMin);
+        intersections.push_back({{hkmin, m_kmin, lkmin, momkMin}});
       }
     }
     double momkMax = fmom * (m_kmax - kStart) + m_kfmin;
@@ -648,7 +649,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
       double lkmax = fl * (m_kmax - kStart) + lStart;
       if ((hkmax >= m_hmin) && (hkmax <= m_hmax) && (lkmax >= m_lmin) &&
           (lkmax <= m_lmax)) {
-        intersections.emplace_back(hkmax, m_kmax, lkmax, momkMax);
+        intersections.push_back({{hkmax, m_kmax, lkmax, momkMax}});
       }
     }
   }
@@ -668,7 +669,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
           if ((hi >= m_hmin) && (hi <= m_hmax) && (ki >= m_kmin) &&
               (ki <= m_kmax)) {
             double momi = fmom * (li - lStart) + m_kfmin;
-            intersections.emplace_back(hi, ki, li, momi);
+            intersections.push_back({{hi, ki, li, momi}});
           }
         }
       }
@@ -680,7 +681,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
       double klmin = fk * (m_lmin - lStart) + kStart;
       if ((hlmin >= m_hmin) && (hlmin <= m_hmax) && (klmin >= m_kmin) &&
           (klmin <= m_kmax)) {
-        intersections.emplace_back(hlmin, klmin, m_lmin, momlMin);
+        intersections.push_back({{hlmin, klmin, m_lmin, momlMin}});
       }
     }
     double momlMax = fmom * (m_lmax - lStart) + m_kfmin;
@@ -690,7 +691,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
       double klmax = fk * (m_lmax - lStart) + kStart;
       if ((hlmax >= m_hmin) && (hlmax <= m_hmax) && (klmax >= m_kmin) &&
           (klmax <= m_kmax)) {
-        intersections.emplace_back(hlmax, klmax, m_lmax, momlMax);
+        intersections.push_back({{hlmax, klmax, m_lmax, momlMax}});
       }
     }
   }
@@ -705,7 +706,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
         double l = qin.Z() - qout.Z() * kfi;
         if ((h >= m_hmin) && (h <= m_hmax) && (k >= m_kmin) && (k <= m_kmax) &&
             (l >= m_lmin) && (l <= m_lmax)) {
-          intersections.emplace_back(h, k, l, kfi);
+          intersections.push_back({{h, k, l, kfi}});
         }
       }
     }
@@ -714,16 +715,15 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) {
   // endpoints
   if ((hStart >= m_hmin) && (hStart <= m_hmax) && (kStart >= m_kmin) &&
       (kStart <= m_kmax) && (lStart >= m_lmin) && (lStart <= m_lmax)) {
-    intersections.emplace_back(hStart, kStart, lStart, m_kfmin);
+    intersections.push_back({{hStart, kStart, lStart, m_kfmin}});
   }
   if ((hEnd >= m_hmin) && (hEnd <= m_hmax) && (kEnd >= m_kmin) &&
       (kEnd <= m_kmax) && (lEnd >= m_lmin) && (lEnd <= m_lmax)) {
-    intersections.emplace_back(hEnd, kEnd, lEnd, m_kfmax);
+    intersections.push_back({{hEnd, kEnd, lEnd, m_kfmax}});
   }
 
   // sort intersections by final momentum
   std::stable_sort(intersections.begin(), intersections.end(), compareMomentum);
-  return intersections;
 }
 
 } // namespace MDAlgorithms
diff --git a/Framework/MDAlgorithms/src/MDNormSCD.cpp b/Framework/MDAlgorithms/src/MDNormSCD.cpp
index ea3d89122c49519d65f58ecd127615f03ad1f438..e8b99d9620b5f7f46f4994b10378de0d13919b80 100644
--- a/Framework/MDAlgorithms/src/MDNormSCD.cpp
+++ b/Framework/MDAlgorithms/src/MDNormSCD.cpp
@@ -291,10 +291,8 @@ MDNormSCD::findIntergratedDimensions(const std::vector<coord_t> &otherDimValues,
     if (affineMat[row][0] == 1.) {
       m_hIntegrated = false;
       m_hIdx = row;
-      if (m_hmin < dimMin)
-        m_hmin = dimMin;
-      if (m_hmax > dimMax)
-        m_hmax = dimMax;
+      m_hmin = std::max(m_hmin, dimMin);
+      m_hmax = std::min(m_hmax, dimMax);
       if (m_hmin > dimMax || m_hmax < dimMin) {
         skipNormalization = true;
       }
@@ -302,10 +300,8 @@ MDNormSCD::findIntergratedDimensions(const std::vector<coord_t> &otherDimValues,
     if (affineMat[row][1] == 1.) {
       m_kIntegrated = false;
       m_kIdx = row;
-      if (m_kmin < dimMin)
-        m_kmin = dimMin;
-      if (m_kmax > dimMax)
-        m_kmax = dimMax;
+      m_kmin = std::max(m_kmin, dimMin);
+      m_kmax = std::min(m_kmax, dimMax);
       if (m_kmin > dimMax || m_kmax < dimMin) {
         skipNormalization = true;
       }
@@ -313,10 +309,8 @@ MDNormSCD::findIntergratedDimensions(const std::vector<coord_t> &otherDimValues,
     if (affineMat[row][2] == 1.) {
       m_lIntegrated = false;
       m_lIdx = row;
-      if (m_lmin < dimMin)
-        m_lmin = dimMin;
-      if (m_lmax > dimMax)
-        m_lmax = dimMax;
+      m_lmin = std::max(m_lmin, dimMin);
+      m_lmax = std::min(m_lmax, dimMax);
       if (m_lmin > dimMax || m_lmax < dimMin) {
         skipNormalization = true;
       }
@@ -363,16 +357,6 @@ void MDNormSCD::cacheDimensionXValues() {
   }
 }
 
-namespace {
-template <typename T, typename BinaryOp>
-void AtomicOp(std::atomic<T> &f, T d, BinaryOp _Op) {
-  T old = f.load();
-  T desired = _Op(old, d);
-  while (!f.compare_exchange_weak(old, desired))
-    desired = _Op(old, d);
-}
-} // namespace
-
 /**
  * Computed the normalization for the input workspace. Results are stored in
  * m_normWS
@@ -485,7 +469,8 @@ for (int64_t i = 0; i < ndets; i++) {
     size_t k = static_cast<size_t>(std::distance(intersectionsBegin, it));
     // signal = integral between two consecutive intersections
     signal_t signal = (yValues[k] - yValues[k - 1]) * solid;
-    AtomicOp(signalArray[linIndex], signal, std::plus<signal_t>());
+    Mantid::Kernel::AtomicOp(signalArray[linIndex], signal,
+                             std::plus<signal_t>());
   }
   prog->report();
 
diff --git a/docs/source/release/v3.10.0/framework.rst b/docs/source/release/v3.10.0/framework.rst
index 08a598ebd29dbff82f5d7d71f37ebfa252a0bd88..560da1fb3d7da823328618bb427fa594cb036408 100644
--- a/docs/source/release/v3.10.0/framework.rst
+++ b/docs/source/release/v3.10.0/framework.rst
@@ -48,6 +48,7 @@ Improved
   :ref:`AddSampleLog <algm-AddSampleLog>` can retrieve automatically.
 - ``ThreadPool`` now respects the value of ``OMP_NUM_THREADS`` environment variable (documented in [gcc](https://gcc.gnu.org/onlinedocs/libgomp/OMP_005fNUM_005fTHREADS.html))
 - Improved parallel scaling of :ref:`MDNormSCD <algm-MDNormSCD>` with > 4 cores.
+- Improved parallel scaling of :ref:`MDNormDirectSCD <algm-MDNormDirectSC>` with > 4 cores.
 - Reduced execution time of ``EventList::sortTof`` by over 2x, improving performance in algorithms such as :ref:`algm-CompressEvents` and :ref:`algm-SortEvents` which call it.
 - :ref:`LoadNexusProcessed <algm-LoadNexusProcessed>` is now approximately 33x faster when loading a ``PeaksWorkspace`` with a large instrument attached.