diff --git a/Framework/Kernel/inc/MantidKernel/MultiThreaded.h b/Framework/Kernel/inc/MantidKernel/MultiThreaded.h index 6c6c5334510abce3c167fc8f9047ca6a8720460f..559214a637336fc106092a910743c8531e1870a5 100644 --- a/Framework/Kernel/inc/MantidKernel/MultiThreaded.h +++ b/Framework/Kernel/inc/MantidKernel/MultiThreaded.h @@ -3,6 +3,7 @@ #include "MantidKernel/DataItem.h" +#include <atomic> #include <mutex> namespace Mantid { @@ -58,6 +59,15 @@ threadSafe(const Arg &workspace, Args &&... others) { return workspace.threadSafe() && threadSafe(std::forward<Args>(others)...); } +template <typename T, typename BinaryOp> +void AtomicOp(std::atomic<T> &f, T d, BinaryOp op) { + T old = f.load(); + T desired; + do { + desired = op(old, d); + } while (!f.compare_exchange_weak(old, desired)); +} + } // namespace Kernel } // namespace Mantid diff --git a/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h b/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h index 09414c9b161118e729decf67f7415c3c15857d14..0de81b108752920c5b332ca704bf506f4c33e8a8 100644 --- a/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h +++ b/Framework/MDAlgorithms/inc/MantidMDAlgorithms/MDNormDirectSC.h @@ -60,8 +60,8 @@ private: void calculateNormalization(const std::vector<coord_t> &otherValues, const Kernel::Matrix<coord_t> &affineTrans); - std::vector<Kernel::VMD> calculateIntersections(const double theta, - const double phi); + void calculateIntersections(std::vector<std::array<double, 4>> &intersections, + const double theta, const double phi); /// Normalization workspace DataObjects::MDHistoWorkspace_sptr m_normWS; diff --git a/Framework/MDAlgorithms/src/MDNormDirectSC.cpp b/Framework/MDAlgorithms/src/MDNormDirectSC.cpp index 1d819e0014da9aff95873abf6528b3828cc0ee8a..4073469fbbb6838abbc928c6468573f1eb14eade 100644 --- a/Framework/MDAlgorithms/src/MDNormDirectSC.cpp +++ b/Framework/MDAlgorithms/src/MDNormDirectSC.cpp @@ -27,8 +27,8 @@ using namespace Mantid::Kernel; namespace { // function to compare two intersections (h,k,l,Momentum) by Momentum -bool compareMomentum(const Mantid::Kernel::VMD &v1, - const Mantid::Kernel::VMD &v2) { +bool compareMomentum(const std::array<double, 4> &v1, + const std::array<double, 4> &v2) { return (v1[3] < v2[3]); } } @@ -456,88 +456,89 @@ void MDNormDirectSC::calculateNormalization( solidAngDetToIdx = solidAngleWS->getDetectorIDToWorkspaceIndexMap(); } + const size_t vmdDims = 4; + std::vector<std::atomic<signal_t>> signalArray(m_normWS->getNPoints()); + std::vector<std::array<double, 4>> intersections; + std::vector<coord_t> pos, posNew; auto prog = make_unique<API::Progress>(this, 0.3, 1.0, ndets); - PARALLEL_FOR_NO_WSP_CHECK() - for (int64_t i = 0; i < ndets; i++) { - PARALLEL_START_INTERUPT_REGION +PRAGMA_OMP(parallel for private(intersections, pos, posNew)) +for (int64_t i = 0; i < ndets; i++) { + PARALLEL_START_INTERUPT_REGION - if (!spectrumInfo.hasDetectors(i) || spectrumInfo.isMonitor(i) || - spectrumInfo.isMasked(i)) { - continue; - } - const auto &detector = spectrumInfo.detector(i); - double theta = detector.getTwoTheta(m_samplePos, m_beamDir); - double phi = detector.getPhi(); - // If the dtefctor is a group, this should be the ID of the first detector - const auto detID = detector.getID(); - - // Intersections - auto intersections = calculateIntersections(theta, phi); - if (intersections.empty()) + if (!spectrumInfo.hasDetectors(i) || spectrumInfo.isMonitor(i) || + spectrumInfo.isMasked(i)) { + continue; + } + const auto &detector = spectrumInfo.detector(i); + double theta = detector.getTwoTheta(m_samplePos, m_beamDir); + double phi = detector.getPhi(); + // If the detector is a group, this should be the ID of the first detector + const auto detID = detector.getID(); + + // Intersections + this->calculateIntersections(intersections, theta, phi); + if (intersections.empty()) + continue; + + // Get solid angle for this contribution + double solid = protonCharge; + if (haveSA) { + solid = + solidAngleWS->y(solidAngDetToIdx.find(detID)->second)[0] * protonCharge; + } + // Compute final position in HKL + // pre-allocate for efficiency and copy non-hkl dim values into place + pos.resize(vmdDims + otherValues.size() + 1); + std::copy(otherValues.begin(), otherValues.end(), pos.begin() + vmdDims); + pos.push_back(1.); + auto intersectionsBegin = intersections.begin(); + for (auto it = intersectionsBegin + 1; it != intersections.end(); ++it) { + const auto &curIntSec = *it; + const auto &prevIntSec = *(it - 1); + // the full vector isn't used so compute only what is necessary + double delta = + (curIntSec[3] * curIntSec[3] - prevIntSec[3] * prevIntSec[3]) / + energyToK; + if (delta < 1e-10) + continue; // Assume zero contribution if difference is small + + // Average between two intersections for final position + std::transform(curIntSec.data(), curIntSec.data() + vmdDims, + prevIntSec.data(), pos.begin(), + VectorHelper::SimpleAverage<coord_t>()); + + // transform kf to energy transfer + pos[3] = static_cast<coord_t>(m_Ei - pos[3] * pos[3] / energyToK); + affineTrans.multiplyPoint(pos, posNew); + size_t linIndex = m_normWS->getLinearIndexAtCoord(posNew.data()); + if (linIndex == size_t(-1)) continue; - // Get solid angle for this contribution - double solid = protonCharge; - if (haveSA) { - solid = solidAngleWS->y(solidAngDetToIdx.find(detID)->second)[0] * - protonCharge; - } - // Compute final position in HKL - const size_t vmdDims = intersections.front().size(); - // pre-allocate for efficiency and copy non-hkl dim values into place - std::vector<coord_t> pos(vmdDims + otherValues.size() + 1); - std::copy(otherValues.begin(), otherValues.end(), pos.begin() + vmdDims); - pos.push_back(1.); - auto intersectionsBegin = intersections.begin(); - for (auto it = intersectionsBegin + 1; it != intersections.end(); ++it) { - const auto &curIntSec = *it; - const auto &prevIntSec = *(it - 1); - // the full vector isn't used so compute only what is necessary - double delta = - (curIntSec[3] * curIntSec[3] - prevIntSec[3] * prevIntSec[3]) / - energyToK; - if (delta < 1e-10) - continue; // Assume zero contribution if difference is small - - // Average between two intersections for final position - std::transform(curIntSec.getBareArray(), - curIntSec.getBareArray() + vmdDims, - prevIntSec.getBareArray(), pos.begin(), - VectorHelper::SimpleAverage<coord_t>()); - - // transform kf to energy transfer - pos[3] = static_cast<coord_t>(m_Ei - pos[3] * pos[3] / energyToK); - std::vector<coord_t> posNew = affineTrans * pos; - size_t linIndex = m_normWS->getLinearIndexAtCoord(posNew.data()); - if (linIndex == size_t(-1)) - continue; - - // signal = integral between two consecutive intersections *solid angle - // *PC - double signal = solid * delta; - - PARALLEL_CRITICAL(updateMD) { - signal += m_normWS->getSignalAt(linIndex); - m_normWS->setSignalAt(linIndex, signal); - } - } - prog->report(); - - PARALLEL_END_INTERUPT_REGION + // signal = integral between two consecutive intersections *solid angle + // *PC + double signal = solid * delta; + Mantid::Kernel::AtomicOp(signalArray[linIndex], signal, + std::plus<signal_t>()); } - PARALLEL_CHECK_INTERUPT_REGION + prog->report(); + + PARALLEL_END_INTERUPT_REGION +} +PARALLEL_CHECK_INTERUPT_REGION +std::copy(signalArray.cbegin(), signalArray.cend(), m_normWS->getSignalArray()); } /** * Calculate the points of intersection for the given detector with cuboid * surrounding the * detector position in HKL - * @param theta Polar angle withd detector + * @param intersections A list of intersections in HKL space + * @param theta Polar angle with detector * @param phi Azimuthal angle with detector - * @return A list of intersections in HKL space */ -std::vector<Kernel::VMD> -MDNormDirectSC::calculateIntersections(const double theta, const double phi) { +void MDNormDirectSC::calculateIntersections( + std::vector<std::array<double, 4>> &intersections, const double theta, + const double phi) { V3D qout(sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta)), qin(0., 0., m_ki); @@ -558,7 +559,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { auto kNBins = m_kX.size(); auto lNBins = m_lX.size(); auto eNBins = m_eX.size(); - std::vector<Kernel::VMD> intersections; + intersections.clear(); intersections.reserve(hNBins + kNBins + lNBins + eNBins + 8); // 8 is 3*(min,max for each Q component)+kfmin+kfmax @@ -580,7 +581,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { if ((ki >= m_kmin) && (ki <= m_kmax) && (li >= m_lmin) && (li <= m_lmax)) { double momi = fmom * (hi - hStart) + m_kfmin; - intersections.emplace_back(hi, ki, li, momi); + intersections.push_back({{hi, ki, li, momi}}); } } } @@ -593,7 +594,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { double lhmin = fl * (m_hmin - hStart) + lStart; if ((khmin >= m_kmin) && (khmin <= m_kmax) && (lhmin >= m_lmin) && (lhmin <= m_lmax)) { - intersections.emplace_back(m_hmin, khmin, lhmin, momhMin); + intersections.push_back({{m_hmin, khmin, lhmin, momhMin}}); } } double momhMax = fmom * (m_hmax - hStart) + m_kfmin; @@ -603,7 +604,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { double lhmax = fl * (m_hmax - hStart) + lStart; if ((khmax >= m_kmin) && (khmax <= m_kmax) && (lhmax >= m_lmin) && (lhmax <= m_lmax)) { - intersections.emplace_back(m_hmax, khmax, lhmax, momhMax); + intersections.push_back({{m_hmax, khmax, lhmax, momhMax}}); } } } @@ -626,7 +627,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { if ((hi >= m_hmin) && (hi <= m_hmax) && (li >= m_lmin) && (li <= m_lmax)) { double momi = fmom * (ki - kStart) + m_kfmin; - intersections.emplace_back(hi, ki, li, momi); + intersections.push_back({{hi, ki, li, momi}}); } } } @@ -638,7 +639,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { double lkmin = fl * (m_kmin - kStart) + lStart; if ((hkmin >= m_hmin) && (hkmin <= m_hmax) && (lkmin >= m_lmin) && (lkmin <= m_lmax)) { - intersections.emplace_back(hkmin, m_kmin, lkmin, momkMin); + intersections.push_back({{hkmin, m_kmin, lkmin, momkMin}}); } } double momkMax = fmom * (m_kmax - kStart) + m_kfmin; @@ -648,7 +649,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { double lkmax = fl * (m_kmax - kStart) + lStart; if ((hkmax >= m_hmin) && (hkmax <= m_hmax) && (lkmax >= m_lmin) && (lkmax <= m_lmax)) { - intersections.emplace_back(hkmax, m_kmax, lkmax, momkMax); + intersections.push_back({{hkmax, m_kmax, lkmax, momkMax}}); } } } @@ -668,7 +669,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { if ((hi >= m_hmin) && (hi <= m_hmax) && (ki >= m_kmin) && (ki <= m_kmax)) { double momi = fmom * (li - lStart) + m_kfmin; - intersections.emplace_back(hi, ki, li, momi); + intersections.push_back({{hi, ki, li, momi}}); } } } @@ -680,7 +681,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { double klmin = fk * (m_lmin - lStart) + kStart; if ((hlmin >= m_hmin) && (hlmin <= m_hmax) && (klmin >= m_kmin) && (klmin <= m_kmax)) { - intersections.emplace_back(hlmin, klmin, m_lmin, momlMin); + intersections.push_back({{hlmin, klmin, m_lmin, momlMin}}); } } double momlMax = fmom * (m_lmax - lStart) + m_kfmin; @@ -690,7 +691,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { double klmax = fk * (m_lmax - lStart) + kStart; if ((hlmax >= m_hmin) && (hlmax <= m_hmax) && (klmax >= m_kmin) && (klmax <= m_kmax)) { - intersections.emplace_back(hlmax, klmax, m_lmax, momlMax); + intersections.push_back({{hlmax, klmax, m_lmax, momlMax}}); } } } @@ -705,7 +706,7 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { double l = qin.Z() - qout.Z() * kfi; if ((h >= m_hmin) && (h <= m_hmax) && (k >= m_kmin) && (k <= m_kmax) && (l >= m_lmin) && (l <= m_lmax)) { - intersections.emplace_back(h, k, l, kfi); + intersections.push_back({{h, k, l, kfi}}); } } } @@ -714,16 +715,15 @@ MDNormDirectSC::calculateIntersections(const double theta, const double phi) { // endpoints if ((hStart >= m_hmin) && (hStart <= m_hmax) && (kStart >= m_kmin) && (kStart <= m_kmax) && (lStart >= m_lmin) && (lStart <= m_lmax)) { - intersections.emplace_back(hStart, kStart, lStart, m_kfmin); + intersections.push_back({{hStart, kStart, lStart, m_kfmin}}); } if ((hEnd >= m_hmin) && (hEnd <= m_hmax) && (kEnd >= m_kmin) && (kEnd <= m_kmax) && (lEnd >= m_lmin) && (lEnd <= m_lmax)) { - intersections.emplace_back(hEnd, kEnd, lEnd, m_kfmax); + intersections.push_back({{hEnd, kEnd, lEnd, m_kfmax}}); } // sort intersections by final momentum std::stable_sort(intersections.begin(), intersections.end(), compareMomentum); - return intersections; } } // namespace MDAlgorithms diff --git a/Framework/MDAlgorithms/src/MDNormSCD.cpp b/Framework/MDAlgorithms/src/MDNormSCD.cpp index ea3d89122c49519d65f58ecd127615f03ad1f438..e8b99d9620b5f7f46f4994b10378de0d13919b80 100644 --- a/Framework/MDAlgorithms/src/MDNormSCD.cpp +++ b/Framework/MDAlgorithms/src/MDNormSCD.cpp @@ -291,10 +291,8 @@ MDNormSCD::findIntergratedDimensions(const std::vector<coord_t> &otherDimValues, if (affineMat[row][0] == 1.) { m_hIntegrated = false; m_hIdx = row; - if (m_hmin < dimMin) - m_hmin = dimMin; - if (m_hmax > dimMax) - m_hmax = dimMax; + m_hmin = std::max(m_hmin, dimMin); + m_hmax = std::min(m_hmax, dimMax); if (m_hmin > dimMax || m_hmax < dimMin) { skipNormalization = true; } @@ -302,10 +300,8 @@ MDNormSCD::findIntergratedDimensions(const std::vector<coord_t> &otherDimValues, if (affineMat[row][1] == 1.) { m_kIntegrated = false; m_kIdx = row; - if (m_kmin < dimMin) - m_kmin = dimMin; - if (m_kmax > dimMax) - m_kmax = dimMax; + m_kmin = std::max(m_kmin, dimMin); + m_kmax = std::min(m_kmax, dimMax); if (m_kmin > dimMax || m_kmax < dimMin) { skipNormalization = true; } @@ -313,10 +309,8 @@ MDNormSCD::findIntergratedDimensions(const std::vector<coord_t> &otherDimValues, if (affineMat[row][2] == 1.) { m_lIntegrated = false; m_lIdx = row; - if (m_lmin < dimMin) - m_lmin = dimMin; - if (m_lmax > dimMax) - m_lmax = dimMax; + m_lmin = std::max(m_lmin, dimMin); + m_lmax = std::min(m_lmax, dimMax); if (m_lmin > dimMax || m_lmax < dimMin) { skipNormalization = true; } @@ -363,16 +357,6 @@ void MDNormSCD::cacheDimensionXValues() { } } -namespace { -template <typename T, typename BinaryOp> -void AtomicOp(std::atomic<T> &f, T d, BinaryOp _Op) { - T old = f.load(); - T desired = _Op(old, d); - while (!f.compare_exchange_weak(old, desired)) - desired = _Op(old, d); -} -} // namespace - /** * Computed the normalization for the input workspace. Results are stored in * m_normWS @@ -485,7 +469,8 @@ for (int64_t i = 0; i < ndets; i++) { size_t k = static_cast<size_t>(std::distance(intersectionsBegin, it)); // signal = integral between two consecutive intersections signal_t signal = (yValues[k] - yValues[k - 1]) * solid; - AtomicOp(signalArray[linIndex], signal, std::plus<signal_t>()); + Mantid::Kernel::AtomicOp(signalArray[linIndex], signal, + std::plus<signal_t>()); } prog->report(); diff --git a/docs/source/release/v3.10.0/framework.rst b/docs/source/release/v3.10.0/framework.rst index 08a598ebd29dbff82f5d7d71f37ebfa252a0bd88..560da1fb3d7da823328618bb427fa594cb036408 100644 --- a/docs/source/release/v3.10.0/framework.rst +++ b/docs/source/release/v3.10.0/framework.rst @@ -48,6 +48,7 @@ Improved :ref:`AddSampleLog <algm-AddSampleLog>` can retrieve automatically. - ``ThreadPool`` now respects the value of ``OMP_NUM_THREADS`` environment variable (documented in [gcc](https://gcc.gnu.org/onlinedocs/libgomp/OMP_005fNUM_005fTHREADS.html)) - Improved parallel scaling of :ref:`MDNormSCD <algm-MDNormSCD>` with > 4 cores. +- Improved parallel scaling of :ref:`MDNormDirectSCD <algm-MDNormDirectSC>` with > 4 cores. - Reduced execution time of ``EventList::sortTof`` by over 2x, improving performance in algorithms such as :ref:`algm-CompressEvents` and :ref:`algm-SortEvents` which call it. - :ref:`LoadNexusProcessed <algm-LoadNexusProcessed>` is now approximately 33x faster when loading a ``PeaksWorkspace`` with a large instrument attached.