From 24bb41c474025ec7b91f6e72f841acad116cba10 Mon Sep 17 00:00:00 2001 From: Owen Arnold <owen.arnold@stfc.ac.uk> Date: Thu, 10 Apr 2014 21:40:29 +0100 Subject: [PATCH] refs #9257. More performance work. Also start fixing a bug whereby the cluster offsets can become larger than the range of an integer. This has been partially fixed by better calculating the number of possible clusters that could exist. The full fix should be to also change the type of m_id in DisjointElement from int to size_t! Another issue that is being worked on here is a faster clone of the MDWorkspace. We only care about it's shape, so we just need something the right size to start putting data into! --- .../API/inc/MantidAPI/IMDHistoWorkspace.h | 2 + .../ConnectedComponentLabeling.h | 6 ++ .../IntegratePeaksUsingClusters.h | 4 +- .../src/ConnectedComponentLabeling.cpp | 69 +++++++++++++++---- .../src/IntegratePeaksUsingClusters.cpp | 43 ++++++++++-- .../test/ConnectedComponentLabelingTest.h | 11 +-- .../inc/MantidMDEvents/MDHistoWorkspace.h | 4 ++ .../MDEvents/src/MDHistoWorkspace.cpp | 5 ++ 8 files changed, 122 insertions(+), 22 deletions(-) diff --git a/Code/Mantid/Framework/API/inc/MantidAPI/IMDHistoWorkspace.h b/Code/Mantid/Framework/API/inc/MantidAPI/IMDHistoWorkspace.h index e58fa0a91b9..5da50570e19 100644 --- a/Code/Mantid/Framework/API/inc/MantidAPI/IMDHistoWorkspace.h +++ b/Code/Mantid/Framework/API/inc/MantidAPI/IMDHistoWorkspace.h @@ -78,6 +78,8 @@ namespace API virtual double & operator [](const size_t & index) = 0; virtual void setCoordinateSystem(const Mantid::API::SpecialCoordinateSystem coordinateSystem) = 0; + + boost::shared_ptr<IMDHistoWorkspace> cloneShape() const = 0; protected: virtual const std::string toString() const; }; diff --git a/Code/Mantid/Framework/Crystal/inc/MantidCrystal/ConnectedComponentLabeling.h b/Code/Mantid/Framework/Crystal/inc/MantidCrystal/ConnectedComponentLabeling.h index c483331e55e..39d11cf2bbb 100644 --- a/Code/Mantid/Framework/Crystal/inc/MantidCrystal/ConnectedComponentLabeling.h +++ b/Code/Mantid/Framework/Crystal/inc/MantidCrystal/ConnectedComponentLabeling.h @@ -11,6 +11,10 @@ namespace Mantid { + namespace Kernel + { + class Logger; + } namespace API { @@ -101,6 +105,8 @@ namespace Mantid /// Run multithreaded const boost::optional<int> m_nThreads; + Mantid::Kernel::Logger& m_logger; + }; } // namespace Crystal diff --git a/Code/Mantid/Framework/Crystal/inc/MantidCrystal/IntegratePeaksUsingClusters.h b/Code/Mantid/Framework/Crystal/inc/MantidCrystal/IntegratePeaksUsingClusters.h index 87f970edd3c..139fd5637ac 100644 --- a/Code/Mantid/Framework/Crystal/inc/MantidCrystal/IntegratePeaksUsingClusters.h +++ b/Code/Mantid/Framework/Crystal/inc/MantidCrystal/IntegratePeaksUsingClusters.h @@ -3,6 +3,7 @@ #include "MantidKernel/System.h" #include "MantidAPI/Algorithm.h" +#include "MantidAPI/IMDWorkspace.h" namespace Mantid { @@ -42,6 +43,7 @@ namespace Crystal virtual const std::string category() const; private: + Mantid::API::MDNormalization getNormalization(); virtual void initDocs(); void init(); void exec(); @@ -53,4 +55,4 @@ namespace Crystal } // namespace Crystal } // namespace Mantid -#endif /* MANTID_CRYSTAL_INTEGRATEPEAKSUSINGCLUSTERS_H_ */ \ No newline at end of file +#endif /* MANTID_CRYSTAL_INTEGRATEPEAKSUSINGCLUSTERS_H_ */ diff --git a/Code/Mantid/Framework/Crystal/src/ConnectedComponentLabeling.cpp b/Code/Mantid/Framework/Crystal/src/ConnectedComponentLabeling.cpp index 7d3bd25e054..abca8d23869 100644 --- a/Code/Mantid/Framework/Crystal/src/ConnectedComponentLabeling.cpp +++ b/Code/Mantid/Framework/Crystal/src/ConnectedComponentLabeling.cpp @@ -1,4 +1,5 @@ #include "MantidKernel/MultiThreaded.h" +#include "MantidKernel/Logger.h" #include "MantidKernel/V3D.h" #include "MantidAPI/AlgorithmManager.h" #include "MantidAPI/FrameworkManager.h" @@ -45,6 +46,27 @@ namespace Mantid return maxNeighbours; } + /** + * Get the maximum number of clusters possible in an MD space. For use as an offset. + * @param ws : Workspace to interogate + * @param nIterators : number of equally sized iterators working on the workspace + * @return Maximum number of clusters. + */ + size_t calculateMaxClusters(IMDHistoWorkspace const * const ws, size_t nIterators) + { + size_t maxClusters = 1; + for(size_t i = 0; i < ws->getNumDims(); ++i) + { + maxClusters *= ws->getDimension(i)->getNBins()/2; + } + maxClusters /= nIterators; + if(maxClusters == 0) + { + maxClusters = ws->getNPoints(); + } + return maxClusters; + } + /** * Helper non-member to clone the input workspace * @param inWS: To clone @@ -113,6 +135,18 @@ namespace Mantid typedef std::set<DisjointPair> SetDisjointPair; typedef std::vector<EdgeIndexPair> VecEdgeIndexPair; + /** + * Free function performing the CCL implementation over a range defined by the iterator. + * + * @param iterator : Iterator giving access the the image + * @param strategy : Strategy for identifying background + * @param neighbourElements : Grid of DisjointElements the same size as the image + * @param progress : Progress object to update + * @param maxNeighbours : Maximum number of neighbours each element may have. Determined by dimensionality. + * @param startLabelId : Start label index to increment + * @param edgeIndexVec : Vector of edge index pairs. To identify elements across iterator boundaries to resolve later. + * @return + */ size_t doConnectedComponentLabeling( IMDIterator* iterator, BackgroundStrategy * const strategy, @@ -123,6 +157,7 @@ namespace Mantid ) { size_t currentLabelCount = startLabelId; + strategy->configureIterator(iterator); // Set up such things as desired Normalization. do { if(!strategy->isBackground(iterator)) @@ -205,7 +240,7 @@ namespace Mantid * @param nThreads : Optional argument of number of threads to use. */ ConnectedComponentLabeling::ConnectedComponentLabeling(const size_t& startId, const boost::optional<int> nThreads) - : m_startId(startId), m_nThreads(nThreads) + : m_startId(startId), m_nThreads(nThreads), m_logger(Kernel::Logger::get("ConnectedComponentLabeling")) { if(m_nThreads.is_initialized() && m_nThreads.get() < 0) { @@ -274,30 +309,34 @@ namespace Mantid VecElements neighbourElements(ws->getNPoints()); const size_t maxNeighbours = calculateMaxNeighbours(ws.get()); + progress.doReport("Identifying clusters"); size_t frequency = reportEvery<size_t>(10000, ws->getNPoints()); - progress.resetNumSteps(frequency, 0.0, 0.5); + progress.resetNumSteps(frequency, 0.0, 0.8); // For each process maintains pair of index from within process bounds to index outside process bounds const int nThreadsToUse = getNThreads(); + if(nThreadsToUse > 1) { std::vector<API::IMDIterator*> iterators = ws->createIterators(nThreadsToUse); - const int nthreads = getNThreads(); - std::vector<VecEdgeIndexPair> parallelEdgeVec(nthreads); + + const size_t maxClustersPossible = calculateMaxClusters(ws.get(), nThreadsToUse); + + std::vector<VecEdgeIndexPair> parallelEdgeVec(nThreadsToUse); - std::vector<std::map<size_t, boost::shared_ptr<Cluster> > > parallelClusterMapVec(nthreads); + std::vector<std::map<size_t, boost::shared_ptr<Cluster> > > parallelClusterMapVec(nThreadsToUse); // ------------- Stage One. Local CCL in parallel. + m_logger.debug("Parallel solve local CCL"); PARALLEL_FOR_NO_WSP_CHECK() - for(int i = 0; i < nthreads; ++i) + for(int i = 0; i < nThreadsToUse; ++i) { API::IMDIterator* iterator = iterators[i]; - iterator->setNormalization(NoNormalization); boost::scoped_ptr<BackgroundStrategy> strategy(baseStrategy->clone()); // local strategy VecEdgeIndexPair& edgeVec = parallelEdgeVec[i]; // local edge indexes - const size_t startLabel = m_startId + (i * ws->getNPoints()); // Ensure that label ids are totally unique within each parallel unit. + const size_t startLabel = m_startId + (i * maxClustersPossible); // Ensure that label ids are totally unique within each parallel unit. const size_t endLabel = doConnectedComponentLabeling(iterator, strategy.get(), neighbourElements, progress, maxNeighbours, startLabel, edgeVec); // Create clusters from labels. @@ -332,6 +371,7 @@ namespace Mantid } // Percolate minimum label across boundaries for indexes where there is ambiguity. + m_logger.debug("Percolate minimum label across boundaries"); std::vector<boost::shared_ptr<Cluster> > incompleteClusterVec; incompleteClusterVec.reserve(1000); std::set<size_t> usedLabels; @@ -369,7 +409,8 @@ namespace Mantid // ------------- Stage 3 In parallel, process each incomplete cluster. progress.doReport("Merging clusters across processors"); - progress.resetNumSteps(incompleteClusterVec.size(), 0.5, 0.75); + m_logger.debug("Merging clusters across processors"); + progress.resetNumSteps(incompleteClusterVec.size(), 0.8, 0.9); PARALLEL_FOR_NO_WSP_CHECK() for(int i = 0; i < static_cast<int>(incompleteClusterVec.size()); ++i) { @@ -377,13 +418,16 @@ namespace Mantid cluster->toUniformMinimum(neighbourElements); progress.report(); } - - // Now combine clusters and add the resolved clusters to the clustermap. + m_logger.debug("Remove duplicates"); + m_logger.debug() << incompleteClusterVec.size() << " clusters to reconstruct" << std::endl; + // Now combine clusters and add the resolved clusters to the clusterMap. + SetIds usedOwningClusterIds; for(size_t i = 0; i < incompleteClusterVec.size(); ++i) { const size_t label = incompleteClusterVec[i]->getLabel(); - if(!does_contain_key(clusterMap, label)) + if(usedOwningClusterIds.find(label) == usedOwningClusterIds.end()) { + usedOwningClusterIds.insert(label); clusterMap.insert(std::make_pair(label, incompleteClusterVec[i])); } else @@ -409,7 +453,6 @@ namespace Mantid // Associate the member DisjointElements with a cluster. Involves looping back over iterator. iterator->jumpTo(0); // Reset - iterator->setNormalization(NoNormalization); // TODO: check that this is a valid assumption. std::set<size_t> labelIds; do { diff --git a/Code/Mantid/Framework/Crystal/src/IntegratePeaksUsingClusters.cpp b/Code/Mantid/Framework/Crystal/src/IntegratePeaksUsingClusters.cpp index 09b701642ee..0936f5ff036 100644 --- a/Code/Mantid/Framework/Crystal/src/IntegratePeaksUsingClusters.cpp +++ b/Code/Mantid/Framework/Crystal/src/IntegratePeaksUsingClusters.cpp @@ -140,6 +140,16 @@ namespace Mantid declareProperty( new PropertyWithValue<double>("Threshold", 0, compositeValidator, Direction::Input), "Threshold signal above which to consider peaks"); + + std::vector<std::string> normalizations(3); + normalizations[0] = "NoNormalization"; + normalizations[1] = "VolumeNormalization"; + normalizations[2] = "NumEventsNormalization"; + + declareProperty("Normalization",normalizations[1], + Kernel::IValidator_sptr(new Kernel::ListValidator<std::string>(normalizations)), + "Normalization to use with Threshold. Defaults to VolumeNormalization to account for different binning."); + declareProperty(new WorkspaceProperty<IPeaksWorkspace>("OutputWorkspace", "", Direction::Output), "An output integrated peaks workspace."); declareProperty( @@ -147,6 +157,30 @@ namespace Mantid "MDHistoWorkspace containing the labeled clusters used by the algorithm."); } + + /** + * Get the normalization. For use with iterators + background strategies. + * @return Chosen normalization + */ + MDNormalization IntegratePeaksUsingClusters::getNormalization() + { + std::string normProp = getPropertyValue("Normalization"); + Mantid::API::MDNormalization normalization; + if (normProp == "NoNormalization") + { + normalization = NoNormalization; + } + else if (normProp == "VolumeNormalization") + { + normalization = VolumeNormalization; + } + else + { + normalization = NumEventsNormalization; + } + return normalization; + } + //---------------------------------------------------------------------------------------------- /** Execute the algorithm. */ @@ -178,12 +212,12 @@ namespace Mantid const double threshold = getProperty("Threshold"); // Make a background strategy for the CCL analysis to use. - HardThresholdBackground backgroundStrategy(threshold,NoNormalization); + HardThresholdBackground backgroundStrategy(threshold, this->getNormalization()); // CCL. Multi-processor version. ConnectedComponentLabeling analysis; Progress progress(this, 0, 1, 1); - // Peform CCL. + // Perform CCL. ClusterTuple clusters = analysis.executeAndFetchClusters(mdWS, &backgroundStrategy, progress); // Extract the clusters ConnectedComponentMappingTypes::ClusterMap& clusterMap = clusters.get<1>(); @@ -195,14 +229,15 @@ namespace Mantid std::map<size_t, size_t> labelsTakenByPeaks; progress.doReport("Performing Peak Integration"); - progress.resetNumSteps(peakWS->getNumberPeaks(), 0.75, 1); + g_log.information("Starting Integration"); + progress.resetNumSteps(peakWS->getNumberPeaks(), 0.9, 1); PARALLEL_FOR1(peakWS) for(int i = 0; i < peakWS->getNumberPeaks(); ++i) { PARALLEL_START_INTERUPT_REGION IPeak& peak = peakWS->getPeak(i); const V3D& peakCenterInMDFrame = peakTransform->transformPeak(peak); - const Mantid::signal_t signalValue = outHistoWS->getSignalAtVMD(peakCenterInMDFrame, NoNormalization); + const Mantid::signal_t signalValue = outHistoWS->getSignalAtVMD(peakCenterInMDFrame, NoNormalization); // No normalization when extracting label ids! if(boost::math::isnan(signalValue)) { g_log.warning() << "Warning: image for integration is off edge of detector for peak " << i << std::endl; diff --git a/Code/Mantid/Framework/Crystal/test/ConnectedComponentLabelingTest.h b/Code/Mantid/Framework/Crystal/test/ConnectedComponentLabelingTest.h index 516972f75e3..d3ee6c2a55f 100644 --- a/Code/Mantid/Framework/Crystal/test/ConnectedComponentLabelingTest.h +++ b/Code/Mantid/Framework/Crystal/test/ConnectedComponentLabelingTest.h @@ -106,7 +106,7 @@ public: MockBackgroundStrategy mockStrategy; EXPECT_CALL(mockStrategy, isBackground(_)).Times(static_cast<int>(inWS->getNPoints())*2).WillRepeatedly(Return(false));// A filter that passes everything. - + EXPECT_CALL(mockStrategy, configureIterator(_)).Times(1); size_t labelingId = 1; int multiThreaded = 1; ConnectedComponentLabeling ccl(labelingId, multiThreaded); @@ -128,7 +128,7 @@ public: MockBackgroundStrategy mockStrategy; EXPECT_CALL(mockStrategy, isBackground(_)).Times(static_cast<int>(inWS->getNPoints())*2).WillRepeatedly(Return(false));// A filter that passes everything. - + EXPECT_CALL(mockStrategy, configureIterator(_)).Times(1); size_t labelingId = 2; int multiThreaded = 1; ConnectedComponentLabeling ccl(labelingId, multiThreaded); @@ -151,7 +151,7 @@ public: IMDHistoWorkspace_sptr inWS = MDEventsTestHelper::makeFakeMDHistoWorkspace(1, 1, 6); // Makes a 1 by 6 md ws with identical signal values. MockBackgroundStrategy mockStrategy; - + EXPECT_CALL(mockStrategy, configureIterator(_)).Times(1); /* * We use the is background strategy to set up two disconected blocks for us. * */ @@ -192,7 +192,7 @@ public: IMDHistoWorkspace_sptr inWS = MDEventsTestHelper::makeFakeMDHistoWorkspace(1, 1, 5); // Makes a 1 by 5 md ws with identical signal values. MockBackgroundStrategy mockStrategy; - + EXPECT_CALL(mockStrategy, configureIterator(_)).Times(1); /* * We use the is background strategy to set up three disconected blocks for us. * */ @@ -230,6 +230,7 @@ public: IMDHistoWorkspace_sptr inWS = MDEventsTestHelper::makeFakeMDHistoWorkspace(1, 2, 4); // Makes a 4 by 4 grid. MockBackgroundStrategy mockStrategy; + EXPECT_CALL(mockStrategy, configureIterator(_)).Times(1); EXPECT_CALL(mockStrategy, isBackground(_)).WillRepeatedly(Return(false));// Nothing is treated as background size_t labelingId = 1; @@ -249,6 +250,7 @@ public: IMDHistoWorkspace_sptr inWS = MDEventsTestHelper::makeFakeMDHistoWorkspace(1, 2, 3); // Makes a 3 by 3 grid. MockBackgroundStrategy mockStrategy; + EXPECT_CALL(mockStrategy, configureIterator(_)).Times(1); /* * We treat alternate cells as background, which actually should result in a single object. Think of a chequered flag. @@ -292,6 +294,7 @@ public: IMDHistoWorkspace_sptr inWS = MDEventsTestHelper::makeFakeMDHistoWorkspace(1, 3, 3); // Makes a 3 by 3 by 3 grid. All populated with a single value. MockBackgroundStrategy mockStrategy; + EXPECT_CALL(mockStrategy, configureIterator(_)).Times(1); /* * We treat alternate cells as background, which actually should result in a single object. Think of a chequered flag. diff --git a/Code/Mantid/Framework/MDEvents/inc/MantidMDEvents/MDHistoWorkspace.h b/Code/Mantid/Framework/MDEvents/inc/MantidMDEvents/MDHistoWorkspace.h index 2406ab0d3e2..d0cd3edb867 100644 --- a/Code/Mantid/Framework/MDEvents/inc/MantidMDEvents/MDHistoWorkspace.h +++ b/Code/Mantid/Framework/MDEvents/inc/MantidMDEvents/MDHistoWorkspace.h @@ -400,6 +400,10 @@ namespace MDEvents /// Get the size of an element in the HistoWorkspace. static size_t sizeOfElement(); + + /// Virtual constructor. + boost::shared_ptr<IMDHistoWorkspace> cloneShape() const; + private: void initVertexesArray(); diff --git a/Code/Mantid/Framework/MDEvents/src/MDHistoWorkspace.cpp b/Code/Mantid/Framework/MDEvents/src/MDHistoWorkspace.cpp index 8fe513c57d6..669866466d9 100644 --- a/Code/Mantid/Framework/MDEvents/src/MDHistoWorkspace.cpp +++ b/Code/Mantid/Framework/MDEvents/src/MDHistoWorkspace.cpp @@ -1308,6 +1308,11 @@ namespace MDEvents return (3 * sizeof(signal_t) ) + sizeof(bool); } + boost::shared_ptr<IMDHistoWorkspace> MDHistoWorkspace::clone() const; + { + return boost::make_ + } + } // namespace Mantid } // namespace MDEvents -- GitLab