Commit d5acbb87 by Peterson, Peter

### Central utility for statistics is in place. Now to have other code use it. Refs #1933.

parent 7bfa81bf
 #ifndef STATISTICS_H_ #define STATISTICS_H_ #include #include "MantidKernel/System.h" namespace Mantid { namespace Kernel { struct Statistics { /// Minimum value double minimum; /// Maximum value double maximum; /// Mean value double mean; /// Median value double median; /// standard_deviation of the values double standard_deviation; }; template Statistics getStatistics(const std::vector& data, const bool sorted=false); } // namespace Kernel } // namespace Mantid #endif /* STATISTICS_H_ */
 #include #include #include #include #include #include #include "MantidKernel/Statistics.h" namespace Mantid { namespace Kernel { using std::string; using std::vector; /** * Generate a Statistics object where all of the values are NaN. This is a good initial default. */ Statistics getNanStatistics() { double nan = std::numeric_limits::quiet_NaN(); Statistics stats; stats.minimum = nan; stats.maximum = nan; stats.mean = nan; stats.median = nan; stats.standard_deviation = nan; return stats; } /** * There are enough special cases in determining the median where it useful to * put it in a single function. */ template double getMedian(const vector& data, const size_t num_data) { if (num_data == 1) return static_cast(*(data.begin())); bool is_even = ((num_data % 2) == 0); if (is_even) { double left = static_cast(*(data.begin() + num_data/2 - 1)); double right = static_cast(*(data.begin() + num_data/2)); return (left + right) / 2.; } else { return static_cast(*(data.begin() + num_data/2)); } } /** * Determine the statistics for a vector of data. If it is sorted then let the * function know so it won't make a copy of the data for determining the median. */ template Statistics getStatistics(const vector& data, const bool sorted) { Statistics stats = getNanStatistics(); size_t num_data = data.size(); // chache since it is frequently used // calculate the mean stats.mean = std::accumulate(data.begin(), data.end(), 0., std::plus()); stats.mean /= (static_cast(num_data)); // calculate the standard deviation, min, max stats.minimum = stats.mean; stats.maximum = stats.mean; double stddev = 0.; double temp; typename vector::const_iterator it = data.begin(); for ( ; it != data.end(); ++it) { temp = static_cast(*it); stddev += ((temp-stats.mean)* (temp-stats.mean)); if (temp > stats.maximum) stats.maximum = temp; if (temp < stats.minimum) stats.minimum = temp; } stats.standard_deviation = sqrt(stddev / (static_cast(num_data))); // calculate the median if (sorted) { stats.median = getMedian(data, num_data); } else { vector temp(data.begin(), data.end()); std::nth_element(temp.begin(), temp.begin()+num_data/2, temp.end()); stats.median = getMedian(temp, num_data); } return stats; } /// Getting statistics of a string array should just give a bunch of NaNs template<> Statistics getStatistics(const vector& data, const bool sorted) { return getNanStatistics(); } // -------------------------- concrete instantiations template DLLExport Statistics getStatistics(const vector &, const bool); template DLLExport Statistics getStatistics(const vector &, const bool); } // namespace Kernel } // namespace Mantid
 #ifndef STATISTICSTEST_H_ #define STATISTICSTEST_H_ #include #include #include #include "MantidKernel/Statistics.h" using namespace Mantid::Kernel; using std::string; using std::vector; class StatisticsTest : public CxxTest::TestSuite { public: void testDoubleOdd() { vector data; data.push_back(17.2); data.push_back(18.1); data.push_back(16.5); data.push_back(18.3); data.push_back(12.6); Statistics stats = getStatistics(data); TS_ASSERT_EQUALS(stats.mean, 16.54); TS_ASSERT_DELTA(stats.standard_deviation, 2.0732, 0.0001); TS_ASSERT_EQUALS(stats.minimum, 12.6); TS_ASSERT_EQUALS(stats.maximum, 18.3); TS_ASSERT_EQUALS(stats.median, 17.2); } void testDoubleSingle() { vector data; data.push_back(42.); Statistics stats = getStatistics(data); TS_ASSERT_EQUALS(stats.mean, 42.); TS_ASSERT_EQUALS(stats.standard_deviation, 0.); TS_ASSERT_EQUALS(stats.minimum, 42.); TS_ASSERT_EQUALS(stats.maximum, 42.); TS_ASSERT_EQUALS(stats.median, 42.); } void testInt32Even() { vector data; data.push_back(1); data.push_back(2); data.push_back(3); data.push_back(4); data.push_back(5); data.push_back(6); Statistics stats = getStatistics(data); TS_ASSERT_EQUALS(stats.mean, 3.5); TS_ASSERT_DELTA(stats.standard_deviation, 1.7078, 0.0001); TS_ASSERT_EQUALS(stats.minimum, 1.); TS_ASSERT_EQUALS(stats.maximum, 6.); TS_ASSERT_EQUALS(stats.median, 3.5); } bool my_isnan(const double number) { return number != number; } void testString() { vector data; data.push_back("hi there"); Statistics stats = getStatistics(data); TS_ASSERT(my_isnan(stats.mean)); TS_ASSERT(my_isnan(stats.standard_deviation)); TS_ASSERT(my_isnan(stats.minimum)); TS_ASSERT(my_isnan(stats.maximum)); TS_ASSERT(my_isnan(stats.median)); } }; #endif // STATISTICSTEST_H_
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!