Commit d5acbb87 authored by Peterson, Peter's avatar Peterson, Peter
Browse files

Central utility for statistics is in place. Now to have other code use it. Refs #1933.

parent 7bfa81bf
#ifndef STATISTICS_H_
#define STATISTICS_H_
#include <vector>
#include "MantidKernel/System.h"
namespace Mantid
{
namespace Kernel
{
/**
 * Plain data holder for the summary statistics of a set of values, as
 * produced by getStatistics().
 *
 * The struct has no constructor, so a default-constructed instance has
 * unspecified field values; the implementation starts from an instance in
 * which every field is NaN.
 */
struct Statistics
{
/// Smallest value encountered
double minimum;
/// Largest value encountered
double maximum;
/// Arithmetic mean (sum of the values divided by their count)
double mean;
/// Median: the middle value, or the average of the two middle values when the count is even
double median;
/// Standard deviation of the values, computed with a divisor of N (population form)
double standard_deviation;
};
/**
 * Compute summary statistics (minimum, maximum, mean, median and standard
 * deviation) for a vector of values.
 *
 * Only the declaration lives here. The implementation file explicitly
 * instantiates the template for double and int32_t, and specializes it for
 * std::string to return statistics in which every field is NaN.
 *
 * @param data   The values to summarise.
 * @param sorted Pass true when data is already sorted so the median can be
 *               read from it directly; when false (the default) the
 *               implementation works on a copy of data to find the median.
 * @return A Statistics struct filled in from data.
 */
template<typename TYPE>
Statistics getStatistics(const std::vector<TYPE>& data, const bool sorted=false);
} // namespace Kernel
} // namespace Mantid
#endif /* STATISTICS_H_ */
#include <algorithm>
#include <functional>
#include <limits>
#include <math.h>
#include <numeric>
#include <string>
#include "MantidKernel/Statistics.h"
namespace Mantid
{
namespace Kernel
{
using std::string;
using std::vector;
/**
 * Build a Statistics object in which every field is a quiet NaN. This is the
 * "nothing has been computed yet" starting point for the other functions.
 *
 * @return Statistics with minimum, maximum, mean, median and
 *         standard_deviation all set to NaN.
 */
Statistics getNanStatistics()
{
  Statistics all_nan;
  // One chained assignment so every field provably receives the same value.
  all_nan.minimum = all_nan.maximum = all_nan.mean = all_nan.median =
      all_nan.standard_deviation = std::numeric_limits<double>::quiet_NaN();
  return all_nan;
}
/**
 * Read the median out of data whose middle element(s) are already in their
 * sorted positions. There are enough special cases in determining the median
 * that it is useful to keep them in a single function.
 *
 * For an odd count the element at index num_data/2 is returned; for an even
 * count the elements at num_data/2 - 1 and num_data/2 are averaged. The
 * caller is responsible for making sure those positions hold the values they
 * would hold in a fully sorted sequence.
 *
 * @param data     The (at least partially ordered) values.
 * @param num_data Number of elements of data to consider.
 * @return The median as a double, or NaN when num_data is zero or exceeds
 *         data.size() (there is no valid element to read in either case).
 */
template<typename TYPE>
double getMedian(const std::vector<TYPE>& data, const size_t num_data)
{
  // Guard the degenerate cases up front: with zero elements the even-count
  // branch would otherwise read the element before begin().
  if (num_data == 0 || num_data > data.size())
    return std::numeric_limits<double>::quiet_NaN();

  const size_t middle = num_data / 2;
  const double upper = static_cast<double>(data[middle]);

  // Odd count (this includes the single-element case): the middle element is the median.
  if ((num_data % 2) != 0)
    return upper;

  // Even count: average the two elements that straddle the middle.
  const double lower = static_cast<double>(data[middle - 1]);
  return (lower + upper) / 2.;
}
/**
 * Determine the statistics (minimum, maximum, mean, median and standard
 * deviation) for a vector of data. If the data is already sorted then let the
 * function know so it won't make a copy of the data for determining the
 * median.
 *
 * The standard deviation uses a divisor of N (population form).
 *
 * @param data   The values to summarise.
 * @param sorted True if data is already sorted, in which case the median is
 *               read straight from it; otherwise a working copy is partially
 *               ordered to locate the median.
 * @return The statistics. Every field is NaN when data is empty.
 */
template<typename TYPE>
Statistics getStatistics(const vector<TYPE>& data, const bool sorted)
{
  Statistics stats = getNanStatistics();
  const size_t num_data = data.size(); // cache since it is frequently used

  // Nothing to summarise: keep the all-NaN defaults instead of dividing by
  // zero and asking for the median of zero elements.
  if (num_data == 0)
    return stats;

  // calculate the mean
  stats.mean = std::accumulate(data.begin(), data.end(), 0., std::plus<double>());
  stats.mean /= static_cast<double>(num_data);

  // calculate the standard deviation, min, max
  // Seed the extrema from an actual element. Seeding them from the mean can
  // report a minimum/maximum that is not in the data when floating-point
  // rounding leaves the mean marginally outside the range of the values.
  stats.minimum = static_cast<double>(data.front());
  stats.maximum = stats.minimum;
  double sum_sq_dev = 0.;
  typename vector<TYPE>::const_iterator it = data.begin();
  for ( ; it != data.end(); ++it)
  {
    const double value = static_cast<double>(*it);
    const double deviation = value - stats.mean;
    sum_sq_dev += deviation * deviation;
    if (value > stats.maximum)
      stats.maximum = value;
    if (value < stats.minimum)
      stats.minimum = value;
  }
  stats.standard_deviation = sqrt(sum_sq_dev / static_cast<double>(num_data));

  // calculate the median
  if (sorted) {
    stats.median = getMedian(data, num_data);
  }
  else {
    // Work on a copy so the caller's data keeps its order.
    vector<TYPE> scratch(data.begin(), data.end());
    const typename vector<TYPE>::iterator middle = scratch.begin() + num_data/2;
    std::nth_element(scratch.begin(), middle, scratch.end());
    // nth_element only guarantees the element AT middle; everything before it
    // is merely "not greater". For an even count the median also needs the
    // largest element of that lower part, so select it into middle - 1.
    if ((num_data % 2) == 0)
      std::nth_element(scratch.begin(), middle - 1, middle);
    stats.median = getMedian(scratch, num_data);
  }
  return stats;
}
/**
 * Getting statistics of a string array should just give a bunch of NaNs:
 * strings have no numeric summary, so the input is ignored entirely.
 *
 * The parameter names are commented out because neither argument is used;
 * this keeps the signature readable without unused-parameter warnings.
 *
 * NOTE(review): the explicit instantiations in this file carry DLLExport but
 * this specialization does not - confirm whether it needs the same marker to
 * be visible to other libraries on every platform.
 *
 * @return Statistics with every field set to NaN.
 */
template<>
Statistics getStatistics<string>(const vector<string>& /*data*/, const bool /*sorted*/)
{
  return getNanStatistics();
}
// -------------------------- concrete instantiations
// The template is only declared in the header and defined in this file, so
// each element type that callers use must be instantiated explicitly here.
// DLLExport is presumably the shared-library export macro supplied via
// MantidKernel/System.h - confirm against that header.
template DLLExport Statistics getStatistics<double>(const vector<double> &, const bool);
template DLLExport Statistics getStatistics<int32_t>(const vector<int32_t> &, const bool);
} // namespace Kernel
} // namespace Mantid
#ifndef STATISTICSTEST_H_
#define STATISTICSTEST_H_
#include <cxxtest/TestSuite.h>
#include <vector>
#include <string>
#include "MantidKernel/Statistics.h"
using namespace Mantid::Kernel;
using std::string;
using std::vector;
/**
 * CxxTest suite for getStatistics(): covers an odd-length double input, a
 * single-element input, an even-length int32_t input and the std::string
 * specialization.
 */
class StatisticsTest : public CxxTest::TestSuite
{
public:
  /// Odd number of doubles, supplied out of order (sorted flag left at its default).
  void testDoubleOdd()
  {
    vector<double> data;
    data.push_back(17.2);
    data.push_back(18.1);
    data.push_back(16.5);
    data.push_back(18.3);
    data.push_back(12.6);
    Statistics stats = getStatistics(data);
    // The mean is the result of a floating-point sum and divide, and 16.54 is
    // not exactly representable, so compare with a tolerance rather than ==.
    TS_ASSERT_DELTA(stats.mean, 16.54, 1e-12);
    TS_ASSERT_DELTA(stats.standard_deviation, 2.0732, 0.0001);
    // Minimum, maximum and median are copies of input values, so exact
    // comparison is appropriate for them.
    TS_ASSERT_EQUALS(stats.minimum, 12.6);
    TS_ASSERT_EQUALS(stats.maximum, 18.3);
    TS_ASSERT_EQUALS(stats.median, 17.2);
  }

  /// A single value: every statistic equals that value and the spread is zero.
  void testDoubleSingle()
  {
    vector<double> data;
    data.push_back(42.);
    Statistics stats = getStatistics(data);
    TS_ASSERT_EQUALS(stats.mean, 42.);
    TS_ASSERT_EQUALS(stats.standard_deviation, 0.);
    TS_ASSERT_EQUALS(stats.minimum, 42.);
    TS_ASSERT_EQUALS(stats.maximum, 42.);
    TS_ASSERT_EQUALS(stats.median, 42.);
  }

  /// Even number of int32_t values: the median is the average of the two middle ones.
  /// Note the input here is already in ascending order; an out-of-order
  /// even-length input is not exercised by this test.
  void testInt32Even()
  {
    vector<int32_t> data;
    data.push_back(1);
    data.push_back(2);
    data.push_back(3);
    data.push_back(4);
    data.push_back(5);
    data.push_back(6);
    Statistics stats = getStatistics(data);
    TS_ASSERT_EQUALS(stats.mean, 3.5);
    TS_ASSERT_DELTA(stats.standard_deviation, 1.7078, 0.0001);
    TS_ASSERT_EQUALS(stats.minimum, 1.);
    TS_ASSERT_EQUALS(stats.maximum, 6.);
    TS_ASSERT_EQUALS(stats.median, 3.5);
  }

  /// Helper: NaN is the only double value that compares unequal to itself.
  bool my_isnan(const double number)
  {
    return number != number;
  }

  /// Strings have no numeric statistics, so every field must come back as NaN.
  void testString()
  {
    vector<string> data;
    data.push_back("hi there");
    Statistics stats = getStatistics(data);
    TS_ASSERT(my_isnan(stats.mean));
    TS_ASSERT(my_isnan(stats.standard_deviation));
    TS_ASSERT(my_isnan(stats.minimum));
    TS_ASSERT(my_isnan(stats.maximum));
    TS_ASSERT(my_isnan(stats.median));
  }
};
#endif // STATISTICSTEST_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment