Commit a5d4b236 authored by David Fairbrother's avatar David Fairbrother
Browse files

Remove prem-optimisation passing size into median

The compiler potentially cannot make assumptions about the code and it
causes cppcheck to have a wobble since we "could" deref an invalid
iterator if we used it incorrectly
parent 4f1fa389
......@@ -70,8 +70,7 @@ template <typename TYPE> std::vector<double> getZscore(const std::vector<TYPE> &
template <typename TYPE>
std::vector<double> getWeightedZscore(const std::vector<TYPE> &data, const std::vector<TYPE> &weights);
/// Return the modified Z score values for a dataset
template <typename TYPE>
std::vector<double> getModifiedZscore(const std::vector<TYPE> &data, const bool sorted = false);
template <typename TYPE> std::vector<double> getModifiedZscore(const std::vector<TYPE> &data);
/// Return the R-factors (Rwp) of a diffraction pattern data
Rfactor MANTID_KERNEL_DLL getRFactor(const std::vector<double> &obsI, const std::vector<double> &calI,
const std::vector<double> &obsE);
......
......@@ -48,45 +48,30 @@ Statistics getNanStatistics() {
* There are enough special cases in determining the median where it useful to
* put it in a single function.
*/
template <typename TYPE> double getMedian(const vector<TYPE> &data, const size_t num_data, const bool sorted) {
if (num_data == 1)
return static_cast<double>(*(data.begin()));
template <typename TYPE> double getMedian(const vector<TYPE> &data) {
const size_t size = data.size();
if (size == 1)
return static_cast<double>(data[0]);
const bool isSorted = std::is_sorted(data.cbegin(), data.cend());
std::vector<TYPE> tmpSortedData;
auto sortedDataRef = std::ref(data);
if (!isSorted) {
tmpSortedData = data;
std::sort(tmpSortedData.begin(), tmpSortedData.end());
sortedDataRef = std::ref(std::as_const(tmpSortedData));
}
bool is_even = ((num_data & 1) == 0);
const bool is_even = (size % 2 == 0);
double retVal = 0;
if (is_even) {
double left = 0.0;
double right = 0.0;
if (sorted) {
// Just get the centre two elements.
left = static_cast<double>(*(data.begin() + num_data / 2 - 1));
right = static_cast<double>(*(data.begin() + num_data / 2));
} else {
// If the data is not sorted, make a copy we can mess with
vector<TYPE> temp(data.begin(), data.end());
// Get what the centre two elements should be...
std::nth_element(temp.begin(), temp.begin() + num_data / 2 - 1, temp.end());
left = static_cast<double>(*(temp.begin() + num_data / 2 - 1));
std::nth_element(temp.begin(), temp.begin() + num_data / 2, temp.end());
right = static_cast<double>(*(temp.begin() + num_data / 2));
}
// return the average
return (left + right) / 2.;
} else
// Odd number
{
if (sorted) {
// If sorted and odd, just return the centre value
return static_cast<double>(*(data.begin() + num_data / 2));
} else {
// If the data is not sorted, make a copy we can mess with
vector<TYPE> temp(data.begin(), data.end());
// Make sure the centre value is in the correct position
std::nth_element(temp.begin(), temp.begin() + num_data / 2, temp.end());
// Now return the centre value
return static_cast<double>(*(temp.begin() + num_data / 2));
}
const auto left = static_cast<double>(sortedDataRef.get()[size / 2 - 1]);
const auto right = static_cast<double>(sortedDataRef.get()[size / 2]);
retVal = (left + right) / 2;
} else {
retVal = static_cast<double>(sortedDataRef.get()[size / 2]);
}
return retVal;
}
/**
......@@ -147,20 +132,19 @@ template <typename TYPE> std::vector<double> getWeightedZscore(const vector<TYPE
* useful to
* put it in a single function.
*/
template <typename TYPE> std::vector<double> getModifiedZscore(const vector<TYPE> &data, const bool sorted) {
template <typename TYPE> std::vector<double> getModifiedZscore(const vector<TYPE> &data) {
if (data.size() < 3) {
std::vector<double> Zscore(data.size(), 0.);
return Zscore;
}
std::vector<double> MADvec;
double tmp;
size_t num_data = data.size(); // cache since it is frequently used
double median = getMedian(data, num_data, sorted);
double median = getMedian(data);
for (auto it = data.cbegin(); it != data.cend(); ++it) {
tmp = static_cast<double>(*it);
MADvec.emplace_back(fabs(tmp - median));
}
double MAD = getMedian(MADvec, num_data, sorted);
double MAD = getMedian(MADvec);
if (MAD == 0.) {
std::vector<double> Zscore(data.size(), 0.);
return Zscore;
......@@ -182,8 +166,7 @@ template <typename TYPE> std::vector<double> getModifiedZscore(const vector<TYPE
*/
template <typename TYPE> Statistics getStatistics(const vector<TYPE> &data, const unsigned int flags) {
Statistics statistics = getNanStatistics();
size_t num_data = data.size(); // cache since it is frequently used
if (num_data == 0) { // don't do anything
if (data.empty()) { // don't do anything
return statistics;
}
// calculate the mean if this or the stddev is requested
......@@ -216,7 +199,7 @@ template <typename TYPE> Statistics getStatistics(const vector<TYPE> &data, cons
// calculate the median if requested
if (flags & StatOptions::Median) {
statistics.median = getMedian(data, num_data, flags & StatOptions::SortedData);
statistics.median = getMedian(data);
}
return statistics;
......@@ -421,7 +404,7 @@ std::vector<double> getMomentsAboutMean(const std::vector<TYPE> &x, const std::v
template MANTID_KERNEL_DLL Statistics getStatistics<TYPE>(const vector<TYPE> &, const unsigned int); \
template MANTID_KERNEL_DLL std::vector<double> getZscore<TYPE>(const vector<TYPE> &); \
template MANTID_KERNEL_DLL std::vector<double> getWeightedZscore<TYPE>(const vector<TYPE> &, const vector<TYPE> &); \
template MANTID_KERNEL_DLL std::vector<double> getModifiedZscore<TYPE>(const vector<TYPE> &, const bool); \
template MANTID_KERNEL_DLL std::vector<double> getModifiedZscore<TYPE>(const vector<TYPE> &); \
template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutOrigin<TYPE>( \
const std::vector<TYPE> &x, const std::vector<TYPE> &y, const int maxMoment); \
template MANTID_KERNEL_DLL std::vector<double> getMomentsAboutMean<TYPE>( \
......
......@@ -54,9 +54,9 @@ public:
TS_ASSERT_EQUALS(stats.median, 17.2);
}
void test_Unsorted_Data_With_Sorted_Flag_Gives_Expected_Incorrect_Result_For_Median() {
void test_Unsorted_Data_With_Sorted_Flag_Gives_Expected_Result_For_Median() {
vector<double> data;
data.emplace_back(17.2);
data.emplace_back(17.2); // Median value
data.emplace_back(18.1);
data.emplace_back(16.5);
data.emplace_back(18.3);
......@@ -68,7 +68,7 @@ public:
TS_ASSERT(std::isnan(stats.standard_deviation));
TS_ASSERT(std::isnan(stats.minimum));
TS_ASSERT(std::isnan(stats.maximum));
TS_ASSERT_EQUALS(stats.median, 16.5);
TS_ASSERT_EQUALS(stats.median, 17.2);
}
void test_Doubles_With_Corrected_StdDev_Calculates_Mean() {
......
......@@ -144,11 +144,12 @@ std::vector<double> getZscoreNumpyDeprecated(const NDArray &data, const bool sor
* arrays,
*/
std::vector<double> getModifiedZscoreNumpy(const NDArray &data, const bool sorted = false) {
UNUSED_ARG(sorted) // We explicitly check in the kernel now
using Converters::NDArrayToVector;
using Mantid::Kernel::getModifiedZscore;
if (isFloatArray(data.ptr())) {
return getModifiedZscore(NDArrayToVector<double>(data)(), sorted);
return getModifiedZscore(NDArrayToVector<double>(data)());
} else {
throw UnknownDataType();
}
......@@ -254,7 +255,7 @@ void export_Statistics() {
.def("getZscore", &getZscoreNumpy, arg("data"), "Determine the Z score for an array of data")
.def("getZscore", &getZscoreNumpyDeprecated, (arg("data"), arg("sorted")),
"Determine the Z score for an array of "
"data (deprecated sorted argument)")
"data (deprecated + ignored sorted argument)")
.staticmethod("getZscore")
.def("getModifiedZscore", &getModifiedZscoreNumpy,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment