Commit 018d1584 authored by Lynch, Vickie's avatar Lynch, Vickie
Browse files

Refs #5316 Zscores in statistics

parent 67e95e9d
......@@ -455,47 +455,55 @@ namespace Mantid
{
double centre = peakslist->getRef<double>("centre",i);
double width = peakslist->getRef<double>("width",i);
if (centre <= minD || centre >= maxD)
double chi2 = peakslist->getRef<double>("chi2",i);
// Get references to the data
peakPosFitted.push_back(centre);
peakWidFitted.push_back(width);
chisq.push_back(chi2);
}
for (size_t i = 0; i < peakslist->rowCount(); ++i)
{
if (peakPosFitted[i] <= minD || peakPosFitted[i] >= maxD)
{
banned.push_back(i);
continue;
}
double chi2 = peakslist->getRef<double>("chi2",i);
if (chi2 > m_maxChiSq)
if (chisq[i] > m_maxChiSq)
{
banned.push_back(i);
continue;
}
// Get references to the data
peakPosFitted.push_back(centre);
peakWidFitted.push_back(width);
chisq.push_back(chi2);
}
// delete banned peaks
for (std::vector<size_t>::const_reverse_iterator it = banned.rbegin(); it != banned.rend(); ++it)
{
peakPosToFit.erase(peakPosToFit.begin() + (*it));
if (peakPosFitted.size() > 2) Outliers(peakWidFitted, peakPosFitted, chisq, peakPosToFit);
nparams = peakPosFitted.size();
return;
}
void GetDetOffsetsMultiPeaks::Outliers(std::vector<double>& data, std::vector<double>& data2, std::vector<double>& data3, std::vector<double>& data4)
{
Statistics stats = getStatistics(data);
if(stats.standard_deviation == 0.)return;
for (int i = static_cast<int>(data.size())-1; i>=0; i--)
peakPosFitted.erase(peakPosFitted.begin() + (*it));
peakWidFitted.erase(peakWidFitted.begin() + (*it));
chisq.erase(chisq.begin() + (*it));
}
banned.clear();
std::vector<double> Zscore = getZscore(peakWidFitted);
for (size_t i = 0; i < peakWidFitted.size(); ++i)
{
double zscore = std::fabs((data[i] - stats.mean) / stats.standard_deviation);
if (zscore > 1.0)
if (Zscore[i] > 1.0)
{
data.erase(data.begin()+i);
data2.erase(data2.begin()+i);
data3.erase(data3.begin()+i);
data4.erase(data4.begin()+i);
banned.push_back(i);
continue;
}
}
// delete banned peaks
for (std::vector<size_t>::const_reverse_iterator it = banned.rbegin(); it != banned.rend(); ++it)
{
peakPosToFit.erase(peakPosToFit.begin() + (*it));
peakPosFitted.erase(peakPosFitted.begin() + (*it));
peakWidFitted.erase(peakWidFitted.begin() + (*it));
chisq.erase(chisq.begin() + (*it));
}
nparams = peakPosFitted.size();
return;
}
} // namespace Algorithm
} // namespace Mantid
......@@ -25,6 +25,10 @@ struct Statistics
template<typename TYPE>
Statistics getStatistics(const std::vector<TYPE>& data, const bool sorted=false);
template<typename TYPE>
std::vector<double> getZscore(const std::vector<TYPE>& data, const bool sorted=false);
template<typename TYPE>
std::vector<double> getModifiedZscore(const std::vector<TYPE>& data, const bool sorted=false);
} // namespace Kernel
} // namespace Mantid
......
......@@ -86,6 +86,57 @@ namespace Mantid
}
}
}
/**
* There are enough special cases in determining the Z score where it useful to
* put it in a single function.
*/
template<typename TYPE>
std::vector<double> getZscore(const vector<TYPE>& data, const bool sorted)
{
if (data.size() < 3)
{
std::vector<double>Zscore(data.size(),0.);
return Zscore;
}
std::vector<double> Zscore;
double tmp;
Statistics stats = getStatistics(data, sorted);
if(stats.standard_deviation == 0.)return Zscore;
typename vector<TYPE>::const_iterator it = data.begin();
for (; it != data.end(); ++it)
{
tmp = static_cast<double> (*it);
Zscore.push_back(fabs((tmp - stats.mean) / stats.standard_deviation));
}
return Zscore;
}
/**
* There are enough special cases in determining the modified Z score where it useful to
* put it in a single function.
*/
template<typename TYPE>
std::vector<double> getModifiedZscore(const vector<TYPE>& data, const bool sorted)
{
std::vector<double>Zscore, MADvec;
double tmp;
size_t num_data = data.size(); // cache since it is frequently used
double median = getMedian(data, num_data, sorted);
typename vector<TYPE>::const_iterator it = data.begin();
for (; it != data.end(); ++it)
{
tmp = static_cast<double> (*it);
MADvec.push_back(fabs(tmp - median));
}
double MAD = getMedian(MADvec, num_data, sorted);
MADvec.empty();
it = data.begin();
for (; it != data.end(); ++it)
{
tmp = static_cast<double> (*it);
Zscore.push_back(0.6745*fabs((tmp - median) / MAD));
}
return Zscore;
}
/**
* Determine the statistics for a vector of data. If it is sorted then let the
......@@ -138,9 +189,14 @@ namespace Mantid
return getNanStatistics();
}
// -------------------------- concrete instantiations
template DLLExport Statistics getStatistics<double> (const vector<double> &, const bool);
template DLLExport Statistics getStatistics<int32_t> (const vector<int32_t> &, const bool);
template DLLExport std::vector<double> getZscore<double> (const vector<double> &, const bool);
template DLLExport std::vector<double> getZscore<int32_t> (const vector<int32_t> &, const bool);
template DLLExport std::vector<double> getModifiedZscore<double> (const vector<double> &, const bool);
template DLLExport std::vector<double> getModifiedZscore<int32_t> (const vector<int32_t> &, const bool);
} // namespace Kernel
} // namespace Mantid
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment