Statistics.cpp 10.8 KB
Newer Older
1
2
3
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4
5
//   NScD Oak Ridge National Laboratory, European Spallation Source,
//   Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6
// SPDX - License - Identifier: GPL - 3.0 +
7
#include "MantidKernel/Statistics.h"
8
#include "MantidKernel/WarningSuppressions.h"
9
#include "MantidPythonInterface/core/Converters/NDArrayToVector.h"
10
#include "MantidPythonInterface/core/NDArray.h"
11
#include "MantidPythonInterface/core/Policies/VectorToNumpy.h"
12
13
14
15

#include <boost/python/class.hpp>
#include <boost/python/def.hpp>
#include <boost/python/overloads.hpp>
16
#include <boost/python/return_value_policy.hpp>
17
18
#include <boost/python/scope.hpp>

19
20
// See
// http://docs.scipy.org/doc/numpy/reference/c-api.array.html#PY_ARRAY_UNIQUE_SYMBOL
21
22
23
24
25
26
27
28
#define PY_ARRAY_UNIQUE_SYMBOL KERNEL_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>

using Mantid::Kernel::Statistics;
using namespace Mantid::PythonInterface;
using namespace boost::python;

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
namespace {
///@cond

// Dummy class used to define Stats "namespace" in python
class Stats {};

// For all methods below we have to extract specific types from Python to C++.
// We choose to support only Python float arrays (C++ double)

/**
 * Return true if the array contains float data. Equivalent to
 * PyArray_ISFLOAT but uses correct constness for numpy >= 1.7
 * @param obj A pointer to a numpy array as a plain Python object
 * @return True if the array contains float data
 */
bool isFloatArray(PyObject *obj) {
#if NPY_API_VERSION >= 0x00000007 // 1.7
  return PyArray_ISFLOAT((const PyArrayObject *)obj);
#else
  return PyArray_ISFLOAT((PyArrayObject *)obj);
#endif
}
51

52
53
54
55
56
57
58
59
60
61
/**
 * Return true if the two arrays contains the same type. Equivalent
 * to (PyARRAY_TYPE == PyArray_TYPE) but ses correct constness
 * for numpy >= 1.7
 * @param first A pointer to a numpy array as a plain Python object
 * @param second A pointer to a numpy array as a plain Python object
 * @return True if the array contains float data
 */
bool typesEqual(PyObject *first, PyObject *second) {
#if NPY_API_VERSION >= 0x00000007 // 1.7
62
63
  const auto *firstArray = reinterpret_cast<const PyArrayObject *>(first);
  const auto *secondArray = reinterpret_cast<const PyArrayObject *>(second);
64
65
66
67
68
69
#else
  PyArrayObject *firstArray = (PyArrayObject *)first;
  PyArrayObject *secondArray = (PyArrayObject *)second;
#endif
  return PyArray_TYPE(firstArray) != PyArray_TYPE(secondArray);
}
70

71
72
73
74
/// Custom exception type for unknown data type
class UnknownDataType : public std::invalid_argument {
public:
  UnknownDataType()
75
      : std::invalid_argument("Unknown datatype. Currently only arrays of "
76
77
78
79
80
81
82
83
                              "Python floats are supported ") {}
};

//============================ getStatistics
//============================================

/**
 * Proxy for @see Mantid::Kernel::getStatistics so that it can accept numpy
84
85
86
 * arrays
 * @param data Input data
 * @param sorted A boolean indicating whether the data is sorted
87
 */
88
Statistics getStatisticsNumpy(const NDArray &data, const bool sorted = false) {
89
  using Converters::NDArrayToVector;
LamarMoore's avatar
LamarMoore committed
90
  using Mantid::Kernel::getStatistics;
91
  using Mantid::Kernel::StatOptions;
92
93

  if (isFloatArray(data.ptr())) {
94
95
96
97
    unsigned int flags = StatOptions::AllStats;
    if (sorted)
      flags |= StatOptions::SortedData;
    return getStatistics(NDArrayToVector<double>(data)(), flags);
98
99
  } else {
    throw UnknownDataType();
100
  }
101
}
102

Samuel Jackson's avatar
Samuel Jackson committed
103
GNU_DIAG_OFF("unused-local-typedef")
104
105
// Ignore -Wconversion warnings coming from boost::python
// Seen with GCC 7.1.1 and Boost 1.63.0
Samuel Jackson's avatar
Samuel Jackson committed
106
GNU_DIAG_OFF("conversion")
107

108
// Define an overload to handle the default argument
Samuel Jones's avatar
Samuel Jones committed
109
BOOST_PYTHON_FUNCTION_OVERLOADS(getStatisticsOverloads, getStatisticsNumpy, 1, 2)
Samuel Jackson's avatar
Samuel Jackson committed
110
GNU_DIAG_ON("conversion")
Samuel Jackson's avatar
Samuel Jackson committed
111
GNU_DIAG_ON("unused-local-typedef")
112
//============================ Z score
113
114
115
//============================================

/**
116
117
 * The implementation for getZscoreNumpy for using numpy arrays
 * @param zscoreFunc A function pointer to the required moments function
118
119
 * @param data Numpy array of data
 */
120
std::vector<double> getZscoreNumpy(const NDArray &data) {
121
  using Converters::NDArrayToVector;
122
  using Mantid::Kernel::getZscore;
123
124

  if (isFloatArray(data.ptr())) {
125
    return getZscore(NDArrayToVector<double>(data)());
126
127
  } else {
    throw UnknownDataType();
128
  }
129
}
130

131
132
/**
 * Proxy for @see Mantid::Kernel::getZscore so that it can accept numpy arrays,
133
134
 * @param data The input array
 * @param sorted True if the data is sorted (deprecated)
135
 */
Samuel Jones's avatar
Samuel Jones committed
136
std::vector<double> getZscoreNumpyDeprecated(const NDArray &data, const bool sorted) {
137
  UNUSED_ARG(sorted);
Samuel Jones's avatar
Samuel Jones committed
138
  PyErr_Warn(PyExc_DeprecationWarning, "getZScore no longer requires the second sorted argument.");
139
  return getZscoreNumpy(data);
140
141
142
143
144
145
}

/**
 * Proxy for @see Mantid::Kernel::getModifiedZscore so that it can accept numpy
 * arrays,
 */
Samuel Jones's avatar
Samuel Jones committed
146
std::vector<double> getModifiedZscoreNumpy(const NDArray &data, const bool sorted = false) {
147
  UNUSED_ARG(sorted) // We explicitly check in the kernel now
148
  using Converters::NDArrayToVector;
LamarMoore's avatar
LamarMoore committed
149
  using Mantid::Kernel::getModifiedZscore;
150
151

  if (isFloatArray(data.ptr())) {
152
    return getModifiedZscore(NDArrayToVector<double>(data)());
153
154
155
  } else {
    throw UnknownDataType();
  }
156
}
157

Samuel Jackson's avatar
Samuel Jackson committed
158
GNU_DIAG_OFF("unused-local-typedef")
159
160
// Ignore -Wconversion warnings coming from boost::python
// Seen with GCC 7.1.1 and Boost 1.63.0
Samuel Jackson's avatar
Samuel Jackson committed
161
GNU_DIAG_OFF("conversion")
162

163
// Define an overload to handle the default argument
Samuel Jones's avatar
Samuel Jones committed
164
BOOST_PYTHON_FUNCTION_OVERLOADS(getModifiedZscoreOverloads, getModifiedZscoreNumpy, 1, 2)
Samuel Jackson's avatar
Samuel Jackson committed
165
GNU_DIAG_ON("conversion")
Samuel Jackson's avatar
Samuel Jackson committed
166
GNU_DIAG_ON("unused-local-typedef")
167
168
169
170
171

//============================ getMoments
//============================================

// Function pointer to real implementation of getMoments
Samuel Jones's avatar
Samuel Jones committed
172
using MomentsFunction = std::vector<double> (*)(const std::vector<double> &, const std::vector<double> &, const int);
173
174
175
176
177
178
179
180
181
182
183
184

/**
 * The implementation for getMomentsAboutOrigin & getMomentsAboutOriginMean for
 * using
 * numpy arrays are identical. This encapsulates that behaviour an additional
 * parameter for
 * specifying the actual function called along.
 * @param momentsFunc A function pointer to the required moments function
 * @param indep Numpy array of independent variables
 * @param depend Numpy array of dependent variables
 * @param maxMoment Maximum number of moments to return
 */
Samuel Jones's avatar
Samuel Jones committed
185
std::vector<double> getMomentsNumpyImpl(MomentsFunction momentsFunc, const NDArray &indep, const NDArray &depend,
186
187
188
189
190
191
                                        const int maxMoment) {
  using Converters::NDArrayToVector;

  // Both input arrays must have the same typed data
  if (typesEqual(indep.ptr(), depend.ptr())) {
    throw std::invalid_argument("Datatypes of input arrays must match.");
192
193
  }

194
  if (isFloatArray(indep.ptr()) && isFloatArray(indep.ptr())) {
Samuel Jones's avatar
Samuel Jones committed
195
    return momentsFunc(NDArrayToVector<double>(indep)(), NDArrayToVector<double>(depend)(), maxMoment);
196
197
  } else {
    throw UnknownDataType();
198
  }
199
}
200

201
202
203
204
/**
 * Proxy for @see Mantid::Kernel::getMomentsAboutOrigin so that it can accept
 * numpy arrays
 */
Samuel Jones's avatar
Samuel Jones committed
205
std::vector<double> getMomentsAboutOriginNumpy(const NDArray &indep, const NDArray &depend, const int maxMoment = 3) {
206
207
208
209
  using Mantid::Kernel::getMomentsAboutOrigin;
  return getMomentsNumpyImpl(&getMomentsAboutOrigin, indep, depend, maxMoment);
}

Samuel Jackson's avatar
Samuel Jackson committed
210
GNU_DIAG_OFF("unused-local-typedef")
211
212
// Ignore -Wconversion warnings coming from boost::python
// Seen with GCC 7.1.1 and Boost 1.63.0
Samuel Jackson's avatar
Samuel Jackson committed
213
GNU_DIAG_OFF("conversion")
214
// Define an overload to handle the default argument
Samuel Jones's avatar
Samuel Jones committed
215
BOOST_PYTHON_FUNCTION_OVERLOADS(getMomentsAboutOriginOverloads, getMomentsAboutOriginNumpy, 2, 3)
Samuel Jackson's avatar
Samuel Jackson committed
216
GNU_DIAG_ON("conversion")
Samuel Jackson's avatar
Samuel Jackson committed
217
GNU_DIAG_ON("unused-local-typedef")
218
219
220
221
/**
 * Proxy for @see Mantid::Kernel::getMomentsAboutMean so that it can accept
 * numpy arrays
 */
Samuel Jones's avatar
Samuel Jones committed
222
std::vector<double> getMomentsAboutMeanNumpy(const NDArray &indep, NDArray &depend, const int maxMoment = 3) {
223
224
225
  using Mantid::Kernel::getMomentsAboutMean;
  return getMomentsNumpyImpl(&getMomentsAboutMean, indep, depend, maxMoment);
}
226

Samuel Jackson's avatar
Samuel Jackson committed
227
GNU_DIAG_OFF("unused-local-typedef")
228
229
// Ignore -Wconversion warnings coming from boost::python
// Seen with GCC 7.1.1 and Boost 1.63.0
Samuel Jackson's avatar
Samuel Jackson committed
230
GNU_DIAG_OFF("conversion")
231
// Define an overload to handle the default argument
Samuel Jones's avatar
Samuel Jones committed
232
BOOST_PYTHON_FUNCTION_OVERLOADS(getMomentsAboutMeanOverloads, getMomentsAboutMeanNumpy, 2, 3)
Samuel Jackson's avatar
Samuel Jackson committed
233
GNU_DIAG_ON("conversion")
Samuel Jackson's avatar
Samuel Jackson committed
234
GNU_DIAG_ON("unused-local-typedef")
235

236
///@endcond
LamarMoore's avatar
LamarMoore committed
237
} // namespace
238

239
240
// -------------------------------------- Exports start here
// --------------------------------------
241

242
void export_Statistics() {
243
  // typedef std::vector --> numpy array result converter
244
  using ReturnNumpyArray = return_value_policy<Policies::VectorToNumpy>;
245

246
247
  // define a new "Statistics" scope so that everything is called as
  // Statistics.getXXX
248
  // this affects everything defined within the lifetime of the scope object
249
250
251
  scope stats =
      class_<Stats>("Stats", no_init)
          .def("getStatistics", &getStatisticsNumpy,
Samuel Jones's avatar
Samuel Jones committed
252
               getStatisticsOverloads((arg("data"), arg("sorted")), "Determine the statistics for an array of data"))
253
254
          .staticmethod("getStatistics")

Samuel Jones's avatar
Samuel Jones committed
255
256
          .def("getZscore", &getZscoreNumpy, arg("data"), "Determine the Z score for an array of data")
          .def("getZscore", &getZscoreNumpyDeprecated, (arg("data"), arg("sorted")),
257
               "Determine the Z score for an array of "
258
               "data (deprecated + ignored sorted argument)")
259
260
261
          .staticmethod("getZscore")

          .def("getModifiedZscore", &getModifiedZscoreNumpy,
Samuel Jones's avatar
Samuel Jones committed
262
263
               getModifiedZscoreOverloads((arg("data"), arg("sorted")),
                                          "Determine the modified Z score for an array of data"))
264
265
266
267
          .staticmethod("getModifiedZscore")

          .def("getMomentsAboutOrigin", &getMomentsAboutOriginNumpy,
               getMomentsAboutOriginOverloads(
268
                   (arg("indep"), arg("depend"), arg("maxMoment")),
Samuel Jones's avatar
Samuel Jones committed
269
                   "Calculate the first n-moments (inclusive) about the origin")[ReturnNumpyArray()])
270
271
272
273
          .staticmethod("getMomentsAboutOrigin")

          .def("getMomentsAboutMean", &getMomentsAboutMeanNumpy,
               getMomentsAboutMeanOverloads(
274
                   (arg("indep"), arg("depend"), arg("maxMoment")),
Samuel Jones's avatar
Samuel Jones committed
275
                   "Calculate the first n-moments (inclusive) about the mean")[ReturnNumpyArray()])
276
277
          .staticmethod("getMomentsAboutMean");

278
  // Want this in the same scope as above so must be here
279
  class_<Statistics>("Statistics")
Samuel Jones's avatar
Samuel Jones committed
280
281
282
283
284
      .add_property("minimum", &Statistics::minimum, "Minimum value of the data set")
      .add_property("maximum", &Statistics::maximum, "Maximum value of the data set")
      .add_property("mean", &Statistics::mean, "Simple mean, sum(data)/nvalues, of the data set")
      .add_property("median", &Statistics::median, "Middle value of the data set")
      .add_property("standard_deviation", &Statistics::standard_deviation, "Standard width of distribution");
285
}