38 #include <boost/lambda/lambda.hpp>
39 #include <boost/lambda/casts.hpp>
40 #include <boost/function/function_base.hpp>
42 #ifndef OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H
43 #define OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H
55 template <
typename IteratorType>
58 return std::accumulate(begin, end, 0.0);
68 template <
typename IteratorType>
76 return sum(begin, end) / size;
90 template <
typename IteratorType>
93 Size size = std::distance(begin, end);
102 std::sort(begin, end);
107 IteratorType it1 = begin;
108 std::advance(it1, size / 2 - 1);
109 IteratorType it2 = it1;
110 std::advance(it2, 1);
111 return (*it1 + *it2) / 2.0;
115 IteratorType it = begin;
116 std::advance(it, (size - 1) / 2);
132 template <
typename IteratorType>
135 Size size = std::distance(begin, end);
141 if (quantile > 100 || quantile < 1)
146 int l = floor( (
double(quantile) * (
double(size) / 100)) + 0.5);
150 std::sort(begin, end);
153 IteratorType it = begin;
154 std::advance(it, l - 1);
168 template <
typename IteratorType1,
typename IteratorType2>
172 SignedSize dist = std::distance(begin_a, end_a);
173 if (dist == 0 || dist != std::distance(begin_b, end_b))
179 while (begin_a != end_a)
199 template <
typename IteratorType1,
typename IteratorType2>
203 SignedSize dist = std::distance(begin_a, end_a);
204 if (dist == 0 || dist != std::distance(begin_b, end_b))
210 while (begin_a != end_a)
212 if ((*begin_a < 0 && *begin_b >= 0) || (*begin_a >= 0 && *begin_b < 0))
220 return correct / dist;
232 template <
typename IteratorType1,
typename IteratorType2>
236 Int dist = std::distance(begin_a, end_a);
237 if (dist == 0 || dist != std::distance(begin_b, end_b))
247 while (begin_a != end_a)
249 if (*begin_a < 0 && *begin_b >= 0)
253 else if (*begin_a < 0 && *begin_b < 0)
257 else if (*begin_a >= 0 && *begin_b >= 0)
261 else if (*begin_a >= 0 && *begin_b < 0)
270 return (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
284 template <
typename IteratorType1,
typename IteratorType2>
288 SignedSize dist = std::distance(begin_a, end_a);
289 if (dist == 0 || dist != std::distance(begin_b, end_b))
295 DoubleReal avg_a = std::accumulate(begin_a, end_a, 0.0) / dist;
296 DoubleReal avg_b = std::accumulate(begin_b, end_b, 0.0) / dist;
301 while (begin_a != end_a)
305 numerator += (temp_a * temp_b);
306 denominator_a += (temp_a * temp_a);
307 denominator_b += (temp_b * temp_b);
312 return numerator / sqrt(denominator_a * denominator_b);
316 template <
typename Value>
322 Size n = (w.size() - 1);
324 std::vector<std::pair<Size, Value> > w_idx;
325 for (
Size j = 0; j < w.size(); ++j)
327 w_idx.push_back(std::make_pair(j, w[j]));
330 std::sort(w_idx.begin(), w_idx.end(),
331 boost::lambda::ret<bool>((&boost::lambda::_1->*& std::pair<Size, Value>::second) <
332 (&boost::lambda::_2->*& std::pair<Size, Value>::second)));
337 if (fabs(w_idx[i + 1].second - w_idx[i].second) > 0.0000001 * fabs(w_idx[i + 1].second))
339 w_idx[i].second = Value(i + 1);
345 for (z = i + 1; (z <= n) && fabs(w_idx[z].second - w_idx[i].second) <= 0.0000001 * fabs(w_idx[z].second); ++z)
349 rank = 0.5 * (i + z + 1);
351 for (
Size v = i; v <= z - 1; ++v)
353 w_idx[v].second = rank;
359 w_idx[n].second = Value(n + 1);
361 for (
Size j = 0; j < w.size(); ++j)
363 w[w_idx[j].first] = w_idx[j].second;
378 template <
typename IteratorType1,
typename IteratorType2>
382 SignedSize dist = std::distance(begin_a, end_a);
383 if (dist == 0 || dist != std::distance(begin_b, end_b))
389 std::vector<DoubleReal> ranks_data;
390 ranks_data.reserve(dist);
391 std::vector<DoubleReal> ranks_model;
392 ranks_model.reserve(dist);
394 while (begin_a != end_a)
396 ranks_model.push_back(*begin_a);
397 ranks_data.push_back(*begin_b);
414 for (
Int i = 0; i < dist; ++i)
416 sum_model_data += (ranks_data[i] - mu) * (ranks_model[i] - mu);
417 sqsum_data += (ranks_data[i] - mu) * (ranks_data[i] - mu);
418 sqsum_model += (ranks_model[i] - mu) * (ranks_model[i] - mu);
422 if (!sqsum_data || !sqsum_model)
425 return sum_model_data / (sqrt(sqsum_data) * sqrt(sqsum_model));
431 #endif // OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H
static DoubleReal median(IteratorType begin, IteratorType end, bool sorted=FALSE)
Calculates the median of a range of values.
Definition: StatisticFunctions.h:91
static DoubleReal matthewsCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Matthews correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b) ...
Definition: StatisticFunctions.h:233
static DoubleReal sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:56
static void computeRank(std::vector< Value > &w)
Replaces the elements in vector w by their ranks.
Definition: StatisticFunctions.h:317
static DoubleReal classificationRate(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the classification rate for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:200
ptrdiff_t SignedSize
Signed size type, e.g. used as a pointer difference.
Definition: Types.h:151
static DoubleReal quantile(IteratorType begin, IteratorType end, UInt quantile, bool sorted=FALSE)
Calculates the quantile of a range of values.
Definition: StatisticFunctions.h:133
static DoubleReal pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b) ...
Definition: StatisticFunctions.h:285
static DoubleReal mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:69
static DoubleReal rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b) ...
Definition: StatisticFunctions.h:379
static DoubleReal meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b) ...
Definition: StatisticFunctions.h:169
Invalid range exception.
Definition: Exception.h:286
size_t Size
Size type, e.g. used for a variable that can hold the result of size().
Definition: Types.h:144
int Int
Signed integer type.
Definition: Types.h:100
double DoubleReal
Double-precision real type.
Definition: Types.h:118