tlx
Loading...
Searching...
No Matches
aggregate.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * tlx/math/aggregate.hpp
3 *
4 * Part of tlx - http://panthema.net/tlx
5 *
6 * Copyright (C) 2015-2018 Timo Bingmann <tb@panthema.net>
7 *
8 * All rights reserved. Published under the Boost Software License, Version 1.0
9 ******************************************************************************/
10
11#ifndef TLX_MATH_AGGREGATE_HEADER
12#define TLX_MATH_AGGREGATE_HEADER
13
14#include <tlx/define/likely.hpp>
15
16#include <algorithm>
17#include <cmath>
18#include <limits>
19
20namespace tlx {
21
22//! \addtogroup tlx_math
23//! \{
24
/*!
 * Calculate running aggregate statistics: feed it with values, and it will keep
 * the minimum, the maximum, the average, the number of values, and the
 * standard deviation of the values.
 *
 * Values are folded in one at a time with add(); two aggregates can be merged
 * with operator + / operator +=. Only the count, the running mean, the
 * accumulated sum of squared deviations (nvar), the minimum and the maximum are
 * stored -- the individual values are not kept.
 *
 * \tparam Type_ type of the aggregated values. It must be usable with
 * std::min / std::max, std::numeric_limits<>, subtraction, and arithmetic
 * together with double.
 */
template <typename Type_>
class Aggregate
{
public:
    using Type = Type_;

    //! default constructor: an empty aggregate (count 0, mean 0, nvar 0,
    //! min = numeric_limits::max(), max = numeric_limits::lowest()).
    Aggregate() = default;

    //! initializing constructor: takes over all five state values verbatim.
    //! \param count number of values aggregated
    //! \param mean  mean of the values
    //! \param nvar  sum of squared deviations from the mean
    //! \param min   minimum value
    //! \param max   maximum value
    Aggregate(size_t count, const double& mean, const double& nvar,
              const Type& min, const Type& max) noexcept
        : count_(count), mean_(mean), nvar_(nvar),
          min_(min), max_(max) { }

    //! add a value to the running aggregation
    //! \return reference to this aggregate to allow chaining
    Aggregate& add(const Type& value) noexcept {
        count_++;
        min_ = std::min(min_, value);
        max_ = std::max(max_, value);
        // Single-pass numerically stable mean and standard deviation
        // calculation as described in Donald Knuth: The Art of Computer
        // Programming, Volume 2, Chapter 4.2.2, Equations 15 & 16
        double delta = value - mean_;
        mean_ += delta / count_;
        // uses the *updated* mean on purpose (second factor of Knuth's update)
        nvar_ += delta * (value - mean_);
        return *this;
    }

    //! return number of values aggregated
    size_t count() const noexcept { return count_; }

    //! return sum over all values aggregated. The sum is reconstructed as
    //! count * mean and converted to Type, it is not accumulated separately.
    // can't make noexcept since Type_'s conversion is allowed to throw
    const Type sum() const { return static_cast<Type>(count_ * mean_); }

    //! return sum over all values aggregated (alias of sum())
    const Type total() const { return sum(); }

    //! return the average over all values aggregated
    double average() const noexcept { return mean_; }

    //! return the average over all values aggregated (alias of average())
    double avg() const noexcept { return average(); }

    //! return the average over all values aggregated (alias of average())
    double mean() const noexcept { return average(); }

    //! return minimum over all values aggregated
    const Type& min() const noexcept { return min_; }

    //! return maximum over all values aggregated
    const Type& max() const noexcept { return max_; }

    //! return maximum - minimum over all values aggregated
    Type span() const noexcept { return max_ - min_; }

    //! return the variance of all values aggregated.
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    //! Returns 0.0 if fewer than two values were aggregated or if ddof is not
    //! smaller than the number of values.
    double variance(size_t ddof = 1) const {
        // count_ - ddof is unsigned: without the second check ddof >= count_
        // would divide by zero or by a wrapped-around huge value.
        if (count_ <= 1 || count_ <= ddof) return 0.0;
        return nvar_ / static_cast<double>(count_ - ddof);
    }

    //! return the variance of all values aggregated (alias of variance()).
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    double var(size_t ddof = 1) const {
        return variance(ddof);
    }

    //! return the standard deviation of all values aggregated, i.e. the
    //! square root of variance(ddof).
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    double standard_deviation(size_t ddof = 1) const {
        return std::sqrt(variance(ddof));
    }

    //! return the standard deviation of all values aggregated (alias of
    //! standard_deviation()).
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    double stdev(size_t ddof = 1) const { return standard_deviation(ddof); }

    //! operator + to combine two Aggregate<>
    Aggregate operator + (const Aggregate& a) const noexcept {
        return Aggregate(
            // count
            count_ + a.count_,
            // mean
            combine_means(a),
            // merging variance is a bit complicated
            combine_variance(a),
            // min, max
            std::min(min_, a.min_), std::max(max_, a.max_));
    }

    //! operator += to combine two Aggregate<>
    Aggregate& operator += (const Aggregate& a) noexcept {
        // Both helpers read this object's pre-merge count_ and mean_, so both
        // results must be computed before any member is overwritten. This also
        // keeps self-merging (x += x) correct.
        const double merged_mean = combine_means(a);
        const double merged_nvar = combine_variance(a);
        mean_ = merged_mean;
        nvar_ = merged_nvar;
        min_ = std::min(min_, a.min_);
        max_ = std::max(max_, a.max_);
        count_ += a.count_;
        return *this;
    }

    //! serialization method for cereal.
    template <typename Archive>
    void serialize(Archive& archive) {
        archive(count_, mean_, nvar_, min_, max_);
    }

private:
    //! combine means, check if either count is zero. fix problems with NaN
    double combine_means(const Aggregate& a) const noexcept {
        if (count_ == 0)
            return a.mean_;
        if (a.count_ == 0)
            return mean_;
        return (mean_ * count_ + a.mean_ * a.count_) / (count_ + a.count_);
    }

    //! T. Chan et al 1979, "Updating Formulae and a Pairwise Algorithm for
    //! Computing Sample Variances"
    //! \return the merged sum of squared deviations of *this and other
    double combine_variance(const Aggregate& other) const noexcept {
        // With an empty side the correction term is zero; skipping it avoids
        // 0/0 = NaN when both sides are empty.
        if (count_ == 0 || other.count_ == 0)
            return nvar_ + other.nvar_;
        const double delta = mean_ - other.mean_;
        // multiply the counts as doubles so the product cannot wrap in size_t
        const double n_this = static_cast<double>(count_);
        const double n_other = static_cast<double>(other.count_);
        return nvar_ + other.nvar_ + (delta * delta) *
               (n_this * n_other) / (n_this + n_other);
    }

    //! number of values aggregated
    size_t count_ = 0;

    //! mean of values
    double mean_ = 0.0;

    //! approximate count * variance; stddev = sqrt(nvar / (count-1))
    double nvar_ = 0.0;

    //! minimum value
    Type min_ = std::numeric_limits<Type>::max();

    //! maximum value
    Type max_ = std::numeric_limits<Type>::lowest();
};
180
181//! \}
182
183} // namespace tlx
184
185#endif // !TLX_MATH_AGGREGATE_HEADER
186
187/******************************************************************************/
Calculate running aggregate statistics: feed it with values, and it will keep the minimum,...
Definition aggregate.hpp:32
Aggregate & operator+=(const Aggregate &a) noexcept
operator += to combine two Aggregate<>
const Type & min() const noexcept
return minimum over all values aggregated
Definition aggregate.hpp:79
double standard_deviation(size_t ddof=1) const
return the standard deviation of all values aggregated.
void serialize(Archive &archive)
serialization method for cereal.
Type span() const noexcept
return maximum - minimum over all values aggregated
Definition aggregate.hpp:85
size_t count_
number of values aggregated
const Type sum() const
return sum over all values aggregated
Definition aggregate.hpp:64
double avg() const noexcept
return the average over all values aggregated
Definition aggregate.hpp:73
const Type & max() const noexcept
return maximum over all values aggregated
Definition aggregate.hpp:82
Aggregate(size_t count, const double &mean, const double &nvar, const Type &min, const Type &max) noexcept
initializing constructor
Definition aggregate.hpp:40
double mean_
mean of values
double stdev(size_t ddof=1) const
return the standard deviation of all values aggregated.
Type min_
minimum value
double combine_means(const Aggregate &a) const noexcept
combine means, check if either count is zero. fix problems with NaN
const Type total() const
return sum over all values aggregated
Definition aggregate.hpp:67
double variance(size_t ddof=1) const
return the variance of all values aggregated.
Definition aggregate.hpp:91
Type max_
maximum value
Aggregate operator+(const Aggregate &a) const noexcept
operator + to combine two Aggregate<>
double combine_variance(const Aggregate &other) const noexcept
T. Chan et al 1979, "Updating Formulae and a Pairwise Algorithm for Computing Sample Variances"
double average() const noexcept
return the average over all values aggregated
Definition aggregate.hpp:70
Aggregate()=default
default constructor
Aggregate & add(const Type &value) noexcept
add a value to the running aggregation
Definition aggregate.hpp:46
double nvar_
approximate count * variance; stddev = sqrt(nvar / (count-1))
size_t count() const noexcept
return number of values aggregated
Definition aggregate.hpp:60
double mean() const noexcept
return the average over all values aggregated
Definition aggregate.hpp:76
double var(size_t ddof=1) const
return the variance of all values aggregated.