DiSMEC++
statistics.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "statistics.h"
7 #include "collection.h"
8 #include "utils/conversion.h"
9 #include <nlohmann/json.hpp>
10 
11 using namespace dismec::stats;
12 
13 void CounterStat::record_int(long integer) {
14  m_Counter += integer;
15 }
16 
17 std::unique_ptr<Statistics> CounterStat::clone() const {
18  return std::make_unique<CounterStat>();
19 }
20 
21 void CounterStat::merge_imp(const CounterStat& other) {
22  m_Counter += other.m_Counter;
23 }
24 
26  return {{"Counter", m_Counter}, {"Type", "Counter"}};
27 }
28 
29 void BasicStat::record_int(long value) {
30  record(real_t(value));
31 }
32 
34  ++m_Counter;
35  m_Sum += value;
36  m_SumSquared += value*value;
37 }
38 
39 std::unique_ptr<Statistics> BasicStat::clone() const {
40  return std::make_unique<BasicStat>();
41 }
42 
43 void BasicStat::merge_imp(const BasicStat& other) {
44  m_Counter += other.m_Counter;
45  m_Sum += other.m_Sum;
46  m_SumSquared += other.m_SumSquared;
47 }
48 
50  return {{"Counter", m_Counter}, {"Sum", m_Sum}, {"SumSquared", m_SumSquared}, {"Type", "Basic"},
51  {"Mean", m_Sum / double(m_Counter)}};
52 }
53 
54 TaggedStat::TaggedStat(std::string tag, int max_tag, std::string transform_name, std::function<double(double)> transform ) :
55  m_Tag( TagContainer::create_empty_container(std::move(tag)) ),
56  m_MaxTag(max_tag),
57  m_Transform( std::move(transform) ),
58  m_TransformName( std::move(transform_name) )
59  {
60 
61 }
62 void TaggedStat::record_int(long value) {
63  record(real_t(value));
64 }
65 
67  int tag = m_Tag.get_value();
68  if(tag < 0)
69  throw std::logic_error("Missing tag!");
70  if(tag > m_MaxTag && m_MaxTag >= 0)
71  tag = m_MaxTag;
72 
73  if(tag >= ssize(m_Counters)) {
74  m_Counters.resize(tag + 1);
75  m_Sums.resize(tag + 1);
76  m_SumsSquared.resize(tag + 1);
77  }
78  ++m_Counters[tag];
79  if(m_Transform) {
80  value = m_Transform(value);
81  }
82  m_Sums[tag] += value;
83  m_SumsSquared[tag] += value * value;
84 }
85 
86 std::unique_ptr<Statistics> TaggedStat::clone() const {
87  return std::make_unique<TaggedStat>(m_Tag.get_name(), m_MaxTag, m_TransformName, m_Transform);
88 }
89 
90 void TaggedStat::merge_imp(const TaggedStat& other) {
91  auto other_size = ssize(other.m_Counters);
92  if(other_size > ssize(m_Counters)) {
93  m_Counters.resize(other_size);
94  m_Sums.resize(other_size);
95  m_SumsSquared.resize(other_size);
96  }
97 
98  for(long i = 0; i < other_size; ++i) {
99  m_Counters[i] += other.m_Counters[i];
100  m_Sums[i] += other.m_Sums[i];
101  m_SumsSquared[i] += other.m_SumsSquared[i];
102  }
103 }
104 
106  return {{"Counters", m_Counters}, {"Sums", m_Sums}, {"SumsSquared", m_SumsSquared},
107  {"Type", "BasicTagged"}, {"Transform", m_TransformName}};
108 }
109 
111  m_Tag = source.get_tag_by_name(m_Tag.get_name());
112 }
113 
114 MultiStat::MultiStat(std::unordered_map<std::string, std::unique_ptr<Statistics>> ss) : m_SubStats(std::move(ss)) {
115 
116 }
117 
118 void MultiStat::record_int(long value) {
119  do_record(value);
120 }
122  do_record(value);
123 }
125  do_record(vector);
126 }
127 
128 template<class T>
129 void MultiStat::do_record(T&& value) {
130  for(const auto& entry : m_SubStats) {
131  entry.second->record(std::forward<T>(value));
132  }
133 }
134 
135 std::unique_ptr<Statistics> MultiStat::clone() const {
136  stats_map_t new_map;
137  for(const auto& entry : m_SubStats) {
138  new_map.emplace(entry.first, entry.second->clone());
139  }
140  return std::make_unique<MultiStat>(std::move(new_map));
141 }
142 
143 void MultiStat::merge_imp(const MultiStat& other) {
144  for(const auto& entry : m_SubStats) {
145  entry.second->merge( *other.m_SubStats.at(entry.first) );
146  }
147 }
148 
150  nlohmann::json result;
151  result["Type"] = "Multi";
152  nlohmann::json data;
153  for(const auto& entry : m_SubStats) {
154  data[entry.first] = entry.second->to_json();
155  }
156  result["Data"] = std::move(data);
157  return result;
158 }
159 
161  for(const auto& entry : m_SubStats) {
162  entry.second->setup(source);
163  }
164 }
165 
167  m_Data.push_back(value);
168 }
169 
170 void FullRecordStat::record_int(long value) {
171  m_Data.push_back(value);
172 }
173 
174 std::unique_ptr<Statistics> FullRecordStat::clone() const {
175  return std::make_unique<FullRecordStat>();
176 }
178  m_Data.reserve(m_Data.size() + other.m_Data.size());
179  m_Data.insert(end(m_Data), begin(other.m_Data), end(other.m_Data));
180 }
181 
183  return {{"Type", "Full"}, {"Values", m_Data}};
184 }
185 
186 VectorReductionStat::VectorReductionStat(std::unique_ptr<Statistics> stat, std::string reduction) :
187  m_Target( std::move(stat) ), m_ReductionName(std::move(reduction)) {
188  if(m_ReductionName == "L1") {
189  m_Reduction = [](const DenseRealVector& v) -> real_t { return v.lpNorm<1>(); };
190  } else if(m_ReductionName == "L2") {
191  m_Reduction = [](const DenseRealVector& v) -> real_t { return v.norm(); };
192  } else if(m_ReductionName == "L2Squared") {
193  m_Reduction = [](const DenseRealVector& v) -> real_t { return v.squaredNorm(); };
194  } else if(m_ReductionName == "Linf") {
195  m_Reduction = [](const DenseRealVector& v) -> real_t { return v.lpNorm<Eigen::Infinity>(); };
196  } else {
197  throw std::runtime_error("Unknown reduction operation");
198  }
199 }
200 
202  m_Target->record(real_t{m_Reduction(value)});
203 }
204 
205 std::unique_ptr<Statistics> VectorReductionStat::clone() const {
206  return std::make_unique<VectorReductionStat>(m_Target->clone(), m_ReductionName);
207 }
208 
210  m_Target->merge(*other.m_Target);
211 }
212 
214  return m_Target->to_json();
215 }
216 
217 #include "histogram.h"
218 std::unique_ptr<Statistics> dismec::stats::make_stat_from_json(const nlohmann::json& source) {
219  auto type = source.at("type").get<std::string>();
220  if(type == "Basic") {
221  return std::make_unique<BasicStat>();
222  } else if (type == "Counter") {
223  return std::make_unique<CounterStat>();
224  } else if (type == "Tagged") {
225  std::function<double(double)> transform;
226  std::string transform_name = "lin";
227  if(source.contains("transform")) {
228  transform_name= source.at("transform");
229  if(transform_name == "log") {
230  transform = [](double d){ return std::log(d); };
231  }
232  }
233  int max_tag = -1;
234  if(source.contains("max_tag")) {
235  max_tag = source.at("max_tag").get<int>();
236  }
237 
238  return std::make_unique<TaggedStat>(source.at("tag").get<std::string>(), max_tag,
239  std::move(transform_name), std::move(transform));
240  } else if (type == "LinHist") {
241  return make_linear_histogram(source.at("bins").get<int>(),
242  source.at("min").get<real_t>(), source.at("max").get<real_t>());
243  } else if (type == "LogHist") {
244  return make_logarithmic_histogram(source.at("bins").get<int>(),
245  source.at("min").get<real_t>(), source.at("max").get<real_t>());
246  } else if (type == "TagLinHist") {
247  return make_linear_histogram(source.at("tag").get<std::string>(),
248  source.at("max_tag").get<int>(), source.at("bins").get<int>(),
249  source.at("min").get<real_t>(),
250  source.at("max").get<real_t>());
251  } else if (type == "TagLogHist") {
253  source.at("tag").get<std::string>(),
254  source.at("max_tag").get<int>(), source.at("bins").get<int>(),
255  source.at("min").get<real_t>(),
256  source.at("max").get<real_t>());
257  } else if (type == "Multi") {
258  std::unordered_map<std::string, std::unique_ptr<Statistics>> sub_stats;
259  for(auto& sub : source.at("stats").items()) {
260  sub_stats[sub.key()] = make_stat_from_json(sub.value());
261  }
262  return std::make_unique<MultiStat>(std::move(sub_stats));
263  } else if (type == "Full") {
264  return std::make_unique<FullRecordStat>();
265  } else if (type == "VectorReduction") {
266  return std::make_unique<VectorReductionStat>(make_stat_from_json(source.at("stat")), source.at("reduction"));
267  }
268  else {
269  throw std::runtime_error("Unknown statistics type");
270  }
271 }
void record_real(real_t value) override
Definition: statistics.cpp:33
void record_int(long value) override
Definition: statistics.cpp:29
nlohmann::json to_json() const override
Converts the statistics current value into a json object.
Definition: statistics.cpp:49
std::unique_ptr< Statistics > clone() const override
Definition: statistics.cpp:39
void merge_imp(const BasicStat &other)
Definition: statistics.cpp:43
nlohmann::json to_json() const override
Converts the statistics current value into a json object.
Definition: statistics.cpp:25
std::unique_ptr< Statistics > clone() const override
Definition: statistics.cpp:17
void merge_imp(const CounterStat &other)
Definition: statistics.cpp:21
void record_int(long integer) override
Definition: statistics.cpp:13
void record_int(long value) override
Definition: statistics.cpp:170
void merge_imp(const FullRecordStat &other)
Definition: statistics.cpp:177
void record_real(real_t value) override
Definition: statistics.cpp:166
nlohmann::json to_json() const override
Converts the statistics current value into a json object.
Definition: statistics.cpp:182
std::unique_ptr< Statistics > clone() const override
Definition: statistics.cpp:174
std::vector< real_t > m_Data
Definition: statistics.h:99
std::unordered_map< std::string, std::unique_ptr< Statistics > > stats_map_t
Definition: statistics.h:80
std::unique_ptr< Statistics > clone() const override
Definition: statistics.cpp:135
void record_real(real_t value) override
Definition: statistics.cpp:121
stats_map_t m_SubStats
Definition: statistics.h:81
void merge_imp(const MultiStat &other)
Definition: statistics.cpp:143
void record_int(long value) override
Definition: statistics.cpp:118
void record_vec(const DenseRealVector &vector) override
Definition: statistics.cpp:124
void setup(const StatisticsCollection &source) override
This function has to be called before the Statistics is used to collect data for the first time.
Definition: statistics.cpp:160
MultiStat(std::unordered_map< std::string, std::unique_ptr< Statistics >> ss)
Definition: statistics.cpp:114
nlohmann::json to_json() const override
Converts the statistics current value into a json object.
Definition: statistics.cpp:149
This class manages a collection of named Statistics objects.
Definition: collection.h:47
TagContainer get_tag_by_name(const std::string &name) const
Gets the tag with the given name.
Definition: collection.cpp:105
void record(int integer)
Definition: stats_base.h:70
A tag container combines a name with a shared pointer, which points to the tag value.
Definition: stats_base.h:30
int get_value() const
Returns the current value of the tag. Requires the container to not be empty.
Definition: stats_base.h:36
const std::string & get_name() const
returns the name of the associated tag
Definition: stats_base.h:33
void record_int(long value) override
Definition: statistics.cpp:62
void record_real(real_t value) override
Definition: statistics.cpp:66
std::vector< double > m_Sums
Definition: statistics.h:56
std::vector< long > m_Counters
Definition: statistics.h:55
TaggedStat(std::string tag, int max_tag, std::string transform_name={}, std::function< double(double)> transform={})
Definition: statistics.cpp:54
void merge_imp(const TaggedStat &other)
Definition: statistics.cpp:90
std::vector< double > m_SumsSquared
Definition: statistics.h:57
void setup(const StatisticsCollection &source) override
This function has to be called before the Statistics is used to collect data for the first time.
Definition: statistics.cpp:110
std::string m_TransformName
Definition: statistics.h:63
std::function< double(double)> m_Transform
Definition: statistics.h:62
nlohmann::json to_json() const override
Converts the statistics current value into a json object.
Definition: statistics.cpp:105
std::unique_ptr< Statistics > clone() const override
Definition: statistics.cpp:86
std::function< real_t(const DenseRealVector &)> m_Reduction
Definition: statistics.h:116
std::unique_ptr< Statistics > clone() const override
Definition: statistics.cpp:205
VectorReductionStat(std::unique_ptr< Statistics > stat, std::string reduction)
Definition: statistics.cpp:186
void record_vec(const DenseRealVector &value) override
Definition: statistics.cpp:201
nlohmann::json to_json() const override
Converts the statistics current value into a json object.
Definition: statistics.cpp:213
std::unique_ptr< Statistics > m_Target
Definition: statistics.h:115
void merge_imp(const VectorReductionStat &other)
Definition: statistics.cpp:209
nlohmann::json json
Definition: model-io.cpp:22
std::unique_ptr< Statistics > make_linear_histogram(int bins, real_t min, real_t max)
Definition: histogram.cpp:218
std::unique_ptr< stats::Statistics > make_stat_from_json(const nlohmann::json &source)
Generates a stats::Statistics object based on a json configuration.
Definition: statistics.cpp:218
std::unique_ptr< Statistics > make_logarithmic_histogram(int bins, real_t min, real_t max)
Definition: histogram.cpp:222
constexpr auto ssize(const C &c) -> std::common_type_t< std::ptrdiff_t, std::make_signed_t< decltype(c.size())>>
signed size free function. Taken from https://en.cppreference.com/w/cpp/iterator/size
Definition: conversion.h:42
types::DenseVector< real_t > DenseRealVector
Any dense, real values vector.
Definition: matrix_types.h:40
float real_t
The default type for floating point values.
Definition: config.h:17