DiSMEC++
metrics.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "metrics.h"
7 #include "spdlog/fmt/fmt.h"
8 #include "utils/throw_error.h"
9 #include "utils/conversion.h"
10 #include <boost/predef.h>
11 #include <numeric>
12 
13 using namespace dismec::prediction;
14 
15 MetricCollectionInterface::MetricCollectionInterface(long num_labels) : m_NumLabels(num_labels) {
16  if(num_labels <= 0) {
17  THROW_EXCEPTION(std::invalid_argument, "Number of labels must be positive. Got {}", num_labels);
18  }
19 }
20 
21 // ---------------------------------------------------------------------------------------------------------------------
22 // Micro Confusion Matrix
23 // ---------------------------------------------------------------------------------------------------------------------
24 
25 ConfusionMatrixRecorder::ConfusionMatrixRecorder(long num_labels, long k) : MetricCollectionInterface(num_labels), m_K(k) {
26  m_Confusion.resize(num_labels);
27 }
28 
30  for(long j = 0; j < m_K; ++j) {
31  if(prediction[j].Correct) {
32  ++m_Confusion[prediction[j].Label.to_index()].TruePositives;
33  } else {
34  ++m_Confusion[prediction[j].Label.to_index()].FalsePositives;
35  }
36  }
37 
38  for(const auto& lbl : labels) {
39  if(lbl.Rank >= m_K) {
40  ++m_Confusion[lbl.Label.to_index()].FalseNegatives;
41  }
42  }
44 }
45 
47  const auto& other_direct = dynamic_cast<const ConfusionMatrixRecorder&>(other);
48 
49  ALWAYS_ASSERT_EQUAL(m_K, other_direct.m_K, "Mismatch in confusion matrix K: {} and {}");
50  ALWAYS_ASSERT_EQUAL(num_labels(), other.num_labels(), "Mismatch in number of labels: {} and {}");
51 
52  m_InstanceCount += other_direct.m_InstanceCount;
53  for(int i = 0; i < ssize(m_Confusion); ++i) {
54  m_Confusion[i] += other_direct.m_Confusion[i];
55  }
56 }
57 
59  assert(label.to_index() < ssize(m_Confusion));
60  auto base = m_Confusion[label.to_index()];
61  base.TrueNegatives = m_InstanceCount - base.TruePositives - base.FalsePositives - base.FalseNegatives;
62  return base;
63 }
64 
65 std::unique_ptr<MetricCollectionInterface> ConfusionMatrixRecorder::clone() const {
66  return std::make_unique<ConfusionMatrixRecorder>(num_labels(), m_K);
67 }
68 
69 // ---------------------------------------------------------------------------------------------------------------------
71 
72 }
73 
76  ++m_NumSamples;
77 }
78 
80  const auto& cast = dynamic_cast<const InstanceAveragedMetric&>(other);
81  // add up weights and accumulated values
82  m_Accumulator += cast.m_Accumulator.value();
83  m_NumSamples += cast.m_NumSamples;
84 }
85 
86 
87 // ---------------------------------------------------------------------------------------------------------------------
88 // Generalization of Precision and DCG at K
89 // ---------------------------------------------------------------------------------------------------------------------
90 
namespace {
    /// Builds a weight vector with `k` entries that all equal `1 / k`.
    /// Used as the default weighting by the three-argument `InstanceRankedPositives` constructor.
    std::vector<double> uniform_weights(long k) {
        const double share = 1.0 / static_cast<double>(k);
        // parentheses on purpose: (count, value) constructor, not an initializer list
        return std::vector<double>(static_cast<std::size_t>(k), share);
    }
}
99 
100 InstanceRankedPositives::InstanceRankedPositives(long num_labels, long k, bool normalize) :
101  InstanceRankedPositives(num_labels, k, normalize, uniform_weights(k))
102 {
103 }
104 #include <iostream>
105 InstanceRankedPositives::InstanceRankedPositives(long num_labels, long k, bool normalize, std::vector<double> weights) :
106  InstanceAveragedMetric(num_labels), m_K(k), m_Normalize(normalize), m_Weights( std::move(weights) ) {
107  ALWAYS_ASSERT_EQUAL(m_K, ssize(m_Weights), "Mismatch between k={} and #weights = {}");
108  // Exclusive scan -- prepend a zero
109  m_Cumulative.push_back(0.0);
110  std::partial_sum(begin(m_Weights), end(m_Weights), std::back_inserter(m_Cumulative));
111 }
112 
113 
114 // With -O3 GCC 9 produces an ICE here, so we manually fix the optimization options here
115 #if BOOST_COMP_GNUC && BOOST_COMP_GNUC <= BOOST_VERSION_NUMBER(10, 0, 0)
116 #pragma GCC push_options
117 #pragma GCC optimize("-O1")
118 #endif
120  assert(ssize(prediction) >= m_K);
121  double correct = 0;
122  for(long j = 0; j < m_K; ++j) {
123  if(prediction[j].Correct) {
124  correct += m_Weights[j];
125  }
126  }
127 
128  if(m_Normalize) {
129  long step = std::min(m_K, (long)labels.size());
130  correct /= m_Cumulative[step];
131  }
132 
133  accumulate(correct);
134 }
135 #if BOOST_COMP_GNUC && BOOST_COMP_GNUC <= BOOST_VERSION_NUMBER(10, 0, 0)
136 #pragma GCC pop_options
137 #endif
138 
139 std::unique_ptr<MetricCollectionInterface> InstanceRankedPositives::clone() const {
140  return std::make_unique<InstanceRankedPositives>(num_labels(), m_K, m_Normalize, m_Weights);
141 }
142 
143 
144 // ---------------------------------------------------------------------------------------------------------------------
145 // Abandonment At K
146 // ---------------------------------------------------------------------------------------------------------------------
147 AbandonmentAtK::AbandonmentAtK(long num_labels, long k) : InstanceAveragedMetric(num_labels), m_K(k) {
148 }
149 
151  assert(ssize(prediction) >= m_K);
152  double correct = 0.0;
153  for(long j = 0; j < m_K; ++j) {
154  if(prediction[j].Correct) {
155  correct = 1.0;
156  break;
157  }
158  }
159  accumulate(correct);
160 }
161 
162 std::unique_ptr<MetricCollectionInterface> AbandonmentAtK::clone() const {
163  return std::make_unique<AbandonmentAtK>(num_labels(), m_K);
164 }
165 
166 // ---------------------------------------------------------------------------------------------------------------------
167 // Metric Reporters
168 // ---------------------------------------------------------------------------------------------------------------------
169 
171  m_Name(std::move(name)), m_Metric(metric) {
172 
173 }
174 
175 auto InstanceWiseMetricReporter::get_values() const -> std::vector<metric_t> {
176  return {{m_Name, m_Metric->value()}};
177 }
178 
179 void MacroMetricReporter::add_coverage(double threshold, std::string name) {
180  if(name.empty()) {
181  name = fmt::format("Cov@{}", m_ConfusionMatrix->get_k());
182  }
183 
184  auto fn = [threshold](const ConfusionMatrix& cm){
185  if(recall(cm) > threshold) {
186  return 1.0;
187  } else {
188  return 0.0;
189  }
190  };
191  add_reduction(std::move(name), MACRO, fn);
192 }
193 
194 namespace {
196  switch (type) {
199  }
200  __builtin_unreachable();
201  }
202 }
203 
// Generates the definition of `MacroMetricReporter::add_<METRIC>`: the confusion-matrix function
// `METRIC` is wrapped in a lambda and registered through `add_reduction_helper`, using
// "{}<SHORTHAND>@{}" as the pattern from which a default name is built.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define IMPLEMENT_ADD_METRIC(METRIC, SHORTHAND) \
    void MacroMetricReporter::add_##METRIC(ReductionType reduction, std::string name) { \
        auto metric_fn = [](const ConfusionMatrix& cm){ return METRIC(cm); }; \
        add_reduction_helper(std::move(name), "{}" SHORTHAND "@{}", reduction, metric_fn); \
    }
210 
224 
225 void MacroMetricReporter::add_f_measure(ReductionType reduction, double beta, std::string name) {
226  if(name.empty()) {
227  name = fmt::format("{}F{}@{}", reduction_name(reduction), beta, m_ConfusionMatrix->get_k());
228  }
229 
230  auto fn = [beta](const ConfusionMatrix& cm){
231  return f_beta(cm, beta);
232  };
233 
234  add_reduction(std::move(name), reduction, fn);
235 }
236 
238  add_reduction(fmt::format("MicroTP@{}", m_ConfusionMatrix->get_k()), MICRO,
239  [](const ConfusionMatrix& cm){ return true_positive_fraction(cm); });
240  add_reduction(fmt::format("MicroFP@{}", m_ConfusionMatrix->get_k()), MICRO,
241  [](const ConfusionMatrix& cm){ return false_positive_fraction(cm); });
242  add_reduction(fmt::format("MicroTN@{}", m_ConfusionMatrix->get_k()), MICRO,
243  [](const ConfusionMatrix& cm){ return true_negative_fraction(cm); });
244  add_reduction(fmt::format("MicroFN@{}", m_ConfusionMatrix->get_k()), MICRO,
245  [](const ConfusionMatrix& cm){ return false_negative_fraction(cm); });
246 }
247 
248 void MacroMetricReporter::add_reduction_helper(std::string name, const char* pattern, ReductionType reduction,
249  std::function<double(const ConfusionMatrix&)> fn) {
250  if(name.empty()) {
251  name = fmt::format(pattern, reduction_name(reduction), m_ConfusionMatrix->get_k());
252  }
253  add_reduction(std::move(name), reduction, std::move(fn));
254 }
255 
256 void MacroMetricReporter::add_reduction(std::string name, ReductionType type, std::function<double(const ConfusionMatrix&)> fn) {
257  if(type == MACRO) {
258  m_MacroReductions.emplace_back(std::move(name), std::move(fn));
259  } else {
260  m_MicroReductions.emplace_back(std::move(name), std::move(fn));
261  }
262 }
263 
264 MacroMetricReporter::MacroMetricReporter(const ConfusionMatrixRecorder* confusion) : m_ConfusionMatrix(confusion) {
265  if(confusion == nullptr) {
266  THROW_EXCEPTION(std::invalid_argument, "ConfusionMatrixRecorder cannot be null");
267  }
268 }
269 
270 auto MacroMetricReporter::get_values() const -> std::vector<metric_t> {
271  std::vector<metric_t> metric;
272  metric.reserve(m_MacroReductions.size());
273  for(const auto& red : m_MacroReductions) {
274  metric.emplace_back(red.first, 0.0);
275  }
276 
277  ConfusionMatrix micro;
278 
279  for(int l = 0; l < m_ConfusionMatrix->num_labels(); ++l) {
281  micro += cm;
282  for(int i = 0; i < ssize(m_MacroReductions); ++i) {
283  metric[i].second += m_MacroReductions[i].second(cm);
284  }
285  }
286 
287  auto normalize = static_cast<double>(m_ConfusionMatrix->num_labels());
288  if(normalize != 0) {
289  for(int i = 0; i < ssize(m_MacroReductions); ++i) {
290  metric[i].second /= normalize;
291  }
292  } else {
293  for(int i = 0; i < ssize(m_MacroReductions); ++i) {
294  if(metric[i].second != 0) {
295  metric[i].second = std::numeric_limits<double>::quiet_NaN();
296  }
297  }
298  }
299 
300  for(const auto& [name, fn] : m_MicroReductions) {
301  metric.emplace_back(name, fn(micro));
302  }
303 
304  return metric;
305 }
306 
307 #ifndef DOCTEST_CONFIG_DISABLE
308 #include "doctest.h"
309 
310 // NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers)
311 
312 namespace {
313  using pred_mat_t = Eigen::Matrix<long, 1, Eigen::Dynamic>;
314  auto make_labels(std::initializer_list<long> init_list) {
315  auto vec = std::vector<dismec::label_id_t>{};
316  vec.reserve(init_list.size());
317  for(const auto& i : init_list) {
318  vec.emplace_back(i);
319  }
320  return vec;
321  }
322 
323  template<class T>
324  void update_metric(T& target, std::initializer_list<long> prediction, std::initializer_list<long> labels) {
325  auto labels_vec = make_labels(labels);
326  auto pred_mat = pred_mat_t{prediction};
327  std::vector<sTrueLabelInfo> true_info;
328  std::vector<sPredLabelInfo> pred_info;
329  EvaluateMetrics::process_prediction(labels_vec, pred_mat, true_info, pred_info);
330  target.update(pred_info, true_info);
331  }
332 
333 }
334 
338 TEST_CASE("precision_at_k") {
339  auto pat3 = InstanceRankedPositives(13, 3);
340  CHECK(pat3.value() == 0.0);
341  update_metric(pat3, {2, 4, 6, 12}, {1, 4, 8, 12});
342  CHECK(pat3.value() == 1.0 / 3.0);
343  update_metric(pat3, {3, 1, 2, 5}, {2, 3});
344  CHECK(pat3.value() == 3.0 / 6.0);
345  update_metric(pat3, {1, 2, 3, 4, 5}, {4, 5, 6});
346  CHECK(pat3.value() == 3.0 / 9.0);
347  update_metric(pat3, {1, 2, 3}, {});
348  CHECK(pat3.value() == 3.0 / 12.0);
349  update_metric(pat3, {3, 2, 1}, {1, 2, 3});
350  CHECK(pat3.value() == 6.0 / 15.0);
351 }
352 
356 TEST_CASE("abandonment_at_k") {
357  auto aat3 = AbandonmentAtK(13, 3);
358  CHECK(aat3.value() == 0.0);
359  update_metric(aat3, {2, 4, 6, 12}, {1, 4, 8, 12});
360  CHECK(aat3.value() == 1.0 / 1.0);
361  update_metric(aat3, {3, 1, 2, 5}, {2, 3});
362  CHECK(aat3.value() == 2.0 / 2.0);
363  update_metric(aat3, {1, 2, 3, 4, 5}, {4, 5, 6});
364  CHECK(aat3.value() == 2.0 / 3.0);
365  update_metric(aat3, {1, 2, 3}, {});
366  CHECK(aat3.value() == 2.0 / 4.0);
367  update_metric(aat3, {3, 2, 1}, {1, 2, 3});
368  CHECK(aat3.value() == 3.0 / 5.0);
369 }
370 
371 
375  /*
376 TEST_CASE("coverage_at_k") {
377  auto cat3 = CoverageAtK(3, 20);
378  CHECK(cat3.value() == 0.0);
379  cat3.update(pred_mat_t{{2, 4, 6, 12}}, make_labels({1, 4, 8, 12}));
380  CHECK(cat3.value() == 1.0 / 20.0);
381  cat3.update(pred_mat_t{{3, 1, 2, 5}}, make_labels({2, 3}));
382  CHECK(cat3.value() == 3.0 / 20.0);
383  cat3.update(pred_mat_t{{1, 2, 3, 4, 5}}, make_labels({4, 5, 6}));
384  CHECK(cat3.value() == 3.0 / 20.0);
385  cat3.update(pred_mat_t{{1, 2, 3}}, {});
386  CHECK(cat3.value() == 3.0 / 20.0);
387  cat3.update(pred_mat_t{{3, 2, 1}}, make_labels({1, 2, 3}));
388  CHECK(cat3.value() == 4.0 / 20.0);
389 }
390 */
391 // NOLINTEND(cppcoreguidelines-avoid-magic-numbers)
392 #endif
Float value() const
Definition: sum.h:22
Strong typedef for an int to signify a label id.
Definition: types.h:20
constexpr T to_index() const
Explicitly convert to an integer.
Definition: opaque_int.h:32
void update(const pd_info_vec &prediction, const gt_info_vec &labels) override
Definition: metrics.cpp:150
std::unique_ptr< MetricCollectionInterface > clone() const override
Definition: metrics.cpp:162
AbandonmentAtK(long num_labels, long k)
Definition: metrics.cpp:147
void reduce(const MetricCollectionInterface &other) override
Definition: metrics.cpp:46
ConfusionMatrixRecorder(long num_labels, long k)
Definition: metrics.cpp:25
std::unique_ptr< MetricCollectionInterface > clone() const override
Definition: metrics.cpp:65
std::vector< ConfusionMatrix > m_Confusion
Definition: metrics.h:59
void update(const pd_info_vec &prediction, const gt_info_vec &labels) override
Definition: metrics.cpp:29
ConfusionMatrix get_confusion_matrix(label_id_t label) const
Definition: metrics.cpp:58
static void process_prediction(const std::vector< label_id_t > &raw_labels, const prediction_t &raw_prediction, std::vector< sTrueLabelInfo > &proc_labels, std::vector< sPredLabelInfo > &proc_pred)
Definition: evaluate.cpp:24
KahanAccumulator< double > m_Accumulator
Definition: metrics.h:74
void reduce(const MetricCollectionInterface &other) override
Definition: metrics.cpp:79
std::unique_ptr< MetricCollectionInterface > clone() const override
Definition: metrics.cpp:139
void update(const pd_info_vec &prediction, const gt_info_vec &labels) override
Definition: metrics.cpp:119
std::vector< double > m_Cumulative
Definition: metrics.h:89
InstanceRankedPositives(long num_labels, long k, bool normalize=false)
Definition: metrics.cpp:100
std::vector< metric_t > get_values() const override
Definition: metrics.cpp:175
InstanceWiseMetricReporter(std::string name, const InstanceAveragedMetric *metric)
Definition: metrics.cpp:170
const InstanceAveragedMetric * m_Metric
Definition: metrics.h:115
void add_coverage(double threshold, std::string name={})
Definition: metrics.cpp:179
MacroMetricReporter(const ConfusionMatrixRecorder *confusion)
Definition: metrics.cpp:264
std::vector< metric_t > get_values() const override
Definition: metrics.cpp:270
const ConfusionMatrixRecorder * m_ConfusionMatrix
Definition: metrics.h:151
void add_reduction(std::string name, ReductionType type, std::function< double(const ConfusionMatrix &)>)
Definition: metrics.cpp:256
std::vector< std::pair< std::string, reduction_fn > > m_MacroReductions
Definition: metrics.h:149
std::vector< std::pair< std::string, reduction_fn > > m_MicroReductions
Definition: metrics.h:150
void add_reduction_helper(std::string name, const char *pattern, ReductionType type, std::function< double(const ConfusionMatrix &)> fn)
Definition: metrics.cpp:248
Base class for all metrics that can be calculated during the evaluation phase.
Definition: metrics.h:28
long num_labels() const
Gets the number of labels.
Definition: metrics.h:38
std::vector< sPredLabelInfo > pd_info_vec
Definition: metrics.h:31
std::vector< sTrueLabelInfo > gt_info_vec
Definition: metrics.h:30
std::pair< std::string, double > metric_t
Definition: metrics.h:105
TEST_CASE("precision_at_k")
Definition: metrics.cpp:338
#define IMPLEMENT_ADD_METRIC(METRIC, SHORTHAND)
Definition: metrics.cpp:205
std::vector< double > uniform_weights(long k)
Definition: metrics.cpp:92
Eigen::Matrix< long, 1, Eigen::Dynamic > pred_mat_t
Definition: metrics.cpp:313
auto make_labels(std::initializer_list< long > init_list)
Definition: metrics.cpp:314
void update_metric(T &target, std::initializer_list< long > prediction, std::initializer_list< long > labels)
Definition: metrics.cpp:324
constexpr const char * reduction_name(MacroMetricReporter::ReductionType type)
Definition: metrics.cpp:195
constexpr double positive_likelihood_ratio(const ConfusionMatrixBase< T > &matrix)
constexpr double matthews(const ConfusionMatrixBase< T > &matrix)
constexpr double diagnostic_odds_ratio(const ConfusionMatrixBase< T > &matrix)
constexpr double negative_predictive_value(const ConfusionMatrixBase< T > &matrix)
constexpr double fowlkes_mallows(const ConfusionMatrixBase< T > &matrix)
constexpr double precision(const ConfusionMatrixBase< T > &matrix)
constexpr double markedness(const ConfusionMatrixBase< T > &matrix)
constexpr double balanced_accuracy(const ConfusionMatrixBase< T > &matrix)
constexpr double informedness(const ConfusionMatrixBase< T > &matrix)
constexpr double f_beta(const ConfusionMatrixBase< T > &matrix, double beta)
constexpr double specificity(const ConfusionMatrixBase< T > &matrix)
constexpr double recall(const ConfusionMatrixBase< T > &matrix)
constexpr double accuracy(const ConfusionMatrixBase< T > &matrix)
constexpr double negative_likelihood_ratio(const ConfusionMatrixBase< T > &matrix)
constexpr auto ssize(const C &c) -> std::common_type_t< std::ptrdiff_t, std::make_signed_t< decltype(c.size())>>
signed size free function. Taken from https://en.cppreference.com/w/cpp/iterator/size
Definition: conversion.h:42
#define ALWAYS_ASSERT_EQUAL(x, y, msg)
Definition: throw_error.h:24
#define THROW_EXCEPTION(exception_type,...)
Definition: throw_error.h:16