DiSMEC++
multi_pos.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "subset.h"
7 #include "utils/hash_vector.h"
8 #include "stats/collection.h"
9 #include "stats/timer.h"
10 #include "data/data.h"
11 #include "objective/objective.h"
12 #include <Eigen/Dense>
13 
14 using namespace dismec::init;
15 
16 namespace dismec::init {
// Compile-time lookup keyed on the `Sparse` flag. Other code in this file refers to
// `TypeLookup<Sparse>::MatrixType` and `TypeLookup<Sparse>::VectorType`.
// NOTE(review): the member aliases of both specializations are not visible in this
// listing (lines 22-23 and 28-29 are absent) -- confirm against the real source file.
17  template<bool Sparse>
18  struct TypeLookup;
19 
// Specialization chosen for Sparse == false.
20  template<>
21  struct TypeLookup<false> {
24  };
25 
// Specialization chosen for Sparse == true.
26  template<>
27  struct TypeLookup<true> {
30  };
31 
// Declaration of MultiPosMeanInitializer<Sparse>, the per-label weight initializer
// implemented further down in this file.
// NOTE(review): the class-head line and several members (listing lines 33-35, 44,
// 49-51, 58-59) are missing from this listing.
32  template<bool Sparse>
36 
37  public:
// Forwards data, mean_of_all, local_features, pos and neg to SubsetFeatureMeanInitializer
// and keeps max_pos (see the out-of-class definition).
38  MultiPosMeanInitializer(std::shared_ptr<const DatasetBase> data,
39  const DenseRealVector& mean_of_all,
40  std::shared_ptr<const GenericFeatureMatrix> local_features,
41  int max_pos, real_t pos, real_t neg);
42 
// Writes the initial weight vector for `label_id` into `target`.
43  void get_initial_weight(label_id_t label_id, Eigen::Ref<DenseRealVector> target,
45 
46  private:
// Buffer for the feature rows of the positive instances of the current label;
// written by extract_sub_dataset, read by get_initial_weight.
47  std::vector<VectorType> m_PositiveInstances;
// Labels with more positives than this take the averaged-positives branch
// of get_initial_weight instead of the Gram-system branch.
48  int m_MaxPos;
// Cholesky (LLT) decomposition used to solve the Gram system in get_initial_weight.
52  Eigen::LLT<types::DenseRowMajor<real_t>> m_LLT;
53 
// Added to every diagonal entry of the Gram matrix before it is factorized.
54  real_t m_Lambda = 0.01;
55 
// Copies the feature rows of all positives of `label_id` into m_PositiveInstances.
56  void extract_sub_dataset(label_id_t label_id);
57 
60  };
61 
// Declaration of MultiPosMeanStrategy, the strategy object that creates
// MultiPosMeanInitializer instances (see make_initializer below).
// NOTE(review): the class-head line (listing line 62) and the private member at
// listing line 75 are missing from this listing.
63  public:
// Forwards `data` and both targets to SubsetFeatureMeanStrategy and stores `max_positives`.
// Parameter order matters: the NEGATIVE target comes before the POSITIVE target.
64  MultiPosMeanStrategy(std::shared_ptr<const DatasetBase> data, real_t negative_target, real_t positive_target,
65  int max_positives) :
66  SubsetFeatureMeanStrategy(std::move(data), negative_target, positive_target),
67  m_MaxPositives(max_positives)
68  {
69  }
70 
// Builds an initializer for the given feature matrix; the sparse or dense
// instantiation is chosen at run time from `features->is_sparse()`.
71  [[nodiscard]] std::unique_ptr<WeightsInitializer>
72  make_initializer(const std::shared_ptr<const GenericFeatureMatrix>& features) const override;
73 
74  private:
76  };
77 }
78 
// Constructor: hands data, mean_of_all, local_features and the two targets to the
// SubsetFeatureMeanInitializer base, remembers max_pos, and constructs the LLT solver
// with size max_pos + 1 (get_initial_weight builds a (num_pos + 1) x (num_pos + 1)
// Gram matrix with num_pos <= max_pos: one row for the negatives plus one per positive).
// NOTE(review): listing line 87 (first statement of the body) is missing here.
79 template<bool b>
80 MultiPosMeanInitializer<b>::MultiPosMeanInitializer(std::shared_ptr<const DatasetBase> data,
81  const DenseRealVector& mean_of_all,
82  std::shared_ptr<const GenericFeatureMatrix> local_features, int max_pos,
83  real_t pos, real_t neg):
84  SubsetFeatureMeanInitializer(std::move(data), mean_of_all, std::move(local_features), pos, neg),
85  m_MaxPos(max_pos), m_LLT(max_pos + 1)
86 {
88 
// Register the two statistics that get_initial_weight records for every label.
89  declare_stat(STAT_NUM_POS, {"num_pos", "#positives"});
90  declare_stat(STAT_LOSS_REDUCTION, {"loss_reduction", "(f(0)-f(w))/f(0) [%]"});
91 }
92 
93 template<bool Sparse>
95  label_id_t label_id,
96  Eigen::Ref<DenseRealVector> target,
98  auto timer = make_timer(STAT_DURATION);
99  m_DataSet->get_labels(label_id, m_LabelBuffer);
100 
101  int num_pos = m_DataSet->num_positives(label_id);
102  if(num_pos > m_MaxPos) {
103  // this code is just copied from avg_of_pos
104  target.setZero();
105  for(int i = 0; i < m_LabelBuffer.size(); ++i) {
106  if(m_LabelBuffer.coeff(i) > 0.0) {
107  target += m_LocalFeatures->get<MatrixType>().row(i) / (real_t)num_pos;
108  }
109  }
110 
111  auto [p, a] = calculate_factors(label_id, target);
112  target = target * p + m_MeanOfAll * a;
113  } else {
114  real_t num_samples = m_DataSet->num_examples();
115  extract_sub_dataset(label_id);
116 
117  // at this point, m_Averages is prepared and we can start calculating the Gram matrix
118  m_GramMatrix.resize(num_pos + 1, num_pos + 1);
119  m_Target.resize(num_pos + 1);
120  m_Target.coeffRef(0) = m_NegTarget;
121  for (int i = 1; i < num_pos + 1; ++i) {
122  m_Target.coeffRef(i) = m_PosTarget;
123  }
124 
125  // the negatives are a bit tricky
126  // <N, N> = <X, X> - 2 <X, Ai> + <Ai, Ai>
127  m_GramMatrix.coeffRef(0, 0) = m_MeanAllNormSquared;
128 
129  // fill in the part of the gram matrix that is built by the positives
130  for (int i = 0; i < num_pos; ++i) {
131  for (int j = i; j < num_pos; ++j) {
132  auto& a = m_PositiveInstances[i];
133  auto& b = m_PositiveInstances[j];
134  real_t dot = a.dot(b);
135  m_GramMatrix.coeffRef(i + 1, j + 1) = dot;
136  m_GramMatrix.coeffRef(j + 1, i + 1) = dot;
137  }
138 
139  // adjustments for the negatives
140  m_GramMatrix.coeffRef(0, 0) += m_GramMatrix.coeffRef(i + 1, i + 1) / num_samples / num_samples;
141  real_t xTa = m_PositiveInstances[i].dot(m_MeanOfAll);
142  m_GramMatrix.coeffRef(0, i+1) = xTa;
143  m_GramMatrix.coeffRef(0, 0) -= 2*xTa / num_samples;
144  }
145 
146  // fix up the <N, Aj> elements
147  // <N, Aj> = <X, Aj> - sum <Ai, Aj>/n
148  for (int i = 0; i < num_pos; ++i) {
149  for (int j = 0; j < num_pos; ++j) {
150  m_GramMatrix.coeffRef(0, i + 1) -= m_GramMatrix.coeffRef(j, i + 1) / num_samples;
151  }
152  m_GramMatrix.coeffRef(i + 1, 0) = m_GramMatrix.coeff(0, i + 1);
153 
154  // also put in the regularizer
155  m_GramMatrix.coeffRef(i + 1, i + 1) += m_Lambda;
156  }
157  m_GramMatrix.coeffRef(0, 0) += m_Lambda;
158 
159  m_LLT.compute(m_GramMatrix);
160  m_AlphaVector = m_LLT.solve(m_Target);
161 
162  // reconstruct the initial vector
163  target = m_AlphaVector[0] * m_MeanOfAll;
164  for (int i = 1; i < num_pos + 1; ++i) {
165  target += (m_AlphaVector[i] - m_AlphaVector[0] / num_samples) * m_PositiveInstances[i - 1];
166  }
167  }
168 
169  record(STAT_NUM_POS, [&]() -> long { return m_DataSet->num_positives(label_id); });
170  record(STAT_LOSS_REDUCTION, [&]() {
171  HashVector temp{target};
172  real_t obj_at_new = objective.value(temp);
173  temp.modify().setZero();
174  real_t obj_at_zero = objective.value(temp);
175  return 100.f * (obj_at_zero - obj_at_new) / obj_at_zero;
176  });
177 }
178 
// Collects the feature rows of all positive instances of `label_id` into
// m_PositiveInstances[0 .. number of positives).
// NOTE(review): the signature line (listing line 180) is missing from this listing.
179 template<bool Sparse>
// Precondition (checked only when assertions are enabled): the label has at most m_MaxPos positives.
181  assert( m_DataSet->num_positives(label_id) <= m_MaxPos);
182 
// Load the label vector of `label_id` into the shared buffer.
183  m_DataSet->get_labels(label_id, m_LabelBuffer);
184 
// pos_count is the next free slot in m_PositiveInstances.
185  int pos_count = 0;
186  for(int i = 0; i < m_LabelBuffer.size(); ++i) {
// Entries <= 0 are treated as negatives and skipped.
187  if(m_LabelBuffer.coeff(i) <= 0.0) {
188  continue;
189  }
190 
// NOTE(review): indexed without a bounds check; presumably m_PositiveInstances holds at
// least m_MaxPos entries -- its sizing is not visible in this listing, confirm.
191  m_PositiveInstances[pos_count] = m_LocalFeatures->get<MatrixType>().row(i);
192 
193  ++pos_count;
194  }
195 }
196 
// Creates the initializer for `features`, selecting the template instantiation at run time:
// MultiPosMeanInitializer<true> when features->is_sparse() is true, MultiPosMeanInitializer<false>
// otherwise.
// NOTE(review): the constructor-argument lines of both make_unique calls (listing lines 201
// and 204) are missing from this listing.
197 std::unique_ptr<WeightsInitializer>
198 MultiPosMeanStrategy::make_initializer(const std::shared_ptr<const GenericFeatureMatrix>& features) const {
199  if(features->is_sparse()) {
// sparse feature matrix
200  return std::make_unique<MultiPosMeanInitializer<true>>(
202  } else {
// dense feature matrix
203  return std::make_unique<MultiPosMeanInitializer<false>>(
205 
206  }
207 
208 
209 }
210 
211 
212 std::shared_ptr<WeightInitializationStrategy> dismec::init::create_multi_pos_mean_strategy(std::shared_ptr<DatasetBase> data, int max_pos, real_t pos, real_t neg) {
213  return std::make_shared<MultiPosMeanStrategy>(std::move(data), pos, neg, max_pos);
214 }
An Eigen vector with versioning information, to implement simple caching of results.
Definition: hash_vector.h:43
void get_initial_weight(label_id_t label_id, Eigen::Ref< DenseRealVector > target, objective::Objective &objective) override
Generate an initial vector for the given label. The result should be placed in target.
Definition: multi_pos.cpp:94
MultiPosMeanInitializer(std::shared_ptr< const DatasetBase > data, const DenseRealVector &mean_of_all, std::shared_ptr< const GenericFeatureMatrix > local_features, int max_pos, real_t pos, real_t neg)
Definition: multi_pos.cpp:80
types::DenseRowMajor< real_t > m_GramMatrix
Definition: multi_pos.cpp:49
Eigen::LLT< types::DenseRowMajor< real_t > > m_LLT
Definition: multi_pos.cpp:52
typename TypeLookup< Sparse >::MatrixType MatrixType
Definition: multi_pos.cpp:34
std::vector< VectorType > m_PositiveInstances
Definition: multi_pos.cpp:47
typename TypeLookup< Sparse >::VectorType VectorType
Definition: multi_pos.cpp:35
void extract_sub_dataset(label_id_t label_id)
Definition: multi_pos.cpp:180
MultiPosMeanStrategy(std::shared_ptr< const DatasetBase > data, real_t negative_target, real_t positive_target, int max_positives)
Definition: multi_pos.cpp:64
std::unique_ptr< WeightsInitializer > make_initializer(const std::shared_ptr< const GenericFeatureMatrix > &features) const override
Creates a new, thread-local WeightsInitializer.
Definition: multi_pos.cpp:198
std::shared_ptr< const DatasetBase > m_DataSet
Definition: subset.h:42
DenseRealVector m_MeanOfAllInstances
Definition: subset.h:43
Strong typedef for an int to signify a label id.
Definition: types.h:20
Class that models an optimization objective.
Definition: objective.h:41
void declare_stat(stat_id_t index, StatisticMetaData meta)
Declares a new statistic. This function just forwards all its arguments to the internal StatisticsCo...
Definition: tracked.cpp:16
constexpr stat_id_t STAT_DURATION
Definition: sparsify.cpp:22
std::shared_ptr< WeightInitializationStrategy > create_multi_pos_mean_strategy(std::shared_ptr< DatasetBase > data, int max_pos, real_t pos=1, real_t neg=-2)
Creates an initialization strategy based on the mean of positive and negative features.
Definition: multi_pos.cpp:212
outer_const< T, dense_row_major_h > DenseRowMajor
Definition: type_helpers.h:43
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
Definition: matrix_types.h:58
types::SparseVector< real_t > SparseRealVector
Definition: matrix_types.h:41
types::DenseVector< real_t > DenseRealVector
Any dense, real-valued vector.
Definition: matrix_types.h:40
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
Definition: matrix_types.h:50
float real_t
The default type for floating point values.
Definition: config.h:17