DiSMEC++
dense_and_sparse.h
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #ifndef DISMEC_SRC_OBJECTIVE_DENSE_AND_SPARSE_H
7 #define DISMEC_SRC_OBJECTIVE_DENSE_AND_SPARSE_H
8 
9 #include "objective.h"
10 #include "utils/hash_vector.h"
11 
12 namespace dismec::objective {
13 
21  public:
/// Constructs the objective from the two parts of the feature matrix.
/// \param dense_features Shared pointer to the dense part of the feature matrix.
/// \param sparse_features Shared pointer to the sparse part of the feature matrix.
22  DenseAndSparseLinearBase(std::shared_ptr<const GenericFeatureMatrix> dense_features,
23  std::shared_ptr<const GenericFeatureMatrix> sparse_features);
24 
/// NOTE(review): presumably the number of training instances (rows of the feature matrices) -- confirm in the .cpp.
25  [[nodiscard]] long num_instances() const noexcept;
/// Overrides the base-class query for the number of variables. `get_num_variables()` is the
/// non-virtual function that holds the actual implementation.
26  [[nodiscard]] long num_variables() const noexcept override;
27 
/// Returns a non-const reference to a label vector (presumably `m_Y`, whose entries encode
/// "label present" as 1 and "label absent" as -1 -- confirm in the .cpp).
28  [[nodiscard]] BinaryLabelVector& get_label_ref();
/// NOTE(review): presumably sets the label-dependent costs (`m_Costs`) used for positive and
/// negative labels -- confirm against the definition.
29  void update_costs(real_t positive, real_t negative);
/// NOTE(review): presumably replaces the dense and sparse feature matrices; confirm whether
/// cached results are invalidated by this call.
30  void update_features(const DenseFeatures& dense, const SparseFeatures& sparse);
31 
32  protected:
/// Actual, non-virtual implementation of `num_variables()`.
34  [[nodiscard]] long get_num_variables() const noexcept;
35 
/// Calculates the vector of feature matrix times weights `w`.
/// The result is returned by const reference; `m_X_times_w` caches the last result and
/// `m_Last_W` records the hash of the argument it belongs to.
44  const DenseRealVector& x_times_w(const HashVector& w);
45 
51  template<class Derived>
52  void update_xtw_cache(const HashVector& new_weight, const Eigen::MatrixBase<Derived>& new_result) {
53  // update the cached result to the new value
54  m_X_times_w.noalias() = new_result;
55  // and set the hash so that we can identify calls using the new weights
56  m_Last_W = new_weight.hash();
57  }
58 
/// Prepares the cache variables for line projection.
/// NOTE(review): presumably fills `m_LsCache_xTw` (features times weights) from `location` and
/// `m_LsCache_xTd` (features times direction) from `direction` -- confirm in the .cpp.
69  void project_linear_to_line(const HashVector& location, const DenseRealVector& direction);
70 
71  [[nodiscard]] auto line_interpolation(real_t t) const {
72  return m_LsCache_xTw + t * m_LsCache_xTd;
73  }
74 
75  void declare_vector_on_last_line(const HashVector& location, real_t t) override {
77  }
78 
/// Read-only access to the dense part of the feature matrix, typed as `DenseFeatures`.
79  [[nodiscard]] const DenseFeatures& dense_features() const;
/// Read-only access to the sparse part of the feature matrix, typed as `SparseFeatures`.
80  [[nodiscard]] const SparseFeatures& sparse_features() const;
81 
/// Read-only access to a cost vector (presumably the label-dependent costs `m_Costs` -- confirm).
82  [[nodiscard]] const DenseRealVector& costs() const;
/// Read-only access to a label vector (presumably `m_Y` -- confirm).
83  [[nodiscard]] const BinaryLabelVector& labels() const;
84  private:
// The functions below override virtual hooks of the base class; their definitions are not in this header.
// NOTE(review): the `_unchecked` suffix suggests the public base-class entry points validate arguments
// before forwarding here -- confirm in objective.h / objective.cpp.

/// Override: objective value at `location`.
85  real_t value_unchecked(const HashVector& location) override;
86 
/// Override: looks up the value of the objective on the line defined by the last call to project_to_line().
87  real_t lookup_on_line(real_t position) override;
88 
/// Override: gradient at `location`, written into `target`.
89  void gradient_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) override;
90 
/// Override: gradient for an all-zero weight vector (no location argument), written into `target`.
91  void gradient_at_zero_unchecked(Eigen::Ref<DenseRealVector> target) override;
92 
/// Override: product of the Hessian at `location` with `direction`, written into `target`.
93  void hessian_times_direction_unchecked(const HashVector& location,
94  const DenseRealVector& direction,
95  Eigen::Ref<DenseRealVector> target) override;
96 
/// Override: diagonal preconditioner at `location`, written into `target`.
97  void diag_preconditioner_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) override;
98 
/// Override: combined variant that fills both `gradient` and `pre` for the same `location`.
99  void gradient_and_pre_conditioner_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> gradient,
100  Eigen::Ref<DenseRealVector> pre) override;
101 
/// Override: prepares a line search starting at `location` along `direction`.
102  void project_to_line_unchecked(const HashVector& location, const DenseRealVector& direction) override;
103 
105 
// Pure-virtual hooks that a derived class implements to define the loss. Each receives the
// scores and the labels and writes its result into `out`.

/// Loss values for the given scores and labels.
106  virtual void calculate_loss(const DenseRealVector& scores,
107  const BinaryLabelVector& labels,
108  DenseRealVector& out) const = 0;
109 
/// First derivative of the loss (presumably with respect to the scores -- confirm).
110  virtual void calculate_derivative(const DenseRealVector& scores,
111  const BinaryLabelVector& labels,
112  DenseRealVector& out) const = 0;
113 
/// Second derivative of the loss (presumably with respect to the scores -- confirm).
114  virtual void calculate_2nd_derivative(const DenseRealVector& scores,
115  const BinaryLabelVector& labels,
116  DenseRealVector& out) const = 0;
117 
/// Returns a derivative vector for `location` by const reference.
/// NOTE(review): the name suggests the result is cached per `location` hash -- confirm in the .cpp.
118  const DenseRealVector& cached_derivative(const HashVector& location);
/// Second-derivative counterpart of `cached_derivative()`; same caching caveat applies.
119  const DenseRealVector& cached_2nd_derivative(const HashVector& location);
120 
121  // needed for regularization
122  [[nodiscard]] virtual real_t regularization_value(const DenseRealVector& weights) const = 0;
123  virtual void regularization_gradient(const DenseRealVector& weights, Eigen::Ref<DenseRealVector> gradient) const = 0;
124  virtual void regularization_gradient_at_zero(Eigen::Ref<DenseRealVector> gradient) const = 0;
125  virtual void regularization_preconditioner(const DenseRealVector& weights, Eigen::Ref<DenseRealVector> pre_cond) const = 0;
126  virtual void regularization_hessian(const DenseRealVector& weights, const DenseRealVector& direction, Eigen::Ref<DenseRealVector> pre_cond) const = 0;
127 
/// Pointer to the dense part of the feature matrix.
129  std::shared_ptr<const GenericFeatureMatrix> m_DenseFeatures;
/// Pointer to the sparse part of the feature matrix.
131  std::shared_ptr<const GenericFeatureMatrix> m_SparseFeatures;
132 
137 
142 
145 
/// NOTE(review): presumably discards cached results that depend on the label vector, to be
/// called after the labels were modified -- the definition is not in this header; confirm.
150  void invalidate_labels();
151 
154 
158 
161  };
162 
163  template<class MarginFunction, class SparseRegFunction, class DenseRegFunction>
165  DenseAndSparseMargin(std::shared_ptr<const GenericFeatureMatrix> dense_features,
166  std::shared_ptr<const GenericFeatureMatrix> sparse_features,
167  MarginFunction phi, DenseRegFunction dr, real_t drs, SparseRegFunction sr, real_t srs) :
169  Phi(std::move(phi)), DenseRegStrength(drs), DenseReg(dr), SparseRegStrength(srs), SparseReg(sr)
170  {
171 
172  }
173 
174  void calculate_loss(const DenseRealVector& scores,
175  const BinaryLabelVector& labels,
176  DenseRealVector& out) const override {
177  assert(scores.size() == labels.size());
178  for(int i = 0; i < scores.size(); ++i) {
179  real_t margin = scores.coeff(i) * real_t(labels.coeff(i));
180  out.coeffRef(i) = Phi.value(margin);
181  }
182  }
183 
185  const BinaryLabelVector& labels,
186  DenseRealVector& out) const override {
187  assert(scores.size() == labels.size());
188  for(int i = 0; i < scores.size(); ++i) {
189  real_t label = labels.coeff(i);
190  real_t margin = scores.coeff(i) * label;
191  out.coeffRef(i) = Phi.grad(margin) * label;
192  }
193  }
194 
196  const BinaryLabelVector& labels,
197  DenseRealVector& out) const override {
198  assert(scores.size() == labels.size());
199  for(int i = 0; i < scores.size(); ++i) {
200  real_t margin = scores.coeff(i) * real_t(labels.coeff(i));
201  out.coeffRef(i) = Phi.quad(margin);
202  }
203  }
204 
205  [[nodiscard]] real_t regularization_value(const DenseRealVector& weights) const override {
206  int sparse_start = dense_features().cols();
207  int sparse_end = sparse_start + sparse_features().cols();
208  real_t sparse_value = 0;
209  real_t dense_value = 0;
210  for(int i = 0; i < sparse_start; ++i) {
211  dense_value += DenseReg.value(weights.coeff(i));
212  }
213  for(int i = sparse_start; i < sparse_end; ++i) {
214  sparse_value += SparseReg.value(weights.coeff(i));
215  }
216  return SparseRegStrength * sparse_value + DenseRegStrength * dense_value;
217  }
218 
219  void regularization_gradient(const DenseRealVector& weights, Eigen::Ref<DenseRealVector> gradient) const override {
220  int sparse_start = dense_features().cols();
221  int sparse_end = sparse_start + sparse_features().cols();
222 
223  // calculate and fill in the pointwise gradient
224  for (long i = 0; i < sparse_start; ++i) {
225  gradient.coeffRef(i) = DenseRegStrength * DenseReg.grad(weights.coeff(i));
226  }
227  for (long i = sparse_start; i < sparse_end; ++i) {
228  gradient.coeffRef(i) = SparseRegStrength * SparseReg.grad(weights.coeff(i));
229  }
230  }
231 
232  void regularization_gradient_at_zero(Eigen::Ref<DenseRealVector> gradient) const override {
233  int sparse_start = dense_features().cols();
234  int sparse_end = sparse_start + sparse_features().cols();
235 
236  real_t dense_zero = DenseRegStrength * DenseReg.grad(real_t{0});
237  real_t sparse_zero = SparseRegStrength * SparseReg.grad(real_t{0});
238  // calculate and fill in the pointwise gradient
239  for (long i = 0; i < sparse_start; ++i) {
240  gradient.coeffRef(i) = dense_zero;
241  }
242  for (long i = sparse_start; i < sparse_end; ++i) {
243  gradient.coeffRef(i) = sparse_zero;
244  }
245  }
246 
247  void regularization_preconditioner(const DenseRealVector& weights, Eigen::Ref<DenseRealVector> pre_cond) const override {
248  int sparse_start = dense_features().cols();
249  int sparse_end = sparse_start + sparse_features().cols();
250 
251  // calculate and fill in the pointwise gradient
252  for (long i = 0; i < sparse_start; ++i) {
253  pre_cond.coeffRef(i) = DenseRegStrength * DenseReg.quad(weights.coeff(i));
254  }
255  for (long i = sparse_start; i < sparse_end; ++i) {
256  pre_cond.coeffRef(i) = SparseRegStrength * SparseReg.quad(weights.coeff(i));
257  }
258  }
259 
260  void regularization_hessian(const DenseRealVector& weights, const DenseRealVector& direction, Eigen::Ref<DenseRealVector> target) const override {
261  int sparse_start = dense_features().cols();
262  int sparse_end = sparse_start + sparse_features().cols();
263 
264  // calculate and fill in the pointwise gradient
265  for (long i = 0; i < sparse_start; ++i) {
266  target.coeffRef(i) = DenseRegStrength * DenseReg.quad(weights.coeff(i)) * direction.coeff(i);
267  }
268  for (long i = sparse_start; i < sparse_end; ++i) {
269  target.coeffRef(i) = SparseRegStrength * SparseReg.quad(weights.coeff(i)) * direction.coeff(i);
270  }
271  }
272 
/// Margin function object; its `value`, `grad` and `quad` members are applied to the
/// margin `score * label` by the loss hooks of this class.
273  MarginFunction Phi;
/// Regularizer applied to the weights of the dense features (the first `dense_features().cols()` entries).
275  DenseRegFunction DenseReg;
/// Regularizer applied to the weights of the sparse features (the entries after the dense block).
277  SparseRegFunction SparseReg;
278  };
279 
/// Factory that returns an owning pointer to a `DenseAndSparseLinearBase` objective.
/// NOTE(review): judging by the name, the returned objective uses a squared-hinge margin
/// function; which regularizers it uses is not visible in this header -- confirm in the .cpp.
/// \param dense_features Shared pointer to the dense part of the feature matrix.
/// \param dense_reg_strength Regularization strength for the dense weights.
/// \param sparse_features Shared pointer to the sparse part of the feature matrix.
/// \param sparse_reg_strength Regularization strength for the sparse weights.
280  std::unique_ptr<DenseAndSparseLinearBase> make_sp_dense_squared_hinge(
281  std::shared_ptr<const GenericFeatureMatrix> dense_features,
282  real_t dense_reg_strength,
283  std::shared_ptr<const GenericFeatureMatrix> sparse_features,
284  real_t sparse_reg_strength
285  );
286 }
287 
288 #endif //DISMEC_SRC_OBJECTIVE_DENSE_AND_SPARSE_H
An Eigen vector with versioning information, to implement simple caching of results.
Definition: hash_vector.h:43
VectorHash hash() const
Gets the unique id of this vector.
Definition: hash_vector.cpp:45
A unique identifier for a HashVector.
Definition: hash_vector.h:118
Base class for implementations of an objective that combines dense features and sparse features.
void gradient_and_pre_conditioner_unchecked(const HashVector &location, Eigen::Ref< DenseRealVector > gradient, Eigen::Ref< DenseRealVector > pre) override
void project_to_line_unchecked(const HashVector &location, const DenseRealVector &direction) override
virtual void calculate_2nd_derivative(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const =0
void update_features(const DenseFeatures &dense, const SparseFeatures &sparse)
BinaryLabelVector m_Y
Label vector – use a vector of ints here. We encode label present == 1, absent == -1.
virtual void calculate_derivative(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const =0
const DenseRealVector & costs() const
std::shared_ptr< const GenericFeatureMatrix > m_DenseFeatures
Pointer to the dense part of the feature matrix.
void diag_preconditioner_unchecked(const HashVector &location, Eigen::Ref< DenseRealVector > target) override
void hessian_times_direction_unchecked(const HashVector &location, const DenseRealVector &direction, Eigen::Ref< DenseRealVector > target) override
real_t value_from_xTw(const DenseRealVector &xTw)
const DenseRealVector & cached_derivative(const HashVector &location)
void gradient_at_zero_unchecked(Eigen::Ref< DenseRealVector > target) override
const SparseFeatures & sparse_features() const
virtual void regularization_gradient(const DenseRealVector &weights, Eigen::Ref< DenseRealVector > gradient) const =0
virtual void regularization_preconditioner(const DenseRealVector &weights, Eigen::Ref< DenseRealVector > pre_cond) const =0
real_t value_unchecked(const HashVector &location) override
const DenseRealVector & cached_2nd_derivative(const HashVector &location)
void update_costs(real_t positive, real_t negative)
DenseRealVector m_LsCache_xTw
cache for line search implementation: feature times weights
virtual void regularization_gradient_at_zero(Eigen::Ref< DenseRealVector > gradient) const =0
const BinaryLabelVector & labels() const
void declare_vector_on_last_line(const HashVector &location, real_t t) override
State that the given vector corresponds to a certain position on the line of the last line search.
virtual void calculate_loss(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const =0
DenseRealVector m_LsCache_xTd
cache for line search implementation: feature times direction
std::shared_ptr< const GenericFeatureMatrix > m_SparseFeatures
pointer to the sparse part of the feature matrix
DenseRealVector m_Costs
Label-Dependent costs.
VectorHash m_Last_W
cache for the last argument to x_times_w().
virtual void regularization_hessian(const DenseRealVector &weights, const DenseRealVector &direction, Eigen::Ref< DenseRealVector > pre_cond) const =0
void project_linear_to_line(const HashVector &location, const DenseRealVector &direction)
Prepares the cache variables for line projection.
const DenseFeatures & dense_features() const
virtual real_t regularization_value(const DenseRealVector &weights) const =0
const DenseRealVector & x_times_w(const HashVector &w)
Calculates the vector of feature matrix times weights w
long get_num_variables() const noexcept
actual implementation of num_variables(). We need this non-virtual function to be called during the c...
DenseAndSparseLinearBase(std::shared_ptr< const GenericFeatureMatrix > dense_features, std::shared_ptr< const GenericFeatureMatrix > sparse_features)
void gradient_unchecked(const HashVector &location, Eigen::Ref< DenseRealVector > target) override
DenseRealVector m_X_times_w
cache for the last result of x_times_w() corresponding to m_Last_W.
long num_variables() const noexcept override
real_t lookup_on_line(real_t position) override
Looks up the value of the objective on the line defined by the last call to project_to_line().
void update_xtw_cache(const HashVector &new_weight, const Eigen::MatrixBase< Derived > &new_result)
Updates the cached value for x_times_w.
Class that models an optimization objective.
Definition: objective.h:41
void gradient(const HashVector &location, Eigen::Ref< DenseRealVector > target)
Evaluate the gradient at location.
Definition: objective.cpp:96
std::unique_ptr< DenseAndSparseLinearBase > make_sp_dense_squared_hinge(std::shared_ptr< const GenericFeatureMatrix > dense_features, real_t dense_reg_strength, std::shared_ptr< const GenericFeatureMatrix > sparse_features, real_t sparse_reg_strength)
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
Definition: matrix_types.h:58
types::DenseVector< std::int8_t > BinaryLabelVector
Dense vector for storing binary labels.
Definition: matrix_types.h:68
types::DenseVector< real_t > DenseRealVector
Any dense, real values vector.
Definition: matrix_types.h:40
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
Definition: matrix_types.h:50
float real_t
The default type for floating point values.
Definition: config.h:17
void regularization_preconditioner(const DenseRealVector &weights, Eigen::Ref< DenseRealVector > pre_cond) const override
void regularization_hessian(const DenseRealVector &weights, const DenseRealVector &direction, Eigen::Ref< DenseRealVector > target) const override
DenseAndSparseMargin(std::shared_ptr< const GenericFeatureMatrix > dense_features, std::shared_ptr< const GenericFeatureMatrix > sparse_features, MarginFunction phi, DenseRegFunction dr, real_t drs, SparseRegFunction sr, real_t srs)
void calculate_loss(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const override
real_t regularization_value(const DenseRealVector &weights) const override
void calculate_2nd_derivative(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const override
void calculate_derivative(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const override
void regularization_gradient(const DenseRealVector &weights, Eigen::Ref< DenseRealVector > gradient) const override
void regularization_gradient_at_zero(Eigen::Ref< DenseRealVector > gradient) const override