DiSMEC++
dense_and_sparse.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "dense_and_sparse.h"
7 #include "utils/throw_error.h"
8 #include "stats/timer.h"
9 #include "margin_losses.h"
10 #include "utils/eigen_generic.h"
11 
12 using namespace dismec;
13 using namespace dismec::objective;
14 
15 namespace {
17  constexpr const stat_id_t STAT_PERF_MATMUL{7};
18 }
19 
/// Constructor: takes shared ownership of the dense and the sparse part of the feature matrix.
/// All row-sized caches (scores, line-search buffers, costs, labels, derivative buffers) are
/// sized from the dense part; the assert below verifies the sparse part has the same row count.
/// Note: members are initialized in declaration order, and m_DenseFeatures/m_SparseFeatures are
/// moved-into first, so the later initializers may safely dereference them.
DenseAndSparseLinearBase::DenseAndSparseLinearBase(std::shared_ptr<const GenericFeatureMatrix> dense_features,
                                                   std::shared_ptr<const GenericFeatureMatrix> sparse_features) :
    m_DenseFeatures( std::move(dense_features) ),
    m_SparseFeatures( std::move(sparse_features) ),
    m_X_times_w( m_DenseFeatures->rows() ),
    m_LsCache_xTd( m_DenseFeatures->rows() ),
    m_LsCache_xTw( m_DenseFeatures->rows() ),
    m_Costs( m_DenseFeatures->rows() ),
    m_Y( m_DenseFeatures->rows() ),
    m_DerivativeBuffer(m_DenseFeatures->rows()), m_SecondDerivativeBuffer(m_DenseFeatures->rows()),
    // line-search / generic buffers are sized by the number of weight variables resp. instances
    m_LineStart( get_num_variables() ), m_LineDirection( get_num_variables() ),
    m_LineCache( get_num_variables() ), m_GenericInBuffer(m_DenseFeatures->rows()), m_GenericOutBuffer(m_DenseFeatures->rows())
{
    ALWAYS_ASSERT_EQUAL(m_DenseFeatures->rows(), m_SparseFeatures->rows(), "Mismatching number ({} vs {}) of instances (rows) in dense and sparse part.")
    // default: uniform unit cost for every instance
    m_Costs.fill(1);
    declare_stat(STAT_PERF_MATMUL, {"perf_matmul", "µs"});
}
37 
38 
40  return m_DenseFeatures->rows();
41 }
42 
44  return get_num_variables();
45 }
46 
48  return m_DenseFeatures->cols() + m_SparseFeatures->cols();
49 }
50 
52  return m_DenseFeatures->dense();
53 }
54 
56  return m_SparseFeatures->sparse();
57 }
58 
59 #define DENSE_PART(source) source.head(dense_features().cols())
60 #define SPARSE_PART(source) source.tail(sparse_features().cols())
61 
63  if(w.hash() == m_Last_W) {
64  return m_X_times_w;
65  }
66  auto timer = make_timer(STAT_PERF_MATMUL);
67  m_X_times_w.noalias() = dense_features() * DENSE_PART(w.get());
68  m_X_times_w.noalias() += sparse_features() * SPARSE_PART(w.get());
69  m_Last_W = w.hash();
70  return m_X_times_w;
71 }
72 
74  m_LsCache_xTd.noalias() = dense_features() * DENSE_PART(direction);
75  m_LsCache_xTd.noalias() += sparse_features() * SPARSE_PART(direction);
76  m_LsCache_xTw = x_times_w(location);
77  m_LineDirection = direction;
78  m_LineStart = location.get();
79 }
80 
83  return m_Y;
84 }
85 
86 
88  m_Costs.resize(labels().size());
89  for(int i = 0; i < m_Costs.size(); ++i) {
90  if(m_Y.coeff(i) == 1) {
91  m_Costs.coeffRef(i) = positive;
92  } else {
93  m_Costs.coeffRef(i) = negative;
94  }
95  }
96 }
97 
99  return m_Costs;
100 }
101 
103  return m_Y;
104 }
105 
107  const DenseRealVector& xTw = x_times_w(location);
108  return value_from_xTw(xTw) + regularization_value(location.get());
109 }
110 
114  m_LineCache = m_LineStart + position * m_LineDirection;
115  return f + regularization_value(m_LineCache);
116 }
117 
119 {
120  m_GenericOutBuffer.resize(labels().size());
122  return m_GenericOutBuffer.dot(costs());
123 }
124 
125 void
127  Eigen::Ref<DenseRealVector> target) {
128  regularization_hessian(location.get(), direction, target);
129 
130  const auto& hessian = cached_2nd_derivative(location);
131  for (int pos = 0; pos < hessian.size(); ++pos) {
132  if(real_t h = hessian.coeff(pos); h != 0) {
133  real_t factor = dense_features().row(pos).dot(DENSE_PART(direction)) +
134  sparse_features().row(pos).dot(SPARSE_PART(direction));
135  DENSE_PART(target) += dense_features().row(pos) * factor * h;
136  SPARSE_PART(target) += sparse_features().row(pos) * factor * h;
137  }
138  }
139 }
140 
142  Eigen::Ref<DenseRealVector> gradient,
143  Eigen::Ref<DenseRealVector> pre) {
144  regularization_gradient(location.get(), gradient);
145  regularization_preconditioner(location.get(), pre);
146 
147  const auto& derivative = cached_derivative(location);
148  const auto& hessian = cached_2nd_derivative(location);
149  for (int pos = 0; pos < derivative.size(); ++pos) {
150  if(real_t d = derivative.coeff(pos); d != 0) {
151  DENSE_PART(gradient) += dense_features().row(pos) * d;
152  SPARSE_PART(gradient) += sparse_features().row(pos) * d;
153  }
154  if(real_t h = hessian.coeff(pos); h != 0) {
155  DENSE_PART(pre) += dense_features().row(pos).cwiseAbs2() * h;
156  SPARSE_PART(pre) += sparse_features().row(pos).cwiseAbs2() * h;
157  }
158  }
159 
160 }
161 
162 void DenseAndSparseLinearBase::gradient_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) {
163  regularization_gradient(location.get(), target);
164 
165  const auto& derivative = cached_derivative(location);
166  for (int pos = 0; pos < derivative.size(); ++pos) {
167  if(real_t d = derivative.coeff(pos); d != 0) {
168  DENSE_PART(target) += dense_features().row(pos) * d;
169  SPARSE_PART(target) += sparse_features().row(pos) * d;
170  }
171  }
172 }
173 
174 void DenseAndSparseLinearBase::gradient_at_zero_unchecked(Eigen::Ref<DenseRealVector> target) {
176 
177  m_GenericInBuffer = DenseRealVector::Zero(labels().size());
178  m_GenericOutBuffer.resize(m_GenericInBuffer.size());
180  const auto& cost_vector = costs();
181  for (int pos = 0; pos < m_GenericOutBuffer.size(); ++pos) {
182  if(real_t d = m_GenericOutBuffer.coeff(pos); d != 0) {
183  DENSE_PART(target) += dense_features().row(pos) * (cost_vector.coeff(pos) * d);
184  SPARSE_PART(target) += sparse_features().row(pos) * (cost_vector.coeff(pos) * d);
185  }
186  }
187 }
188 
/// Computes a diagonal preconditioner at `location`: the regularizer's diagonal plus
/// the diagonal of the loss Hessian X^T D X, i.e. sum_i h_i * x_i^2 per variable.
void DenseAndSparseLinearBase::diag_preconditioner_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) {
    // the regularizer initializes target
    regularization_preconditioner(location.get(), target);

    const auto& hessian = cached_2nd_derivative(location);
    for (int pos = 0; pos < hessian.size(); ++pos) {
        // skip instances whose second derivative vanishes (common for margin losses)
        if(real_t h = hessian.coeff(pos); h != 0) {
            DENSE_PART(target) += dense_features().row(pos).cwiseAbs2() * h;
            SPARSE_PART(target) += sparse_features().row(pos).cwiseAbs2() * h;
        }
    }
}
200 
202  return m_DerivativeBuffer.update(location, [&](const DenseRealVector& input, DenseRealVector& out){
203  out.resize(labels().size());
204  calculate_derivative(x_times_w(location), labels(), out);
205  out.array() *= costs().array();
206  });
207 }
208 
210  return m_SecondDerivativeBuffer.update(location, [&](const DenseRealVector& input, DenseRealVector& out){
211  out.resize(labels().size());
212  calculate_2nd_derivative(x_times_w(location), labels(), out);
213  out.array() *= costs().array();
214  });
215 }
216 
220 }
221 
223  project_linear_to_line(location, direction);
224 }
225 
227  m_DenseFeatures = std::make_shared<const GenericFeatureMatrix>(dense);
228  m_SparseFeatures = std::make_shared<const GenericFeatureMatrix>(sparse);
229 }
230 
231 namespace {
232  struct L2Regularizer {
233  [[nodiscard]] real_t value(real_t weight) const {
234  return weight * weight;
235  }
236 
237  [[nodiscard]] real_t grad(real_t weight) const {
238  return real_t{2} * weight;
239  }
240 
241  [[nodiscard]] real_t quad(real_t weight) const {
242  return real_t{2};
243  }
244  };
245 }
246 
/// Factory: builds a squared-hinge margin objective over a dense+sparse feature split,
/// with an independent L2 penalty (of the given strengths) on the dense and on the
/// sparse part of the weight vector.
std::unique_ptr<DenseAndSparseLinearBase> dismec::objective::make_sp_dense_squared_hinge(
    std::shared_ptr<const GenericFeatureMatrix> dense_features,
    real_t dense_reg_strength,
    std::shared_ptr<const GenericFeatureMatrix> sparse_features,
    real_t sparse_reg_strength) {
    return std::make_unique<objective::DenseAndSparseMargin<SquaredHingePhi, L2Regularizer, L2Regularizer>>(
        std::move(dense_features),
        std::move(sparse_features),
        SquaredHingePhi{}, L2Regularizer{}, dense_reg_strength, L2Regularizer{}, sparse_reg_strength);
}
257 
258 #include "doctest.h"
259 
260 using namespace dismec;
261 
262 namespace {
263  struct ZeroPhi {
264  [[nodiscard]] real_t value(real_t margin) const {
265  return 0;
266  }
267 
268  [[nodiscard]] real_t grad(real_t margin) const {
269  return 0;
270  }
271 
272  [[nodiscard]] real_t quad(real_t margin) const {
273  return 0;
274  }
275  };
276 }
277 
278 TEST_CASE("pure-regularization") {
279  DenseFeatures zero_dense = DenseFeatures::Zero(5, 3);
280  SparseFeatures zero_sparse = SparseFeatures(5, 4);
282  std::make_shared<const GenericFeatureMatrix>(zero_dense),
283  std::make_shared<const GenericFeatureMatrix>(zero_sparse),
284  ZeroPhi{}, L2Regularizer{}, 1.0, L2Regularizer{}, 1.0);
285 
286  DenseRealVector weights{{1.0, 2.0, -1.0, 0.0, 1.0, 2.0, 5.0}};
287  HashVector hv(weights);
288 
289  CHECK(goal.value(hv) == 1.0 + 4.0 + 1.0 + 1.0 + 4.0 + 25.0);
290 
291  DenseRealVector out_grad(7);
292  DenseRealVector expected_grad = 2.0 * weights;
293  goal.gradient(hv, out_grad);
294  CHECK(expected_grad == out_grad);
295 }
const DenseRealVector & update(const HashVector &input, F &&function)
Definition: hash_vector.h:186
An Eigen vector with versioning information, to implement simple caching of results.
Definition: hash_vector.h:43
VectorHash hash() const
Gets the unique id of this vector.
Definition: hash_vector.cpp:45
const DenseRealVector & get() const
Gets a constant reference to the data of this vector.
Definition: hash_vector.h:57
void gradient_and_pre_conditioner_unchecked(const HashVector &location, Eigen::Ref< DenseRealVector > gradient, Eigen::Ref< DenseRealVector > pre) override
void project_to_line_unchecked(const HashVector &location, const DenseRealVector &direction) override
virtual void calculate_2nd_derivative(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const =0
void update_features(const DenseFeatures &dense, const SparseFeatures &sparse)
BinaryLabelVector m_Y
Label vector – use a vector of ints here. We encode label present == 1, absent == -1.
virtual void calculate_derivative(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const =0
const DenseRealVector & costs() const
std::shared_ptr< const GenericFeatureMatrix > m_DenseFeatures
Pointer to the dense part of the feature matrix.
void diag_preconditioner_unchecked(const HashVector &location, Eigen::Ref< DenseRealVector > target) override
void hessian_times_direction_unchecked(const HashVector &location, const DenseRealVector &direction, Eigen::Ref< DenseRealVector > target) override
real_t value_from_xTw(const DenseRealVector &xTw)
const DenseRealVector & cached_derivative(const HashVector &location)
void gradient_at_zero_unchecked(Eigen::Ref< DenseRealVector > target) override
const SparseFeatures & sparse_features() const
virtual void regularization_gradient(const DenseRealVector &weights, Eigen::Ref< DenseRealVector > gradient) const =0
virtual void regularization_preconditioner(const DenseRealVector &weights, Eigen::Ref< DenseRealVector > pre_cond) const =0
real_t value_unchecked(const HashVector &location) override
const DenseRealVector & cached_2nd_derivative(const HashVector &location)
void update_costs(real_t positive, real_t negative)
DenseRealVector m_LsCache_xTw
cache for line search implementation: feature times weights
virtual void regularization_gradient_at_zero(Eigen::Ref< DenseRealVector > gradient) const =0
const BinaryLabelVector & labels() const
virtual void calculate_loss(const DenseRealVector &scores, const BinaryLabelVector &labels, DenseRealVector &out) const =0
DenseRealVector m_LsCache_xTd
cache for line search implementation: feature times direction
std::shared_ptr< const GenericFeatureMatrix > m_SparseFeatures
pointer to the sparse part of the feature matrix
DenseRealVector m_Costs
Label-Dependent costs.
VectorHash m_Last_W
cache for the last argument to x_times_w().
virtual void regularization_hessian(const DenseRealVector &weights, const DenseRealVector &direction, Eigen::Ref< DenseRealVector > pre_cond) const =0
void project_linear_to_line(const HashVector &location, const DenseRealVector &direction)
Prepares the cache variables for line projection.
const DenseFeatures & dense_features() const
virtual real_t regularization_value(const DenseRealVector &weights) const =0
const DenseRealVector & x_times_w(const HashVector &w)
Calculates the vector of feature matrix times weights w
long get_num_variables() const noexcept
actual implementation of num_variables(). We need this non-virtual function to be called during the c...
DenseAndSparseLinearBase(std::shared_ptr< const GenericFeatureMatrix > dense_features, std::shared_ptr< const GenericFeatureMatrix > sparse_features)
void gradient_unchecked(const HashVector &location, Eigen::Ref< DenseRealVector > target) override
DenseRealVector m_X_times_w
cache for the last result of x_times_w() corresponding to m_Last_W.
long num_variables() const noexcept override
real_t lookup_on_line(real_t position) override
Looks up the value of the objective on the line defined by the last call to project_to_line().
void gradient(const HashVector &location, Eigen::Ref< DenseRealVector > target)
Evaluate the gradient at location.
Definition: objective.cpp:96
auto make_timer(stat_id_t id, Args... args)
Creates a new ScopeTimer using stats::record_scope_time.
Definition: tracked.h:130
void declare_stat(stat_id_t index, StatisticMetaData meta)
Declares a new statistics. This function just forwards all its arguments to the internal StatisticsCo...
Definition: tracked.cpp:16
#define SPARSE_PART(source)
TEST_CASE("pure-regularization")
#define DENSE_PART(source)
std::unique_ptr< DenseAndSparseLinearBase > make_sp_dense_squared_hinge(std::shared_ptr< const GenericFeatureMatrix > dense_features, real_t dense_reg_strength, std::shared_ptr< const GenericFeatureMatrix > sparse_features, real_t sparse_reg_strength)
opaque_int_type< detail::stat_id_tag > stat_id_t
An opaque int-like type that is used to identify a statistic in a StatisticsCollection.
Definition: stat_id.h:24
Main namespace in which all types, classes, and functions are defined.
Definition: app.h:15
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
Definition: matrix_types.h:58
types::DenseVector< std::int8_t > BinaryLabelVector
Dense vector for storing binary labels.
Definition: matrix_types.h:68
types::DenseVector< real_t > DenseRealVector
Any dense, real-valued vector.
Definition: matrix_types.h:40
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
Definition: matrix_types.h:50
float real_t
The default type for floating point values.
Definition: config.h:17
#define ALWAYS_ASSERT_EQUAL(x, y, msg)
Definition: throw_error.h:24