DiSMEC++
regularizers_imp.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "regularizers_imp.h"
7 #include "regularizers.h"
8 #include "utils/hash_vector.h"
9 #include "utils/conversion.h"
10 
11 namespace objective = dismec::objective;
15 
17  PointWiseRegularizer(scale, ignore_bias) {
18 
19 }
20 
22 
23  m_LsCache_w02 = location->squaredNorm();
24  m_LsCache_d2 = direction.squaredNorm();
25  m_LsCache_dTw = location->dot(direction);
26  assert(std::isfinite(m_LsCache_w02));
27  assert(std::isfinite(m_LsCache_d2));
28  assert(std::isfinite(m_LsCache_dTw));
29 
30  if(dont_regularize_bias()) {
31  real_t ll = location->coeff(location->size() - 1);
32  real_t ld = direction.coeff(direction.size() - 1);
33  m_LsCache_w02 -= ll * ll;
34  m_LsCache_d2 -= ld * ld;
35  m_LsCache_dTw -= ll * ld;
36  }
37 }
38 
40  return real_t{0.5} * scale() * (m_LsCache_w02 + 2*a*m_LsCache_dTw + a*a*m_LsCache_d2);
41 }
42 
43 // TODO put some comments that figure out what is happening here!!!
45  return real_t{0.5} * PointWiseRegularizer::value_unchecked(location);
46 }
47 
49  return x * x;
50 }
51 
53  return x;
54 }
55 
57  return 1;
58 }
59 
60 HuberRegularizer::HuberRegularizer(real_t epsilon, real_t scale, bool ignore_bias) :
61  PointWiseRegularizer(scale, ignore_bias), m_Epsilon(epsilon) {
62  if(m_Epsilon <= 0) {
63  THROW_EXCEPTION(std::invalid_argument, "Epsilon has to be positive. Got {}", m_Epsilon);
64  }
65 }
66 
68  if(x > m_Epsilon) return x - m_Epsilon/2;
69  if(x < -m_Epsilon) return -x - m_Epsilon/2;
70  return real_t{0.5} * x*x / m_Epsilon;
71 }
72 
74  if(x > m_Epsilon) {
75  return 1.0;
76  } else if(x < -m_Epsilon) {
77  return -1.0;
78  } else {
79  return x / m_Epsilon;
80  }
81 }
82 
84  if(x > m_Epsilon) return real_t{1.0} / x;
85  if(x < -m_Epsilon) return -real_t{1.0} / x;
86  return real_t{0.5} / m_Epsilon;
87 }
88 
89 ElasticNetRegularizer::ElasticNetRegularizer(real_t epsilon, real_t scale, real_t interp, bool ignore_bias)
90  : PointWiseRegularizer(scale, ignore_bias), m_Epsilon(epsilon), m_L1_Factor(1 - interp), m_L2_Factor(interp)
91 {
92  if(m_Epsilon <= 0) {
93  THROW_EXCEPTION(std::invalid_argument, "Epsilon has to be positive. Got {}", m_Epsilon);
94  }
95 
96  if(interp < 0 || interp > 1) {
97  THROW_EXCEPTION(std::invalid_argument, "Interpolation needs to be in [0, 1]. Got {}", interp);
98  }
99 }
100 
102  real_t x2 = x*x;
103  if(x > m_Epsilon) return m_L1_Factor*(x - m_Epsilon/2) + real_t{0.5} * m_L2_Factor * x2;
104  if(x < -m_Epsilon) return m_L1_Factor*(-x - m_Epsilon/2) + real_t{0.5} * m_L2_Factor * x2;
105  return real_t{0.5} * (m_L1_Factor / m_Epsilon + m_L2_Factor) * x2;
106 }
107 
109  if(x > m_Epsilon) {
110  return m_L1_Factor + m_L2_Factor * x;
111  } else if(x < -m_Epsilon) {
112  return -m_L1_Factor + m_L2_Factor * x;
113  } else {
114  return m_L1_Factor * x / m_Epsilon + m_L2_Factor * x;
115  }
116 }
117 
119  if(x > m_Epsilon) return m_L1_Factor / x + m_L2_Factor;
120  if(x < -m_Epsilon) return -m_L1_Factor / x + m_L2_Factor;
121  return real_t{0.5} / m_Epsilon * m_L1_Factor + m_L2_Factor;
122 }
123 
125 // The factory functions
126 std::unique_ptr<Objective> objective::make_regularizer(const SquaredNormConfig& config) {
127  return std::make_unique<SquaredNormRegularizer>(config.Strength, config.IgnoreBias);
128 }
129 
130 std::unique_ptr<Objective> objective::make_regularizer(const HuberConfig& config) {
131  return std::make_unique<HuberRegularizer>(config.Epsilon, config.Strength, config.IgnoreBias);
132 }
133 
134 std::unique_ptr<Objective> objective::make_regularizer(const ElasticConfig& config) {
135  return std::make_unique<ElasticNetRegularizer>(config.Epsilon, config.Strength, config.Interpolation, config.IgnoreBias);
136 }
137 
138 
139 #include "doctest.h"
140 
141 using namespace dismec;
142 
143 #ifndef DOCTEST_CONFIG_DISABLE
144 
145 // helper functions for testing
146 namespace {
147  DenseRealVector make_vec(std::initializer_list<real_t> values) {
148  DenseRealVector vec(values.size());
149  auto it = begin(values);
150  for(int i = 0; i < ssize(values); ++i) {
151  vec.coeffRef(i) = *it;
152  ++it;
153  }
154  return vec;
155  }
156 
158  DenseRealVector loc = make_vec({1.0, 2.0, -3.0, 0.0});
159  DenseRealVector dir = make_vec({3.0, -1.0, 2.0, 1.0});
160 
161  reg.project_to_line(HashVector{loc}, dir);
162 
163  for(real_t t : {-1.2f, 0.1f, 0.5f, 0.8f, 2.5f}) {
164  real_t predict = reg.lookup_on_line(t);
165  real_t actual = reg.value(HashVector{loc + t*dir});
166  CHECK(predict == doctest::Approx(actual));
167  }
168  }
169 
171  DenseRealVector loc = make_vec({1.0, 0.05, -3.0, 0.0});
172  DenseRealVector dir = make_vec({3.0, -1.0, 2.0, 1.0});
173  HashVector hl{loc};
174 
175  // short versions
176  HashVector short_loc{loc.topRows(3)};
177  DenseRealVector short_dir{dir.topRows(3)};
178 
179  // short on full objective
180  real_t reference = full.value(short_loc);
181  CHECK(no_bias.value(hl) == doctest::Approx(reference));
182 
183  DenseRealVector target = DenseRealVector::Random(4);
184  DenseRealVector short_target(3);
185 
186  full.gradient(short_loc, short_target);
187  no_bias.gradient(hl, target);
188  for(int i = 0; i < 3; ++i) {
189  CHECK(target.coeff(i) == short_target.coeff(i));
190  }
191  CHECK(target.coeff(3) == 0);
192 
193  full.hessian_times_direction(short_loc, short_dir, short_target);
194  target = DenseRealVector::Random(4);
195  no_bias.hessian_times_direction(hl, dir, target);
196  for(int i = 0; i < 3; ++i) {
197  CHECK(target.coeff(i) == short_target.coeff(i));
198  }
199  CHECK(target.coeff(3) == 0);
200  }
201 }
202 
203 TEST_CASE("l2-reg") {
204  SquaredNormRegularizer reg(1.0);
205  DenseRealVector loc = make_vec({1.0, 2.0, -3.0, 0.0});
206  HashVector hl{loc};
207 
208  // check value
209  CHECK(reg.value(hl) == 0.5*(1+4+9));
210 
211  // check gradient: should be equal to location
212  DenseRealVector target(loc.size());
213  reg.gradient(hl, target);
214  for(int i = 0; i < target.size(); ++i) {
215  CHECK(loc.coeff(i) == target.coeff(i));
216  }
217 
218  reg.gradient_at_zero(target);
219  CHECK(target.squaredNorm() == 0);
220 
221  // check hessian: should be equal to probe direction
222  DenseRealVector probe = make_vec({1.0, 2.0, 1.0, -1.0});
223  reg.hessian_times_direction(hl, probe, target);
224  for(int i = 0; i < target.size(); ++i) {
225  CHECK(probe.coeff(i) == target.coeff(i));
226  }
227 
228  // check preconditioner
229  reg.diag_preconditioner(hl, target);
230  for(int i = 0; i < target.size(); ++i) {
231  CHECK(target.coeff(i) == 1.0);
232  }
233 }
234 
235 TEST_CASE("l2 line-search") {
236  bool ignore_bias = false;
237 
238  SUBCASE("ignore bias") {
239  ignore_bias = true;
240  }
241 
242  SUBCASE("full weights") {
243  ignore_bias = false;
244  }
245 
246  SquaredNormRegularizer reg(1.0, ignore_bias);
247  verify_line_search(reg);
248 }
249 
250 TEST_CASE("l2 bias") {
251  SquaredNormRegularizer full(1.0);
252  SquaredNormRegularizer bias(1.0, true);
253  verify_bias(full, bias);
254 }
255 
256 TEST_CASE("huber-reg") {
257  HuberRegularizer absreg(1);
258  DenseRealVector loc(4);
259  loc << 1, 5.0, -3.0, 0.0;
260  HashVector hl{loc};
261  CHECK(absreg.value(hl) == 9.0 - 1.5);
262 
263  DenseRealVector grad(4);
264  absreg.gradient(hl, grad);
265 
266  CHECK(grad.coeff(0) == 1.0);
267  CHECK(grad.coeff(1) == 1.0);
268  CHECK(grad.coeff(2) == -1.0);
269  CHECK(grad.coeff(3) == 0.0);
270 
271  absreg.gradient_at_zero(grad);
272  CHECK(grad.squaredNorm() == 0);
273 }
274 
275 TEST_CASE("huber line-search") {
276  bool ignore_bias = false;
277 
278  SUBCASE("ignore bias") {
279  ignore_bias = true;
280  }
281 
282  SUBCASE("full weights") {
283  ignore_bias = false;
284  }
285 
286  HuberRegularizer reg(1.0, 1.0, ignore_bias);
287  verify_line_search(reg);
288 }
289 
290 
291 TEST_CASE("huber bias") {
292  HuberRegularizer full(1.0, 1.0);
293  HuberRegularizer bias(1.0, 1.0, true);
294  verify_bias(full, bias);
295 }
296 #include "spdlog/spdlog.h"
297 TEST_CASE("elastic-net") {
298  ElasticNetRegularizer reg(1.0, 1.0, 0.4, false);
299  HuberRegularizer l1_part(1.0, 0.6, false);
300  SquaredNormRegularizer l2_part(0.4, false);
301 
302  DenseRealVector loc(4);
303  loc << 1, 0.5, -3.0, 0.0;
304  HashVector hl{loc};
305 
306  auto check_vector_valued = [&](auto&& f) {
307  DenseRealVector elastic(4);
308  DenseRealVector l1(4);
309  DenseRealVector l2(4);
310  f(reg, elastic);
311  f(l1_part, l1);
312  f(l2_part, l2);
313  CHECK(elastic.coeff(0) == doctest::Approx(l1.coeff(0) + l2.coeff(0)));
314  CHECK(elastic.coeff(1) == doctest::Approx(l1.coeff(1) + l2.coeff(1)));
315  CHECK(elastic.coeff(2) == doctest::Approx(l1.coeff(2) + l2.coeff(2)));
316  CHECK(elastic.coeff(3) == doctest::Approx(l1.coeff(3) + l2.coeff(3)));
317  };
318 
319  SUBCASE("value") {
320  CHECK(reg.value(hl) == l1_part.value(hl) + l2_part.value(hl));
321  }
322 
323  SUBCASE("gradient") {
324  check_vector_valued([&](auto&& ref, auto&& vec){
325  ref.gradient(hl, vec);
326  });
327  }
328 
329  SUBCASE("gradient_at_zero") {
330  check_vector_valued([&](auto&& ref, auto&& vec){
331  ref.gradient_at_zero(vec);
332  });
333  }
334 
335  SUBCASE("hessian_times_direction") {
336  DenseRealVector dir = DenseRealVector::Random(4);
337  check_vector_valued([&](auto&& ref, auto&& vec){
338  ref.hessian_times_direction(hl, dir, vec);
339  });
340  }
341 
342  DenseRealVector grad(4);
343  reg.gradient(hl, grad);
344 
345 
346 }
347 
348 TEST_CASE("elastic line-search") {
349  bool ignore_bias = false;
350 
351  SUBCASE("ignore bias") {
352  ignore_bias = true;
353  }
354 
355  SUBCASE("full weights") {
356  ignore_bias = false;
357  }
358 
359  ElasticNetRegularizer reg(1.0, 1.0, 0.5, ignore_bias);
360  verify_line_search(reg);
361 }
362 
363 
364 TEST_CASE("elastic bias") {
365  ElasticNetRegularizer full(1.0, 1.0, 0.7);
366  ElasticNetRegularizer bias(1.0, 1.0, 0.7,true);
367  verify_bias(full, bias);
368 }
369 
370 #endif
An Eigen vector with versioning information, to implement simple caching of results.
Definition: hash_vector.h:43
ElasticNetRegularizer(real_t epsilon, real_t scale, real_t interp, bool ignore_bias=false)
Constructor for a ElasticNet regularizer objective.
This class implements a huber regularizer.
HuberRegularizer(real_t epsilon, real_t scale=1.0, bool ignore_bias=false)
Constructor for a Huber regularizer objective.
real_t point_wise_value(real_t x) const
real_t point_wise_quad(real_t x) const
real_t point_wise_grad(real_t x) const
Class that models an optimization objective.
Definition: objective.h:41
void hessian_times_direction(const HashVector &location, const DenseRealVector &direction, Eigen::Ref< DenseRealVector > target)
Calculates the product of the Hessian matrix at location with direction.
Definition: objective.cpp:107
void gradient_at_zero(Eigen::Ref< DenseRealVector > target)
Gets the gradient for location zero.
Definition: objective.cpp:82
void gradient(const HashVector &location, Eigen::Ref< DenseRealVector > target)
Evaluate the gradient at location.
Definition: objective.cpp:96
virtual real_t lookup_on_line(real_t position)=0
Looks up the value of the objective on the line defined by the last call to project_to_line().
void project_to_line(const HashVector &location, const DenseRealVector &direction)
creates a function g such that g(a) = objective(location + a * direction) Use lookup_on_line() to eva...
Definition: objective.cpp:124
void diag_preconditioner(const HashVector &location, Eigen::Ref< DenseRealVector > target)
Get precondition to be used in CG optimization.
Definition: objective.cpp:43
real_t value(const HashVector &location)
Evaluate the objective at the given location.
Definition: objective.cpp:35
Base class for pointwise regularization functions.
Definition: pointwise.h:39
real_t value_unchecked(const HashVector &location) override
Definition: pointwise.h:112
real_t scale() const
Returns the common scale factor for the entire regularizer.
Definition: pointwise.h:69
This class implements a squared norm (L2) regularizer. Thus f(x) = 0.5 |x|^2.
real_t lookup_on_line(real_t a) override
Looks up the value of the objective on the line defined by the last call to project_to_line().
SquaredNormRegularizer(real_t scale=1, bool ignore_bias=false)
real_t value_unchecked(const HashVector &location) override
void project_to_line_unchecked(const HashVector &location, const DenseRealVector &direction) override
void verify_bias(objective::Objective &full, objective::Objective &no_bias)
void verify_line_search(objective::Objective &reg)
DenseRealVector make_vec(std::initializer_list< real_t > values)
std::unique_ptr< Objective > make_regularizer(const SquaredNormConfig &config)
Main namespace in which all types, classes, and functions are defined.
Definition: app.h:15
constexpr auto ssize(const C &c) -> std::common_type_t< std::ptrdiff_t, std::make_signed_t< decltype(c.size())>>
signed size free function. Taken from https://en.cppreference.com/w/cpp/iterator/size
Definition: conversion.h:42
types::DenseVector< real_t > DenseRealVector
Any dense, real values vector.
Definition: matrix_types.h:40
float real_t
The default type for floating point values.
Definition: config.h:17
TEST_CASE("l2-reg")
#define THROW_EXCEPTION(exception_type,...)
Definition: throw_error.h:16