DiSMEC++
newton.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
#include "newton.h"
#include "line_search.h"
#include "utils/hash_vector.h"
#include "solver/cg.h"
#include "stats/collection.h"
#include "stats/timer.h"

#include <cmath>
12 
13 using namespace dismec::solvers;
14 
15 namespace {
17 
18  constexpr const stat_id_t STAT_GRADIENT_NORM_0{0};
19  constexpr const stat_id_t STAT_OBJECTIVE_VALUE{1};
20  constexpr const stat_id_t STAT_GRADIENT_NORM{2};
21  constexpr const stat_id_t STAT_GRADIENT{3};
22  constexpr const stat_id_t STAT_PRECONDITIONER{4};
23  constexpr const stat_id_t STAT_WEIGHT_VECTOR{5};
24  constexpr const stat_id_t STAT_LINESEARCH_STEPSIZE{6};
25  constexpr const stat_id_t STAT_CG_ITERS{7};
26  constexpr const stat_id_t STAT_ITER_TIME{8};
27  constexpr const stat_id_t STAT_LS_FAIL{9};
28  constexpr const stat_id_t STAT_LS_STEPS{10};
29  constexpr const stat_id_t STAT_PROGRESS{11};
30  constexpr const stat_id_t STAT_ABSOLUTE_STEP{12};
31 
33 };
34 
/// \brief Sets up a newton optimizer for a problem with `num_variables` unknowns.
/// \param num_variables Dimension of the weight vector; sizes the CG solver,
///        gradient buffer, preconditioner, and weight vector.
/// \details Registers all statistics that this minimizer can collect, and the
///          iteration tag used to index them.
NewtonWithLineSearch::NewtonWithLineSearch(long num_variables) : m_CG_Solver(num_variables),
    m_Gradient(num_variables), m_PreConditioner(num_variables),
    m_Weights(DenseRealVector(num_variables))
{
    // NOTE(review): orig. lines 39-43 are missing from this extraction. Judging by the
    // hyper-parameter unit test in this file ("epsilon", "max-steps", "alpha-pcg" and the
    // declare_hyper_parameter / declare_sub_object references in the index), this is where
    // the hyper-parameter registration calls lived -- restore from upstream.

    // Register every statistic declared in the anonymous namespace above,
    // attaching a short name and (where applicable) a unit/legend string.
    declare_stat(STAT_GRADIENT_NORM_0, {"grad_norm_0", "|g_0|"});
    declare_stat(STAT_OBJECTIVE_VALUE, {"objective", "loss"});
    declare_stat(STAT_GRADIENT_NORM, {"grad_norm", "|g|"});
    declare_stat(STAT_GRADIENT, {"gradient", "|g_i|"});
    declare_stat(STAT_PRECONDITIONER, {"preconditioner", "|H_ii|"});
    declare_stat(STAT_WEIGHT_VECTOR, {"weight_vector", "|w_i|"});
    declare_stat(STAT_LINESEARCH_STEPSIZE, {"linesearch_step"});
    declare_stat(STAT_CG_ITERS, {"cg_iters", "#iters"});
    declare_stat(STAT_ITER_TIME, {"iter_time", "duration [µs]"});
    declare_stat(STAT_LS_FAIL, {"linesearch_fail", "#instances"});
    declare_stat(STAT_LS_STEPS, {"linesearch_iters", "#steps"});
    declare_stat(STAT_PROGRESS, {"progress", "|g|/|eps g_0|"});
    declare_stat(STAT_ABSOLUTE_STEP, {"newton_step", ""});

    // Tag so that recorded statistics can be grouped by iteration.
    declare_tag(TAG_ITERATION, "iteration");
}
61 
62 
64 {
65  // calculate gradient norm at w=0 for stopping condition.
66  // first, check if the objective supports fast grad
67  objective.gradient_at_zero(m_Gradient);
68  real_t gnorm0 = m_Gradient.norm();
70 
71 
72  m_Weights = init;
73  real_t f, gnorm;
74 
82  {
84  auto scope_timer = make_timer(STAT_ITER_TIME);
85 
86  f = objective.value(m_Weights);
87  objective.gradient_and_pre_conditioner(m_Weights, m_Gradient, m_PreConditioner);
88  gnorm = m_Gradient.norm();
89 
90  record_iteration(0, 0, gnorm, f, sLineSearchResult{0, 0, 0}, m_Epsilon * gnorm0);
91  }
92 
93  real_t f_start = f;
94  real_t gnorm_start = gnorm;
95 
96  // OK, there is something wrong already!
97  if(!std::isfinite(f) || !std::isfinite(gnorm) || !std::isfinite(gnorm0)) {
98  spdlog::error("Invalid newton optimization: initial value: {}, gradient norm: {}, gnorm_0: {}", f, gnorm, gnorm0);
99  return {MinimizerStatus::FAILED, 0, f, gnorm, f, gnorm};
100  }
101 
102  if(m_Logger) {
103  m_Logger->info("initial: f={:<5.3} |g|={:<5.3} |g_0|={:<5.3} eps={:<5.3}", f, gnorm, gnorm0, m_Epsilon);
104  }
105 
106  if (gnorm <= m_Epsilon * gnorm0)
107  return {MinimizerStatus::SUCCESS, 0, f, gnorm, f, gnorm};
108 
109  for(int iter = 1; iter <= m_MaxIter; ++iter) {
110  set_tag(TAG_ITERATION, iter);
111  auto scope_timer = make_timer(STAT_ITER_TIME);
112 
113  // regularize the preconditioner: M = (1-a)I + aM
115 
116  // Here, we solve min \| Hd + g \|
117  int cg_iter = m_CG_Solver.minimize([&](const DenseRealVector& d, Eigen::Ref<DenseRealVector> o) {
118  objective.hessian_times_direction(m_Weights, d, o);
120 
121  const auto& cg_solution = m_CG_Solver.get_solution();
122 
123  real_t fold = f;
124  objective.project_to_line(m_Weights, cg_solution);
125  auto ls_result = m_LineSearcher.search([&](real_t a){ return objective.lookup_on_line(a); },
126  m_Gradient.dot(cg_solution), f);
127 
128  if (ls_result.StepSize == 0)
129  {
130  spdlog::warn("line search failed in iteration {} of newton optimization. Current objective value: {:.3}, "
131  "gradient norm: {:.3} (target: {:.3}), squared search dir: {:.3}",
132  iter, f, gnorm, m_Epsilon * gnorm0, cg_solution.squaredNorm());
133  init = m_Weights.get();
134  record(STAT_LS_FAIL, 1);
135  return {MinimizerStatus::FAILED, iter, f, gnorm, f_start, gnorm_start};
136  }
137 
138  f = ls_result.Value;
139  real_t absolute_improvement = fold - f;
140  m_Weights = m_Weights + cg_solution * ls_result.StepSize;
141  objective.declare_vector_on_last_line(m_Weights, ls_result.StepSize);
142  objective.gradient_and_pre_conditioner(m_Weights, m_Gradient, m_PreConditioner);
143 
144  gnorm = m_Gradient.norm();
145 
146  record_iteration(iter, cg_iter, gnorm, f, ls_result, m_Epsilon * gnorm0);
147  record(STAT_ABSOLUTE_STEP, [&]() -> real_t { return cg_solution.norm(); });
148 
149  if (gnorm <= m_Epsilon * gnorm0) {
150  init = m_Weights.get();
151  return {MinimizerStatus::SUCCESS, iter, f, gnorm, f_start, gnorm_start};
152  }
153  if (f < -1.0e+32)
154  {
155  spdlog::warn("Objective appears to be unbounded (got value {:.2})", f);
156  return {MinimizerStatus::DIVERGED, iter, f, gnorm, f_start, gnorm_start};
157  }
158  if (abs(absolute_improvement) <= 1.0e-12 * abs(f))
159  {
160  spdlog::warn("relative improvement too low");
161  return {MinimizerStatus::FAILED, iter, f, gnorm, f_start, gnorm_start};
162  }
163  }
164 
165  init = m_Weights.get();
166  return {MinimizerStatus::TIMED_OUT, m_MaxIter, f, gnorm, f_start, gnorm_start};
167 }
168 
169 void NewtonWithLineSearch::record_iteration(int iter, int cg_iter, real_t gnorm, real_t objective, const sLineSearchResult& step, real_t gnorm0) {
170  record(STAT_GRADIENT_NORM, gnorm);
176  record(STAT_CG_ITERS, cg_iter);
178  record(STAT_PROGRESS, gnorm / gnorm0);
179 
180  if(m_Logger) {
181  m_Logger->info("iter {:3}: f={:<10.8} |g|={:<8.4} CG={:<3} line-search={:<4.2}",
182  iter, objective, gnorm, cg_iter, step.StepSize);
183  }
184 }
185 
187  if(eps <= 0) {
188  spdlog::error("Non-positive epsilon {} specified for newton minimization", eps);
189  throw std::invalid_argument("Epsilon must be larger than zero.");
190  }
191  m_Epsilon = eps;
192 }
193 
195  if(max_iter <= 0) {
196  spdlog::error("Non-positive iteration limit {} specified for newton minimization", max_iter);
197  throw std::invalid_argument("maximum iterations must be larger than zero.");
198  }
199  m_MaxIter = max_iter;
200 }
201 
203  if(alpha <= 0 || alpha >= 1) {
204  spdlog::error("The `alpha_pcg` parameter needs to be between 0 and 1, got {} ", alpha);
205  throw std::invalid_argument("alpha_pcg not in (0, 1)");
206  }
207  m_Alpha_PCG = alpha;
208 }
209 
210 #include "doctest.h"
211 #include <Eigen/Dense>
212 
213 using namespace dismec;
214 
215 TEST_CASE("newton with line search hyperparameters") {
216  NewtonWithLineSearch nwls{2};
217 
218  // direct interface
219  nwls.set_epsilon(0.1);
220  CHECK(nwls.get_epsilon() == 0.1);
221 
222  nwls.set_maximum_iterations(500);
223  CHECK(nwls.get_maximum_iterations() == 500);
224 
225  nwls.set_alpha_preconditioner(0.4);
226  CHECK(nwls.get_alpha_preconditioner() == 0.4);
227 
228  // error checking
229  CHECK_THROWS(nwls.set_epsilon(-0.4));
230  CHECK_THROWS(nwls.set_maximum_iterations(0));
231  CHECK_THROWS(nwls.set_alpha_preconditioner(-0.1));
232  CHECK_THROWS(nwls.set_alpha_preconditioner(1.1));
233 
234  // hp interface
235  nwls.set_hyper_parameter("epsilon", 0.25);
236  CHECK( std::get<double>(nwls.get_hyper_parameter("epsilon")) == 0.25);
237  nwls.set_hyper_parameter("max-steps", 50l);
238  CHECK( std::get<long>(nwls.get_hyper_parameter("max-steps")) == 50);
239  nwls.set_hyper_parameter("alpha-pcg", 0.3);
240  CHECK( std::get<double>(nwls.get_hyper_parameter("alpha-pcg")) == 0.3);
241 }
242 
243 TEST_CASE("solve square objective") {
244  struct QuadraticObjective : public dismec::objective::Objective {
245  QuadraticObjective(types::DenseColMajor<real_t> m, DenseRealVector s) : A(std::move(m)), b(std::move(s)),
246  m_LocCache(A.row(0)){}
247 
248  [[nodiscard]] long num_variables() const noexcept override {
249  return b.size();
250  }
251 
252  real_t value_unchecked(const HashVector& location) override {
253  return location->dot(A * location) + location->dot(b);
254  }
255  void gradient_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) override {
256  target= 2 * A * location + b;
257  }
258  void hessian_times_direction_unchecked(const HashVector& location, const DenseRealVector& direction,
259  Eigen::Ref<DenseRealVector> target) override
260  {
261  target = 2 * A * direction;
262  }
263 
264  void project_to_line_unchecked(const HashVector& location, const DenseRealVector& direction) override {
265  m_DirCache = direction;
266  m_LocCache = location;
267  };
268  real_t lookup_on_line(real_t position) override {
269  return value(HashVector{m_LocCache + m_DirCache * position});
270  };
271 
272  types::DenseColMajor<real_t> A;
273  DenseRealVector b;
274 
275  DenseRealVector m_DirCache;
276  HashVector m_LocCache;
277  };
278  types::DenseColMajor<real_t> mat(4, 4);
279  mat << 1.0, 1.0, 0.0, 0.0,
280  1.0, 1.0, -1.0, 0.0,
281  0.0, -1.0, 2.0, 0.0,
282  0.0, 0.0, 0.0, 1.0;
283  // ensure PSD symmetric matrix
284  mat = (mat.transpose() * mat).eval();
285 
286  DenseRealVector vec(4);
287  vec << 1.0, 2.0, 0.0, -2.0;
288  QuadraticObjective objective{mat, vec};
289 
290  DenseRealVector w = DenseRealVector::Random(4);
291 
292  NewtonWithLineSearch solver(w.size());
293  solver.minimize(objective, w);
294 
295  // solve quadratic minimum directly:
296  DenseRealVector direct = -mat.inverse() * vec / 2;
297  for(int i = 0; i < w.size(); ++i) {
298  CHECK(w.coeff(i) == doctest::Approx(direct.coeff(i)));
299  }
300 }
An Eigen vector with versioning information, to implement simple caching of results.
Definition: hash_vector.h:43
const DenseRealVector & get() const
Gets a constant reference to the data of this vector.
Definition: hash_vector.h:57
void declare_hyper_parameter(std::string name, U S::*pointer)
Definition: hyperparams.h:117
void declare_sub_object(const std::string &name, T S::*object)
Declares a sub-object that also contains hyper-parameters.
Definition: hyperparams.h:179
Class that models an optimization objective.
Definition: objective.h:41
An integer-like type that represents categorical values.
Definition: opaque_int.h:24
sLineSearchResult search(const std::function< double(double)> &projected_objective, double gTs, double f_init) const
Definition: line_search.cpp:20
const DenseRealVector & get_solution() const
returns the solution vector found by the last minimize call
Definition: cg.h:35
long minimize(const MatrixVectorProductFn &A, const DenseRealVector &b, const DenseRealVector &M)
Solves Ax+b=0. Returns the number of iterations.
Definition: cg.cpp:21
MinimizationResult minimize(objective::Objective &objective, Eigen::Ref< DenseRealVector > init)
Definition: minimizer.cpp:24
std::shared_ptr< spdlog::logger > m_Logger
Definition: minimizer.h:45
DenseRealVector m_PreConditioner
Definition: newton.h:44
BacktrackingLineSearch m_LineSearcher
Definition: newton.h:40
void record_iteration(int iter, int cg_iter, real_t gnorm, real_t objective, const sLineSearchResult &step, real_t gnorm0)
Definition: newton.cpp:169
void set_maximum_iterations(long max_iter)
Definition: newton.cpp:194
double get_alpha_preconditioner() const
Definition: newton.h:28
MinimizationResult run(objective::Objective &objective, Eigen::Ref< DenseRealVector > init) override
Definition: newton.cpp:63
NewtonWithLineSearch(long num_variables)
Definition: newton.cpp:35
void set_alpha_preconditioner(double alpha)
Definition: newton.cpp:202
void declare_tag(tag_id_t index, std::string name)
Declares a new tag. This function just forwards all its arguments to the internal StatisticsCollectio...
Definition: tracked.cpp:24
void record(stat_id_t stat, T &&value)
Record statistics. This function just forwards all its arguments to the internal StatisticsCollection...
Definition: tracked.h:90
auto make_timer(stat_id_t id, Args... args)
Creates a new ScopeTimer using stats::record_scope_time.
Definition: tracked.h:130
void declare_stat(stat_id_t index, StatisticMetaData meta)
Declares a new statistics. This function just forwards all its arguments to the internal StatisticsCo...
Definition: tracked.cpp:16
void set_tag(tag_id_t tag, long value)
Set value of tag. This function just forwards all its arguments to the internal StatisticsCollection.
Definition: tracked.h:116
constexpr const stat_id_t STAT_PROGRESS
Definition: newton.cpp:29
constexpr const stat_id_t STAT_WEIGHT_VECTOR
Definition: newton.cpp:23
constexpr const stat_id_t STAT_ABSOLUTE_STEP
Definition: newton.cpp:30
constexpr const stat_id_t STAT_GRADIENT
Definition: newton.cpp:21
constexpr const stat_id_t STAT_PRECONDITIONER
Definition: newton.cpp:22
constexpr const stat_id_t STAT_ITER_TIME
Definition: newton.cpp:26
constexpr const stat_id_t STAT_OBJECTIVE_VALUE
Definition: newton.cpp:19
constexpr const stat_id_t STAT_GRADIENT_NORM
Definition: newton.cpp:20
constexpr const stat_id_t STAT_CG_ITERS
Definition: newton.cpp:25
constexpr const stat_id_t STAT_GRADIENT_NORM_0
Definition: newton.cpp:18
constexpr const dismec::stats::tag_id_t TAG_ITERATION
Definition: newton.cpp:32
constexpr const stat_id_t STAT_LINESEARCH_STEPSIZE
Definition: newton.cpp:24
constexpr const stat_id_t STAT_LS_STEPS
Definition: newton.cpp:28
constexpr const stat_id_t STAT_LS_FAIL
Definition: newton.cpp:27
@ FAILED
Some internal operation failed.
@ DIVERGED
The optimization objective appears to be unbounded.
@ SUCCESS
The returned result is a minimum according to the stopping criterion of the algorithm.
@ TIMED_OUT
The maximum number of iterations has been reached but no minimum has been found.
opaque_int_type< detail::stat_id_tag > stat_id_t
An opaque int-like type that is used to identify a statistic in a StatisticsCollection.
Definition: stat_id.h:24
Main namespace in which all types, classes, and functions are defined.
Definition: app.h:15
types::DenseVector< real_t > DenseRealVector
Any dense, real values vector.
Definition: matrix_types.h:40
float real_t
The default type for floating point values.
Definition: config.h:17
TEST_CASE("newton with line search hyperparameters")
Definition: newton.cpp:215
Result of a Line Search operation.
Definition: line_search.h:17
double StepSize
The step size used to reach that position.
Definition: line_search.h:19