DiSMEC++
cascade.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "cascade.h"
8 #include "solver/newton.h"
9 #include "data/data.h"
10 #include "data/transform.h"
11 #include "utils/conversion.h"
12 #include "postproc.h"
13 #include "initializer.h"
14 #include "model/sparse.h"
15 
16 using namespace dismec;
17 
18 namespace {
20  public:
21  CombinedWeightInitializer( std::unique_ptr<init::WeightsInitializer> di, std::unique_ptr<init::WeightsInitializer> si,
22  long num_dense_features) :
23  m_NumDenseFeatures(num_dense_features), m_DenseInit(std::move(di)), m_SparseInit(std::move(si)) {
24 
25  }
26  void get_initial_weight(label_id_t label_id, Eigen::Ref<DenseRealVector> target,
27  objective::Objective& objective) override {
28  m_DenseInit->get_initial_weight(label_id, target.head(m_NumDenseFeatures), objective);
29  m_SparseInit->get_initial_weight(label_id, target.tail(target.size() - m_NumDenseFeatures), objective);
30  }
31  private:
33  std::unique_ptr<init::WeightsInitializer> m_DenseInit;
34  std::unique_ptr<init::WeightsInitializer> m_SparseInit;
35  };
36 }
37 
38 std::shared_ptr<objective::Objective> CascadeTraining::make_objective() const {
39  // we make a copy of the features, so they are in the local numa memory
40  auto sp_ftr = m_SparseReplicator.get_local();
41  auto ds_ftr = m_DenseReplicator.get_local();
43  sp_ftr, m_SparseReg);
44 }
45 
46 std::unique_ptr<solvers::Minimizer> CascadeTraining::make_minimizer() const {
47  auto minimizer = std::make_unique<solvers::NewtonWithLineSearch>(m_NumFeatures);
48  m_NewtonSettings.apply(*minimizer);
49  //minimizer->set_logger(get_logger());
50  return minimizer;
51 }
52 
53 void CascadeTraining::update_minimizer(solvers::Minimizer& base_minimizer, label_id_t label_id) const {
54  auto* minimizer = dynamic_cast<solvers::NewtonWithLineSearch*>(&base_minimizer);
55  if(!minimizer)
56  throw std::logic_error("Could not cast minimizer to <NewtonWithLineSearch>");
57 
58  // adjust the epsilon parameter according to number of positives/number of negatives
59  std::size_t num_pos = get_data().num_positives(label_id);
60  double small_count = static_cast<double>(std::min(num_pos, get_data().num_examples() - num_pos));
61  double epsilon_scale = std::max(small_count, 1.0) / static_cast<double>(get_data().num_examples());
62  if(m_Shortlist) {
63  std::size_t actual_num_pos = 0;
64  std::size_t actual_num_neg = 0;
65  const auto& shortlist = m_Shortlist->at(label_id.to_index());
66  auto label_vec = get_data().get_labels(label_id);
67  for(const auto& row : shortlist) {
68  if(label_vec->coeff(row)) {
69  ++actual_num_pos;
70  } else {
71  ++actual_num_neg;
72  }
73  }
74  epsilon_scale = std::max( static_cast<double>(std::min(actual_num_neg, actual_num_pos)), 1.0 ) / static_cast<double>( actual_num_pos + actual_num_neg );
75  }
76 
77  minimizer->set_epsilon(m_BaseEpsilon * epsilon_scale);
78 }
79 
80 void CascadeTraining::update_objective(objective::Objective& base_objective, label_id_t label_id) const {
81  auto* objective = dynamic_cast<objective::DenseAndSparseLinearBase*>(&base_objective);
82  if(!objective)
83  throw std::logic_error("Could not cast objective to <DenseAndSparseLinearBase>");
84 
85  if(m_Shortlist) {
86  // TODO this causes several memory allocations
87  const auto& shortlist = m_Shortlist->at(label_id.to_index());
88  DenseFeatures shortlisted_dense = shortlist_features(m_DenseReplicator.get_local()->dense(),
89  shortlist);
90  SparseFeatures shortlisted_sparse = shortlist_features(m_SparseReplicator.get_local()->sparse(),
91  shortlist);
92  objective->update_features(shortlisted_dense, shortlisted_sparse);
93  BinaryLabelVector& target_labels = objective->get_label_ref();
94  target_labels.resize(ssize(shortlist));
95  auto label_vec = get_data().get_labels(label_id);
96  long target_id = 0;
97  for(const auto& row : shortlist) {
98  target_labels.coeffRef(target_id) = label_vec->coeff(row);
99  ++target_id;
100  }
101  objective->update_costs(1.0, 1.0);
102  } else {
103  // we need to set the labels before we update the costs, since the label information is needed
104  // to determine whether to apply the positive or the negative weighting
105  get_data().get_labels(label_id, objective->get_label_ref());
106  }
107 }
108 
109 std::unique_ptr<init::WeightsInitializer> CascadeTraining::make_initializer() const {
110  auto dense = m_DenseReplicator.get_local();
111  auto sparse = m_SparseReplicator.get_local();
112 
113  auto dense_init = m_DenseInitStrategy->make_initializer(dense);
114  auto sparse_init = m_SparseInitStrategy->make_initializer(sparse);
115  return std::make_unique<CombinedWeightInitializer>(std::move(dense_init), std::move(sparse_init), dense->cols());
116 
117 }
118 
119 std::shared_ptr<model::Model> CascadeTraining::make_model(long num_features, model::PartialModelSpec spec) const {
120  return std::make_shared<model::SparseModel>(num_features, spec);
121 }
122 
123 std::unique_ptr<postproc::PostProcessor>
124 CascadeTraining::make_post_processor(const std::shared_ptr<objective::Objective>& objective) const {
125  return m_PostProcessor->make_processor(objective);
126 }
127 
129  return *m_StatsGather;
130 }
131 
132 CascadeTraining::CascadeTraining(std::shared_ptr<const DatasetBase> tfidf_data,
133  std::shared_ptr<const GenericFeatureMatrix> dense_data,
134  HyperParameters hyper_params,
135  std::shared_ptr<init::WeightInitializationStrategy> dense_init,
136  real_t dense_reg,
137  std::shared_ptr<init::WeightInitializationStrategy> sparse_init,
138  real_t sparse_reg,
139  std::shared_ptr<postproc::PostProcessFactory> post_proc,
140  std::shared_ptr<TrainingStatsGatherer> gatherer,
141  std::shared_ptr<const std::vector<std::vector<long>>> shortlist) :
142  TrainingSpec(std::move(tfidf_data)),
143  m_NewtonSettings( std::move(hyper_params) ),
144  m_SparseReplicator(get_data().get_features() ),
145  m_DenseReplicator(std::move(dense_data) ),
146  m_Shortlist( std::move(shortlist) ),
147  m_PostProcessor( std::move(post_proc) ),
148  m_DenseInitStrategy( std::move(dense_init) ),
149  m_SparseInitStrategy( std::move(sparse_init) ),
150  m_StatsGather( std::move(gatherer) ),
151  m_NumFeatures(m_SparseReplicator.get_local()->cols() + m_DenseReplicator.get_local()->cols()),
152  m_DenseReg(dense_reg),
153  m_SparseReg(sparse_reg)
154  {
155 
156  // extract the base value of `epsilon` from the `hyper_params` object.
157  m_BaseEpsilon = std::get<double>(m_NewtonSettings.get("epsilon"));
158 }
159 
160 
161 std::shared_ptr<TrainingSpec> dismec::create_cascade_training(
162  std::shared_ptr<const DatasetBase> data,
163  std::shared_ptr<const GenericFeatureMatrix> dense,
164  std::shared_ptr<const std::vector<std::vector<long>>> shortlist,
165  HyperParameters params,
166  CascadeTrainingConfig config)
167 {
168  if(!config.SparseInit)
170  if(!config.DenseInit)
172  if(!config.PostProcessing)
174  return std::make_shared<CascadeTraining>(std::move(data),
175  std::move(dense),
176  std::move(params),
177  std::move(config.DenseInit),
178  config.DenseReg,
179  std::move(config.SparseInit),
180  config.SparseReg,
181  std::move(config.PostProcessing),
182  std::move(config.StatsGatherer),
183  std::move(shortlist)
184  );
185 }
std::unique_ptr< init::WeightsInitializer > m_SparseInit
Definition: cascade.cpp:34
std::unique_ptr< init::WeightsInitializer > m_DenseInit
Definition: cascade.cpp:33
CombinedWeightInitializer(std::unique_ptr< init::WeightsInitializer > di, std::unique_ptr< init::WeightsInitializer > si, long num_dense_features)
Definition: cascade.cpp:21
void get_initial_weight(label_id_t label_id, Eigen::Ref< DenseRealVector > target, objective::Objective &objective) override
Generate an initial vector for the given label. The result should be placed in target.
Definition: cascade.cpp:26
long num_features() const override
Definition: cascade.h:26
std::shared_ptr< objective::Objective > make_objective() const override
Makes an Objective object suitable for the dataset.
Definition: cascade.cpp:38
HyperParameters m_NewtonSettings
Definition: cascade.h:47
std::unique_ptr< solvers::Minimizer > make_minimizer() const override
Makes a Minimizer object suitable for the dataset.
Definition: cascade.cpp:46
std::shared_ptr< init::WeightInitializationStrategy > m_DenseInitStrategy
Definition: cascade.h:58
std::shared_ptr< model::Model > make_model(long num_features, model::PartialModelSpec spec) const override
Creates the model that will be used to store the results.
Definition: cascade.cpp:119
std::unique_ptr< init::WeightsInitializer > make_initializer() const override
Makes a WeightsInitializer object.
Definition: cascade.cpp:109
std::shared_ptr< const std::vector< std::vector< long > > > m_Shortlist
Definition: cascade.h:52
CascadeTraining(std::shared_ptr< const DatasetBase > tfidf_data, std::shared_ptr< const GenericFeatureMatrix > dense_data, HyperParameters hyper_params, std::shared_ptr< init::WeightInitializationStrategy > dense_init, real_t dense_reg, std::shared_ptr< init::WeightInitializationStrategy > sparse_init, real_t sparse_reg, std::shared_ptr< postproc::PostProcessFactory > post_proc, std::shared_ptr< TrainingStatsGatherer > gatherer, std::shared_ptr< const std::vector< std::vector< long >>> shortlist=nullptr)
Definition: cascade.cpp:132
TrainingStatsGatherer & get_statistics_gatherer() override
Definition: cascade.cpp:128
parallel::NUMAReplicator< const GenericFeatureMatrix > m_DenseReplicator
Definition: cascade.h:50
std::unique_ptr< postproc::PostProcessor > make_post_processor(const std::shared_ptr< objective::Objective > &objective) const override
Makes a PostProcessor object.
Definition: cascade.cpp:124
std::shared_ptr< postproc::PostProcessFactory > m_PostProcessor
Definition: cascade.h:55
std::shared_ptr< TrainingStatsGatherer > m_StatsGather
Definition: cascade.h:61
void update_minimizer(solvers::Minimizer &base_minimizer, label_id_t label_id) const override
Updates the setting of the Minimizer for handling label label_id.
Definition: cascade.cpp:53
void update_objective(objective::Objective &base_objective, label_id_t label_id) const override
Updates the setting of the Objective for handling label label_id.
Definition: cascade.cpp:80
parallel::NUMAReplicator< const GenericFeatureMatrix > m_SparseReplicator
Definition: cascade.h:49
std::shared_ptr< init::WeightInitializationStrategy > m_SparseInitStrategy
Definition: cascade.h:59
long num_examples() const noexcept
Get the total number of instances, i.e. the number of rows in the feature matrix.
Definition: data.cpp:52
std::shared_ptr< const BinaryLabelVector > get_labels(label_id_t id) const
Definition: data.cpp:21
virtual long num_positives(label_id_t id) const
Definition: data.cpp:13
This class represents a set of hyper-parameters.
Definition: hyperparams.h:241
hyper_param_t get(const std::string &name) const
Gets the hyper-parameter with the given name, or throws if it does not exist.
Definition: hyperparams.cpp:46
void apply(HyperParameterBase &target) const
Definition: hyperparams.cpp:50
This class gathers the setting-specific parts of the training process.
Definition: spec.h:24
const DatasetBase & get_data() const
Definition: spec.h:31
Base class for all weight initializers.
Definition: initializer.h:30
Strong typedef for an int to signify a label id.
Definition: types.h:20
Base class for implementations of an objective that combines dense features and sparse features.
Class that models an optimization objective.
Definition: objective.h:41
constexpr T to_index() const
Explicitly convert to an integer.
Definition: opaque_int.h:32
auto get_features(const DatasetBase &ds)
Definition: py_data.cpp:28
std::shared_ptr< WeightInitializationStrategy > create_zero_initializer()
Creates an initialization strategy that initializes all weight vectors to zero.
Definition: zero.cpp:33
std::unique_ptr< DenseAndSparseLinearBase > make_sp_dense_squared_hinge(std::shared_ptr< const GenericFeatureMatrix > dense_features, real_t dense_reg_strength, std::shared_ptr< const GenericFeatureMatrix > sparse_features, real_t sparse_reg_strength)
FactoryPtr create_identity()
Definition: postproc.cpp:50
Main namespace in which all types, classes, and functions are defined.
Definition: app.h:15
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
Definition: matrix_types.h:58
constexpr auto ssize(const C &c) -> std::common_type_t< std::ptrdiff_t, std::make_signed_t< decltype(c.size())>>
signed size free function. Taken from https://en.cppreference.com/w/cpp/iterator/size
Definition: conversion.h:42
types::DenseVector< std::int8_t > BinaryLabelVector
Dense vector for storing binary labels.
Definition: matrix_types.h:68
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
Definition: matrix_types.h:50
SparseFeatures shortlist_features(const SparseFeatures &source, const std::vector< long > &shortlist)
Definition: transform.cpp:219
std::shared_ptr< TrainingSpec > create_cascade_training(std::shared_ptr< const DatasetBase > data, std::shared_ptr< const GenericFeatureMatrix > dense, std::shared_ptr< const std::vector< std::vector< long >>> shortlist, HyperParameters params, CascadeTrainingConfig config)
Definition: cascade.cpp:161
float real_t
The default type for floating point values.
Definition: config.h:17
std::shared_ptr< TrainingStatsGatherer > StatsGatherer
Definition: spec.h:159
std::shared_ptr< init::WeightInitializationStrategy > DenseInit
Definition: spec.h:156
std::shared_ptr< init::WeightInitializationStrategy > SparseInit
Definition: spec.h:157
std::shared_ptr< postproc::PostProcessFactory > PostProcessing
Definition: spec.h:158
Specifies how to interpret a weight matrix for a partial model.
Definition: model.h:22