DiSMEC++
dismec.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "dismec.h"
10 #include "solver/newton.h"
11 #include "model/model.h"
12 #include "model/dense.h"
13 #include "model/sparse.h"
14 #include "data/data.h"
15 #include "initializer.h"
16 #include "weighting.h"
17 #include "postproc.h"
18 
19 using namespace dismec;
20 
21 /*
22 std::unique_ptr<objective::Objective> make_regularizer(RegularizerSpec spec) {
23  switch(spec) {
24  case RegularizerType::REG_L2:
25  return std::make_unique<objective::SquaredNormRegularizer>(scale, ignore_bias);
26  case RegularizerType::REG_L1:
27  return std::make_unique<objective::HuberRegularizer>(1e-2, scale, ignore_bias);
28  case RegularizerType::REG_L1_RELAXED:
29  return std::make_unique<objective::HuberRegularizer>(1e-1, scale, ignore_bias);
30  case RegularizerType::REG_HUBER:
31  return std::make_unique<objective::HuberRegularizer>(1, scale, ignore_bias);
32  case RegularizerType::REG_ELASTIC_50_50:
33  return std::make_unique<objective::ElasticNetRegularizer>(1e-1, scale, 0.5, ignore_bias);
34  case RegularizerType::REG_ELASTIC_90_10:
35  return std::make_unique<objective::ElasticNetRegularizer>(1e-1, scale, 0.9, ignore_bias);
36  default:
37  throw std::invalid_argument("Unknown regularizer");
38  }
39 }*/
40 
41 std::shared_ptr<objective::Objective> dismec::make_loss(
42  LossType type,
43  std::shared_ptr<const GenericFeatureMatrix> X,
44  std::unique_ptr<objective::Objective> reg) {
45  switch (type) {
47  if(X->is_sparse()) {
48  return std::make_shared<objective::Regularized_SquaredHingeSVC>(X, std::move(reg));
49  } else {
50  return make_squared_hinge(X, std::move(reg));
51  }
52  case LossType::LOGISTIC:
53  return make_logistic_loss(X, std::move(reg));
55  return make_huber_hinge(X, std::move(reg), 1.0);
56  case LossType::HINGE:
57  return make_huber_hinge(X, std::move(reg), 0.1);
58  default:
59  THROW_EXCEPTION(std::runtime_error, "Unexpected loss type");
60  }
61 }
62 
63 
64 std::shared_ptr<objective::Objective> DiSMECTraining::make_objective() const {
65  // we make a copy of the features, so they are in the local numa memory
66  auto copy = m_FeatureReplicator.get_local();
67  auto reg = std::visit([](auto&& config){ return make_regularizer(config); }, m_Regularizer);
68  return make_loss(m_Loss, std::move(copy), std::move(reg));
69 }
70 
71 std::unique_ptr<solvers::Minimizer> DiSMECTraining::make_minimizer() const {
72  auto minimizer = std::make_unique<solvers::NewtonWithLineSearch>(num_features());
73  m_NewtonSettings.apply(*minimizer);
74  return minimizer;
75 }
76 
77 void DiSMECTraining::update_minimizer(solvers::Minimizer& base_minimizer, label_id_t label_id) const
78 {
79  auto* minimizer = dynamic_cast<solvers::NewtonWithLineSearch*>(&base_minimizer);
80  if(!minimizer)
81  throw std::logic_error("Could not cast minimizer to <NewtonWithLineSearch>");
82 
83  // adjust the epsilon parameter according to number of positives/number of negatives
84  std::size_t num_pos = get_data().num_positives(label_id);
85  double small_count = static_cast<double>(std::min(num_pos, get_data().num_examples() - num_pos));
86  double epsilon_scale = std::max(small_count, 1.0) / get_data().num_examples();
87  minimizer->set_epsilon(m_BaseEpsilon * epsilon_scale);
88 }
89 
/// \brief Creates a DiSMECTraining instance.
/// \param data The dataset on which to train (shared, read-only).
/// \param hyper_params Hyper-parameters that will be applied to each Newton minimizer;
///        must contain a `double` entry named "epsilon".
/// \param weighting Optional positive/negative example weighting scheme (may be null).
/// \param init Strategy for initializing the weight vectors; must not be null.
/// \param post_proc Factory for per-label weight post-processing; must not be null.
/// \param gatherer Collector for training statistics.
/// \param use_sparse Whether the result model stores weights sparsely.
/// \param regularizer Which regularizer (and settings) to use.
/// \param loss Which loss function to use.
/// \throws std::invalid_argument if `init` or `post_proc` is null.
DiSMECTraining::DiSMECTraining(std::shared_ptr<const DatasetBase> data,
                               HyperParameters hyper_params,
                               std::shared_ptr<WeightingScheme> weighting,
                               std::shared_ptr<init::WeightInitializationStrategy> init,
                               std::shared_ptr<postproc::PostProcessFactory> post_proc,
                               std::shared_ptr<TrainingStatsGatherer> gatherer,
                               bool use_sparse,
                               RegularizerSpec regularizer,
                               LossType loss) :
    TrainingSpec(std::move(data)),
    m_NewtonSettings( std::move(hyper_params) ),
    m_Weighting( std::move(weighting) ),
    m_UseSparseModel( use_sparse ),
    m_InitStrategy( std::move(init) ),
    m_PostProcessor( std::move(post_proc) ),
    // NOTE: get_data() is safe here because the TrainingSpec base (which owns
    // the dataset) is initialized before m_FeatureReplicator.
    m_FeatureReplicator(get_data().get_features() ),
    m_StatsGather( std::move(gatherer) ),
    m_Regularizer( regularizer ),
    m_Loss( loss )
{
    if(!m_InitStrategy) {
        throw std::invalid_argument("Missing weight initialization strategy");
    }

    if(!m_PostProcessor) {
        throw std::invalid_argument("Missing weight post processor");
    }

    // extract the base value of `epsilon` from the `hyper_params` object.
    // update_minimizer() later scales this per label.
    m_BaseEpsilon = std::get<double>(m_NewtonSettings.get("epsilon"));
}
121 
122 void DiSMECTraining::update_objective(objective::Objective& base_objective, label_id_t label_id) const {
123  auto* objective = dynamic_cast<objective::LinearClassifierBase*>(&base_objective);
124  if(!objective)
125  throw std::logic_error("Could not cast objective to <LinearClassifierBase>");
126 
127  // we need to set the labels before we update the costs, since the label information is needed
128  // to determine whether to apply the positive or the negative weighting
129  get_data().get_labels(label_id, objective->get_label_ref());
130  if(m_Weighting) {
131  objective->update_costs(m_Weighting->get_positive_weight(label_id),
132  m_Weighting->get_negative_weight(label_id));
133  }
134 }
135 
136 std::unique_ptr<init::WeightsInitializer> DiSMECTraining::make_initializer() const {
137  return m_InitStrategy->make_initializer(m_FeatureReplicator.get_local());
138 }
139 
140 std::shared_ptr<model::Model> DiSMECTraining::make_model(long num_features, model::PartialModelSpec spec) const {
141  if(m_UseSparseModel) {
142  return std::make_shared<model::SparseModel>(num_features, spec);
143  } else {
144  return std::make_shared<model::DenseModel>(num_features, spec);
145  }
146 }
147 
148 std::unique_ptr<postproc::PostProcessor> DiSMECTraining::make_post_processor(const std::shared_ptr<objective::Objective>& objective) const {
149  return m_PostProcessor->make_processor(objective);
150 }
151 
153  return *m_StatsGather;
154 }
155 
156 
157 std::shared_ptr<TrainingSpec> dismec::create_dismec_training(std::shared_ptr<const DatasetBase> data,
158  HyperParameters params,
159  DismecTrainingConfig config) {
160  if(!config.Init)
162  if(!config.PostProcessing)
164  return std::make_shared<DiSMECTraining>(std::move(data), std::move(params), std::move(config.Weighting),
165  std::move(config.Init),
166  std::move(config.PostProcessing),
167  std::move(config.StatsGatherer),
168  config.Sparse,
169  config.Regularizer,
170  config.Loss);
171 }
172 
long num_examples() const noexcept
Get the total number of instances, i.e. the number of rows in the feature matrix.
Definition: data.cpp:52
std::shared_ptr< const BinaryLabelVector > get_labels(label_id_t id) const
Definition: data.cpp:21
virtual long num_positives(label_id_t id) const
Definition: data.cpp:13
long num_features() const noexcept
Get the total number of features, i.e. the number of columns in the feature matrix.
Definition: data.cpp:48
parallel::NUMAReplicator< const GenericFeatureMatrix > m_FeatureReplicator
Definition: dismec.h:70
std::shared_ptr< objective::Objective > make_objective() const override
Makes an Objective object suitable for the dataset.
Definition: dismec.cpp:64
std::shared_ptr< WeightingScheme > m_Weighting
Definition: dismec.h:61
HyperParameters m_NewtonSettings
Definition: dismec.h:60
void update_objective(objective::Objective &base_objective, label_id_t label_id) const override
Updates the setting of the Objective for handling label label_id.
Definition: dismec.cpp:122
double m_BaseEpsilon
Definition: dismec.h:74
TrainingStatsGatherer & get_statistics_gatherer() override
Definition: dismec.cpp:152
std::shared_ptr< postproc::PostProcessFactory > m_PostProcessor
Definition: dismec.h:68
std::shared_ptr< init::WeightInitializationStrategy > m_InitStrategy
Definition: dismec.h:65
DiSMECTraining(std::shared_ptr< const DatasetBase > data, HyperParameters hyper_params, std::shared_ptr< WeightingScheme > weighting, std::shared_ptr< init::WeightInitializationStrategy > init, std::shared_ptr< postproc::PostProcessFactory > post_proc, std::shared_ptr< TrainingStatsGatherer > gatherer, bool use_sparse, RegularizerSpec regularizer, LossType loss)
Creates a DiSMECTraining instance.
Definition: dismec.cpp:90
void update_minimizer(solvers::Minimizer &base_minimizer, label_id_t label_id) const override
Updates the setting of the Minimizer for handling label label_id.
Definition: dismec.cpp:77
std::shared_ptr< model::Model > make_model(long num_features, model::PartialModelSpec spec) const override
Creates the model that will be used to store the results.
Definition: dismec.cpp:140
std::unique_ptr< solvers::Minimizer > make_minimizer() const override
Makes a Minimizer object suitable for the dataset.
Definition: dismec.cpp:71
std::shared_ptr< TrainingStatsGatherer > m_StatsGather
Definition: dismec.h:72
RegularizerSpec m_Regularizer
Definition: dismec.h:75
std::unique_ptr< init::WeightsInitializer > make_initializer() const override
Makes a WeightsInitializer object.
Definition: dismec.cpp:136
std::unique_ptr< postproc::PostProcessor > make_post_processor(const std::shared_ptr< objective::Objective > &objective) const override
Makes a PostProcessor object.
Definition: dismec.cpp:148
This class represents a set of hyper-parameters.
Definition: hyperparams.h:241
hyper_param_t get(const std::string &name) const
Gets the hyper-parameter with the given name, or throws if it does not exist.
Definition: hyperparams.cpp:46
void apply(HyperParameterBase &target) const
Definition: hyperparams.cpp:50
This class gathers the setting-specific parts of the training process.
Definition: spec.h:24
const DatasetBase & get_data() const
Definition: spec.h:31
virtual long num_features() const
Definition: dismec.cpp:173
Strong typedef for an int to signify a label id.
Definition: types.h:20
Base class for objectives that use a linear classifier.
Definition: linear.h:27
Class that models an optimization objective.
Definition: objective.h:41
auto get_features(const DatasetBase &ds)
Definition: py_data.cpp:28
std::shared_ptr< WeightInitializationStrategy > create_zero_initializer()
Creates an initialization strategy that initializes all weight vectors to zero.
Definition: zero.cpp:33
std::unique_ptr< Objective > make_regularizer(const SquaredNormConfig &config)
std::unique_ptr< GenericLinearClassifier > make_huber_hinge(std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< Objective > regularizer, real_t epsilon)
std::unique_ptr< GenericLinearClassifier > make_logistic_loss(std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< Objective > regularizer)
std::unique_ptr< GenericLinearClassifier > make_squared_hinge(std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< Objective > regularizer)
FactoryPtr create_identity()
Definition: postproc.cpp:50
auto visit(F &&f, Variants &&... variants)
Definition: eigen_generic.h:95
Main namespace in which all types, classes, and functions are defined.
Definition: app.h:15
std::shared_ptr< objective::Objective > make_loss(LossType type, std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< objective::Objective > regularizer)
Definition: dismec.cpp:41
std::variant< objective::SquaredNormConfig, objective::HuberConfig, objective::ElasticConfig > RegularizerSpec
Definition: spec.h:143
LossType
Definition: spec.h:129
std::shared_ptr< TrainingSpec > create_dismec_training(std::shared_ptr< const DatasetBase > data, HyperParameters params, DismecTrainingConfig config)
Definition: dismec.cpp:157
std::shared_ptr< postproc::PostProcessFactory > PostProcessing
Definition: spec.h:148
RegularizerSpec Regularizer
Definition: spec.h:151
std::shared_ptr< init::WeightInitializationStrategy > Init
Definition: spec.h:147
std::shared_ptr< WeightingScheme > Weighting
Definition: spec.h:146
std::shared_ptr< TrainingStatsGatherer > StatsGatherer
Definition: spec.h:149
Specifies how to interpret a weight matrix for a partial model.
Definition: model.h:22
#define THROW_EXCEPTION(exception_type,...)
Definition: throw_error.h:16