26 m_GenericInBuffer = line_interpolation(position);
28 return f + m_Regularizer->lookup_on_line(position);
// NOTE(review): fragment — the enclosing function's signature was lost in extraction;
// presumably this is the tail of GenericLinearClassifier::value_unchecked. TODO confirm.
// Evaluates the per-instance loss values from the pre-computed margins `xTw` into the
// scratch buffer, then returns the cost-weighted sum of those losses.
33 calculate_loss(xTw, labels(), m_GenericOutBuffer);
// total objective contribution of the loss term: dot(per-instance losses, per-instance costs)
34 return m_GenericOutBuffer.dot(costs());
// NOTE(review): the first line of this signature was lost in extraction; from the sibling
// `*_unchecked` methods and the documented public `hessian_times_direction(location,
// direction, target)` this is presumably the corresponding unchecked implementation —
// TODO confirm against the original file.
39 Eigen::Ref<DenseRealVector> target) {
// Regularizer writes its Hessian-vector product directly into `target`; the loss
// contribution is accumulated on top, row by row.
40 m_Regularizer->hessian_times_direction(location, direction, target);
// Cached per-instance second derivatives of the loss at `location`.
42 const auto& hessian = cached_2nd_derivative(location);
// `visit` dispatches on the sparse/dense variant held by generic_features().
43 visit([&](
const auto& features) {
44 for (
int pos = 0; pos < hessian.size(); ++pos) {
// Skip rows with zero curvature — the common case for hinge-like losses.
45 if(
real_t h = hessian.coeff(pos); h != 0) {
// (x_i · direction) scales row i's contribution: target += h_i (x_i·d) x_i.
46 real_t factor = features.row(pos).dot(direction);
47 target += features.row(pos) * factor * h;
50 }, generic_features());
53 void GenericLinearClassifier::gradient_and_pre_conditioner_unchecked(
const HashVector& location,
54 Eigen::Ref<DenseRealVector> gradient,
55 Eigen::Ref<DenseRealVector> pre) {
56 m_Regularizer->gradient(location, gradient);
57 m_Regularizer->diag_preconditioner(location, pre);
59 const auto& derivative = cached_derivative(location);
60 const auto& hessian = cached_2nd_derivative(location);
61 visit([&](
const auto& features) {
62 for (
int pos = 0; pos < derivative.size(); ++pos) {
63 if(
real_t d = derivative.coeff(pos); d != 0) {
64 gradient += features.row(pos) * d;
66 if(
real_t h = hessian.coeff(pos); h != 0) {
67 pre += features.row(pos).cwiseAbs2() * h;
70 }, generic_features());
74 void GenericLinearClassifier::gradient_unchecked(
const HashVector& location, Eigen::Ref<DenseRealVector> target) {
75 m_Regularizer->gradient(location, target);
77 const auto& derivative = cached_derivative(location);
78 visit([&](
const auto& features) {
79 for (
int pos = 0; pos < derivative.size(); ++pos) {
80 if(
real_t d = derivative.coeff(pos); d != 0) {
81 target += features.row(pos) * d;
84 }, generic_features());
87 void GenericLinearClassifier::gradient_at_zero_unchecked(Eigen::Ref<DenseRealVector> target) {
88 m_Regularizer->gradient_at_zero(target);
90 m_GenericInBuffer = DenseRealVector::Zero(labels().size());
91 calculate_derivative(m_GenericInBuffer, labels(), m_GenericOutBuffer);
92 const auto& cost_vector = costs();
93 visit([&](
const auto& features) {
94 for (
int pos = 0; pos < m_GenericOutBuffer.size(); ++pos) {
95 if(
real_t d = m_GenericOutBuffer.coeff(pos); d != 0) {
96 target += features.row(pos) * (cost_vector.coeff(pos) * d);
99 }, generic_features());
102 void GenericLinearClassifier::diag_preconditioner_unchecked(
const HashVector& location, Eigen::Ref<DenseRealVector> target) {
103 m_Regularizer->diag_preconditioner(location, target);
105 const auto& hessian = cached_2nd_derivative(location);
106 visit([&](
const auto& features) {
107 for (
int pos = 0; pos < hessian.size(); ++pos) {
108 if(
real_t h = hessian.coeff(pos); h != 0) {
109 target += features.row(pos).cwiseAbs2() * h;
112 }, generic_features());
// NOTE(review): interior fragment of GenericLinearClassifier::cached_derivative — the
// surrounding function header and the cache-update lambda header were lost in extraction.
// Computes the per-instance loss derivative from the margins x·w into `out`.
117 calculate_derivative(x_times_w(location), labels(), out);
// Record gradient sparsity as a statistic: percentage of non-zero derivative entries.
120 for(
int i = 0; i < out.size(); ++i) {
121 if(out.coeff(i) != 0) ++nnz;
123 return static_cast<real_t>(
static_cast<double>(100*nnz) / out.size()); });
// Finally scale the derivative element-wise by the per-instance costs.
124 out.array() *= costs().array();
// NOTE(review): interior fragment of GenericLinearClassifier::cached_2nd_derivative —
// the surrounding cache-update boilerplate was lost in extraction. Computes per-instance
// second derivatives from the margins, then scales them by the per-instance costs.
130 calculate_2nd_derivative(x_times_w(location), labels(), out);
131 out.array() *= costs().array();
135 void GenericLinearClassifier::invalidate_labels() {
136 m_DerivativeBuffer.invalidate();
137 m_SecondDerivativeBuffer.invalidate();
// Constructor: sizes all per-instance scratch/cache buffers to num_instances() and takes
// ownership of the regularizer.
// NOTE(review): the base-class initializer (original line 142) and the guard condition
// around the THROW_EXCEPTION (original lines ~146-148, presumably a null-check on the
// regularizer given the message text) were lost in extraction — TODO confirm.
140 GenericLinearClassifier::GenericLinearClassifier(std::shared_ptr<const GenericFeatureMatrix> X,
141 std::unique_ptr<Objective> regularizer)
143 m_SecondDerivativeBuffer(num_instances()),
144 m_DerivativeBuffer(num_instances()), m_GenericInBuffer(num_instances()),
145 m_GenericOutBuffer(num_instances()), m_Regularizer(std::move(regularizer))
149 THROW_EXCEPTION(std::invalid_argument,
"Regularizer cannot be nullptr");
166 template<
class Phi,
class... Args>
168 std::unique_ptr<objective::Objective> regularizer,
170 return std::make_unique<objective::GenericMarginClassifier<Phi>>(std::move(X), std::move(regularizer),
171 Phi{std::forward<Args>(args)...});
176 std::unique_ptr<Objective> regularizer) {
177 return make_gen_lin_classifier<SquaredHingePhi>(std::move(X), std::move(regularizer));
181 std::unique_ptr<Objective> regularizer) {
182 return make_gen_lin_classifier<LogisticPhi>(std::move(X), std::move(regularizer));
186 std::unique_ptr<Objective> regularizer,
188 return make_gen_lin_classifier<HuberPhi>(std::move(X), std::move(regularizer), epsilon);
191 #ifndef DOCTEST_CONFIG_DISABLE
// NOTE(review): fragment of test_equivalence(a, b, input) — the function header and the
// statements that fill buffer_a/buffer_b between the checks (gradient/pre-conditioner/
// hessian calls) were lost in extraction.
// Helper: element-wise approximate comparison of two vectors, with a failure message.
200 auto test_vector_equal = [](
auto&& u,
auto&& v,
const char* message){
201 REQUIRE(u.size() == v.size());
202 for(
int i = 0; i < u.size(); ++i) {
203 REQUIRE_MESSAGE(u.coeff(i) == doctest::Approx(v.coeff(i)), message);
// Objective values of both implementations must agree at `input`.
208 CHECK_MESSAGE(a.
value(input) == doctest::Approx(b.
value(input)),
"values differ");
// The remaining checks compare gradient@0, gradient, pre-conditioner, Hessian-vector
// product, and the combined gradient+pre-conditioner path between the two objectives.
212 test_vector_equal(buffer_a, buffer_b,
"gradient@0 mismatch");
216 test_vector_equal(buffer_a, buffer_b,
"gradient mismatch");
220 test_vector_equal(buffer_a, buffer_b,
"pre-conditioner mismatch");
225 test_vector_equal(buffer_a, buffer_b,
"hessian mismatch");
231 test_vector_equal(buffer_a, buffer_b,
"gradient mismatch");
232 test_vector_equal(buffer_a2, buffer_b2,
"pre-conditioner mismatch");
// NOTE(review): fragment of TEST_CASE("sparse/dense equivalence") — the TEST_CASE header,
// the sparse-feature construction, the reference objective setup, and several closing
// braces were lost in extraction.
// Cost weights, overridden per SUBCASE below.
238 real_t pos_cost = 1, neg_cost = 1;
// Builds random dense features plus random ±1 labels, then checks that dense, sparse,
// and reference implementations of the squared-hinge objective agree.
240 auto run_test = [&](){
241 DenseFeatures features_dense = DenseFeatures::Random(rows, cols);
244 Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1> labels = Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1>::Random(rows);
// Quantize random labels to exactly +1 / -1.
245 for(
int i = 0; i < labels.size(); ++i) {
246 if(labels.coeff(i) > 0) {
247 labels.coeffRef(i) = 1;
249 labels.coeffRef(i) = -1;
// Same objective, once over the dense and once over the sparse feature matrix.
254 auto reg_dense =
make_squared_hinge(std::make_shared<GenericFeatureMatrix>(features_dense),
255 std::make_unique<objective::SquaredNormRegularizer>());
256 auto reg_sparse =
make_squared_hinge(std::make_shared<GenericFeatureMatrix>(features_sparse),
257 std::make_unique<objective::SquaredNormRegularizer>());
260 std::make_unique<objective::SquaredNormRegularizer>());
// Applies identical labels/costs to both objectives before comparing them.
262 auto do_test = [&](
auto& first,
auto& second) {
263 first.get_label_ref() = labels;
264 second.get_label_ref() = labels;
266 first.update_costs(pos_cost, neg_cost);
267 second.update_costs(pos_cost, neg_cost);
// dense vs sparse, and reference vs sparse.
273 do_test(*reg_dense, *reg_sparse);
274 do_test(reference, *reg_sparse);
// Shape and cost-weighting variations.
277 SUBCASE(
"rows > cols") {
282 SUBCASE(
"cols > rows") {
288 SUBCASE(
"pos weighted") {
295 SUBCASE(
"neg weighted") {
// NOTE(review): fragment of a second TEST_CASE working on a small hand-built sparse
// matrix — the TEST_CASE header, label assignments, gradient computations and several
// closing braces were lost in extraction.
// Hand-constructed 3-row sparse feature matrix.
314 x.insert(0, 3) = 1.0;
315 x.insert(1, 0) = 2.0;
316 x.insert(2, 1) = 1.0;
317 x.insert(2, 2) = 1.0;
319 Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1> y(3);
// Objective under test and a reference implementation, both squared-norm regularized.
324 std::make_unique<objective::SquaredNormRegularizer>());
325 loss->get_label_ref() = y;
328 std::make_unique<objective::SquaredNormRegularizer>());
329 reference.get_label_ref() = y;
// Fixed weight vector at which all quantities are evaluated.
332 weights << 1.0, 2.0, 0.0, -1.0, 2.0;
// `factor` scales the expected loss value according to the current cost weighting.
334 auto do_check = [&](
real_t factor){
337 CHECK_MESSAGE(loss->value(
HashVector{weights}) == doctest::Approx(factor * 9.0 + 5),
"wrong value");
// Expected gradient direction for the hand-computed example.
345 i << 0.0, 1.0, 1.0, 0.0, 0.0;
347 CHECK_MESSAGE(grad == r,
"wrong gradient");
// First-order check: a small step along the gradient changes the value accordingly.
353 CHECK (new_val - old_val == doctest::Approx(grad.squaredNorm() * 1e-4).epsilon(1e-4));
// Pre-conditioner, Hessian-times-direction, and gradient@0 must match the reference.
358 loss->diag_preconditioner(
HashVector{weights}, prec_new);
359 reference.diag_preconditioner(
HashVector{weights}, prec_old);
360 CHECK_MESSAGE(prec_new == prec_old,
"wrong preconditioner");
362 loss->hessian_times_direction(
HashVector{weights}, i, prec_new);
363 reference.hessian_times_direction(
HashVector{weights}, i, prec_old);
364 CHECK_MESSAGE(prec_new == prec_old,
"wrong hessian");
367 loss->gradient_at_zero(prec_new);
368 reference.gradient_at_zero(prec_old);
369 CHECK_MESSAGE(prec_new == prec_old,
"g@0 wrong");
// Cost-weighting variations exercised through do_check.
376 SUBCASE(
"unweighted") {
379 SUBCASE(
"positive-reweighted") {
380 loss->update_costs(2.0, 1.0);
381 reference.update_costs(2.0, 1.0);
384 SUBCASE(
"negative-reweighted") {
385 loss->update_costs(1.0, 2.0);
386 reference.update_costs(1.0, 2.0);
An Eigen vector with versioning information, to implement simple caching of results.
This is a non-templated, runtime-polymorphic generic implementation of the linear classifier objective.
std::unique_ptr< Objective > m_Regularizer
Pointer to the regularizer.
void project_to_line_unchecked(const HashVector &location, const DenseRealVector &direction) override
Base class for objectives that use a linear classifier.
void project_linear_to_line(const HashVector &location, const DenseRealVector &direction)
Prepares the cache variables for line projection.
Class that models an optimization objective.
void hessian_times_direction(const HashVector &location, const DenseRealVector &direction, Eigen::Ref< DenseRealVector > target)
Calculates the product of the Hessian matrix at location with direction.
void gradient_at_zero(Eigen::Ref< DenseRealVector > target)
Gets the gradient for location zero.
void gradient(const HashVector &location, Eigen::Ref< DenseRealVector > target)
Evaluate the gradient at location.
void gradient_and_pre_conditioner(const HashVector &location, Eigen::Ref< DenseRealVector > gradient, Eigen::Ref< DenseRealVector > pre)
Combines the calculation of gradient and pre-conditioner, which may be more efficient in some cases.
void diag_preconditioner(const HashVector &location, Eigen::Ref< DenseRealVector > target)
Get precondition to be used in CG optimization.
real_t value(const HashVector &location)
Evaluate the objective at the given location.
void declare_stat(stat_id_t index, StatisticMetaData meta)
Declares a new statistic. This function just forwards all its arguments to the internal StatisticsCollection.
TEST_CASE("sparse/dense equivalence")
std::unique_ptr< GenericLinearClassifier > make_gen_lin_classifier(std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< objective::Objective > regularizer, Args... args)
void test_equivalence(objective::Objective &a, objective::Objective &b, const HashVector &input)
constexpr const stat_id_t STAT_GRAD_SPARSITY
real_t value_from_xTw(const DenseRealVector &cost, const BinaryLabelVector &labels, const Eigen::DenseBase< Derived > &xTw)
std::unique_ptr< GenericLinearClassifier > make_huber_hinge(std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< Objective > regularizer, real_t epsilon)
std::unique_ptr< GenericLinearClassifier > make_logistic_loss(std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< Objective > regularizer)
std::unique_ptr< GenericLinearClassifier > make_squared_hinge(std::shared_ptr< const GenericFeatureMatrix > X, std::unique_ptr< Objective > regularizer)
opaque_int_type< detail::stat_id_tag > stat_id_t
An opaque int-like type that is used to identify a statistic in a StatisticsCollection.
auto visit(F &&f, Variants &&... variants)
Main namespace in which all types, classes, and functions are defined.
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
types::DenseVector< real_t > DenseRealVector
Any dense, real values vector.
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
float real_t
The default type for floating point values.
#define THROW_EXCEPTION(exception_type,...)