DiSMEC++
generic_linear.cpp
// Copyright (c) 2021, Aalto University, developed by Erik Schultheis
// All rights reserved.
//
// SPDX-License-Identifier: MIT

#include "generic_linear.h"
#include "utils/eigen_generic.h"
#include "utils/throw_error.h"
#include "stats/collection.h"
#include "margin_losses.h"

using namespace dismec;

namespace {
    constexpr const stat_id_t STAT_GRAD_SPARSITY{8};
}

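// The loss part of the objective has the generic form
//     f(w) = sum_i c_i * loss_i( (X w)_i ) + R(w),
// where c_i are the per-instance costs and R is the regularizer. The cached
// per-instance derivatives d_i = c_i * loss_i'((X w)_i) and
// h_i = c_i * loss_i''((X w)_i) (see cached_derivative() and
// cached_2nd_derivative() below) then yield the gradient X^T d and the
// Hessian-vector product X^T diag(h) X v, to which the regularizer's
// contributions are added.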
real_t GenericLinearClassifier::value_unchecked(const HashVector& location) {
    const DenseRealVector& xTw = x_times_w(location);
    return value_from_xTw(xTw) + m_Regularizer->value(location);
}

real_t GenericLinearClassifier::lookup_on_line(real_t position) {
    m_GenericInBuffer = line_interpolation(position);
    real_t f = value_from_xTw(m_GenericInBuffer);
    return f + m_Regularizer->lookup_on_line(position);
}

real_t GenericLinearClassifier::value_from_xTw(const DenseRealVector& xTw)
{
    calculate_loss(xTw, labels(), m_GenericOutBuffer);
    return m_GenericOutBuffer.dot(costs());
}

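// Hessian-times-direction: on top of the regularizer's contribution, accumulate
// sum_i h_i * (x_i . direction) * x_i, skipping all instances whose cached
// second derivative h_i is zero.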
void GenericLinearClassifier::hessian_times_direction_unchecked(const HashVector& location,
                                                                const DenseRealVector& direction,
                                                                Eigen::Ref<DenseRealVector> target) {
    m_Regularizer->hessian_times_direction(location, direction, target);

    const auto& hessian = cached_2nd_derivative(location);
    visit([&](const auto& features) {
        for (int pos = 0; pos < hessian.size(); ++pos) {
            if(real_t h = hessian.coeff(pos); h != 0) {
                real_t factor = features.row(pos).dot(direction);
                target += features.row(pos) * factor * h;
            }
        }
    }, generic_features());
}

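// Combines the calculation of gradient and pre-conditioner in a single sweep
// over the feature matrix; this saves one full traversal compared to calling
// gradient() and diag_preconditioner() separately.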
void GenericLinearClassifier::gradient_and_pre_conditioner_unchecked(const HashVector& location,
                                                                     Eigen::Ref<DenseRealVector> gradient,
                                                                     Eigen::Ref<DenseRealVector> pre) {
    m_Regularizer->gradient(location, gradient);
    m_Regularizer->diag_preconditioner(location, pre);

    const auto& derivative = cached_derivative(location);
    const auto& hessian = cached_2nd_derivative(location);
    visit([&](const auto& features) {
        for (int pos = 0; pos < derivative.size(); ++pos) {
            if(real_t d = derivative.coeff(pos); d != 0) {
                gradient += features.row(pos) * d;
            }
            if(real_t h = hessian.coeff(pos); h != 0) {
                pre += features.row(pos).cwiseAbs2() * h;
            }
        }
    }, generic_features());
}

void GenericLinearClassifier::gradient_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) {
    m_Regularizer->gradient(location, target);

    const auto& derivative = cached_derivative(location);
    visit([&](const auto& features) {
        for (int pos = 0; pos < derivative.size(); ++pos) {
            if(real_t d = derivative.coeff(pos); d != 0) {
                target += features.row(pos) * d;
            }
        }
    }, generic_features());
}

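// Gradient at w = 0: here x^T w is the zero vector, so the point-wise loss
// derivative is evaluated directly on a zero buffer instead of going through
// the location-keyed caches; the costs are applied explicitly per instance.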
void GenericLinearClassifier::gradient_at_zero_unchecked(Eigen::Ref<DenseRealVector> target) {
    m_Regularizer->gradient_at_zero(target);

    m_GenericInBuffer = DenseRealVector::Zero(labels().size());
    calculate_derivative(m_GenericInBuffer, labels(), m_GenericOutBuffer);
    const auto& cost_vector = costs();
    visit([&](const auto& features) {
        for (int pos = 0; pos < m_GenericOutBuffer.size(); ++pos) {
            if(real_t d = m_GenericOutBuffer.coeff(pos); d != 0) {
                target += features.row(pos) * (cost_vector.coeff(pos) * d);
            }
        }
    }, generic_features());
}

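// Diagonal pre-conditioner for the CG solver: the diagonal of the loss Hessian
// is sum_i h_i * x_i^2 (element-wise square), added on top of the
// regularizer's pre-conditioner.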
void GenericLinearClassifier::diag_preconditioner_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) {
    m_Regularizer->diag_preconditioner(location, target);

    const auto& hessian = cached_2nd_derivative(location);
    visit([&](const auto& features) {
        for (int pos = 0; pos < hessian.size(); ++pos) {
            if(real_t h = hessian.coeff(pos); h != 0) {
                target += features.row(pos).cwiseAbs2() * h;
            }
        }
    }, generic_features());
}

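// The derivative buffers are cached based on the version tag of the HashVector,
// so repeated queries at the same location (e.g. a gradient followed by several
// Hessian-vector products within one CG run) compute the point-wise derivatives
// only once. Note that the cached values already include the cost factors. The
// STAT_GRAD_SPARSITY statistic records the percentage of nonzero entries in the
// loss derivative.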
const DenseRealVector& GenericLinearClassifier::cached_derivative(const HashVector& location) {
    return m_DerivativeBuffer.update(location, [&](const DenseRealVector& input, DenseRealVector& out){
        calculate_derivative(x_times_w(location), labels(), out);
        record(STAT_GRAD_SPARSITY, [&](){
            long nnz = 0;
            for(int i = 0; i < out.size(); ++i) {
                if(out.coeff(i) != 0) ++nnz;
            }
            return static_cast<real_t>(static_cast<double>(100 * nnz) / out.size());
        });
        out.array() *= costs().array();
    });
}

const DenseRealVector& GenericLinearClassifier::cached_2nd_derivative(const HashVector& location) {
    return m_SecondDerivativeBuffer.update(location, [&](const DenseRealVector& input, DenseRealVector& out){
        calculate_2nd_derivative(x_times_w(location), labels(), out);
        out.array() *= costs().array();
    });
}

void GenericLinearClassifier::invalidate_labels() {
    m_DerivativeBuffer.invalidate();
    m_SecondDerivativeBuffer.invalidate();
}

GenericLinearClassifier::GenericLinearClassifier(std::shared_ptr<const GenericFeatureMatrix> X,
                                                 std::unique_ptr<Objective> regularizer)
    : LinearClassifierBase(std::move(X)),
      m_SecondDerivativeBuffer(num_instances()),
      m_DerivativeBuffer(num_instances()), m_GenericInBuffer(num_instances()),
      m_GenericOutBuffer(num_instances()), m_Regularizer(std::move(regularizer))
{
    declare_stat(STAT_GRAD_SPARSITY, {"gradient_sparsity", "% non-zeros"});
    if(!m_Regularizer) {
        THROW_EXCEPTION(std::invalid_argument, "Regularizer cannot be nullptr");
    }
}

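// Projects the objective onto a search line: the linear part prepares the
// interpolation caches used by lookup_on_line(), and the regularizer performs
// its own line projection.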
void GenericLinearClassifier::project_to_line_unchecked(const HashVector& location, const DenseRealVector& direction) {
    project_linear_to_line(location, direction);
    m_Regularizer->project_to_line(location, direction);
}

// ---------------------------------------------------------------------------------------------------------------------
// Some concrete implementations of common loss functions
// ---------------------------------------------------------------------------------------------------------------------

namespace objective = dismec::objective;

namespace {
    template<class Phi, class... Args>
    std::unique_ptr<GenericLinearClassifier> make_gen_lin_classifier(std::shared_ptr<const GenericFeatureMatrix> X,
                                                                     std::unique_ptr<objective::Objective> regularizer,
                                                                     Args... args) {
        return std::make_unique<objective::GenericMarginClassifier<Phi>>(std::move(X), std::move(regularizer),
                                                                         Phi{std::forward<Args>(args)...});
    }
}

std::unique_ptr<GenericLinearClassifier> objective::make_squared_hinge(std::shared_ptr<const GenericFeatureMatrix> X,
                                                                       std::unique_ptr<Objective> regularizer) {
    return make_gen_lin_classifier<SquaredHingePhi>(std::move(X), std::move(regularizer));
}

std::unique_ptr<GenericLinearClassifier> objective::make_logistic_loss(std::shared_ptr<const GenericFeatureMatrix> X,
                                                                       std::unique_ptr<Objective> regularizer) {
    return make_gen_lin_classifier<LogisticPhi>(std::move(X), std::move(regularizer));
}

std::unique_ptr<GenericLinearClassifier> objective::make_huber_hinge(std::shared_ptr<const GenericFeatureMatrix> X,
                                                                     std::unique_ptr<Objective> regularizer,
                                                                     real_t epsilon) {
    return make_gen_lin_classifier<HuberPhi>(std::move(X), std::move(regularizer), epsilon);
}

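// Typical usage of these factory functions (illustrative sketch, mirroring the
// test cases below; assumes a feature matrix `X` and a binary label vector `y`
// of matching dimensions are already available):
//
//     auto clf = objective::make_squared_hinge(
//             std::make_shared<GenericFeatureMatrix>(X),
//             std::make_unique<objective::SquaredNormRegularizer>());
//     clf->get_label_ref() = y;
//     clf->update_costs(pos_cost, neg_cost);
//
// make_logistic_loss follows the same pattern; make_huber_hinge additionally
// takes the `epsilon` parameter of the Huber hinge loss.
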
#ifndef DOCTEST_CONFIG_DISABLE
#include "doctest.h"
#include "regularizers_imp.h"
#include "reg_sq_hinge.h"

using namespace dismec;

namespace {
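    // Checks that two objectives agree (up to floating-point tolerance) on every
    // entry point of the Objective interface: value, gradient at zero, gradient,
    // pre-conditioner, Hessian-times-direction, and the fused
    // gradient-and-pre-conditioner computation.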
    void test_equivalence(objective::Objective& a, objective::Objective& b, const HashVector& input) {
        auto test_vector_equal = [](auto&& u, auto&& v, const char* message){
            REQUIRE(u.size() == v.size());
            for(int i = 0; i < u.size(); ++i) {
                REQUIRE_MESSAGE(u.coeff(i) == doctest::Approx(v.coeff(i)), message);
            }
        };
        DenseRealVector buffer_a(input->size());
        DenseRealVector buffer_b(input->size());
        CHECK_MESSAGE(a.value(input) == doctest::Approx(b.value(input)), "values differ");

        a.gradient_at_zero(buffer_a);
        b.gradient_at_zero(buffer_b);
        test_vector_equal(buffer_a, buffer_b, "gradient@0 mismatch");

        a.gradient(input, buffer_a);
        b.gradient(input, buffer_b);
        test_vector_equal(buffer_a, buffer_b, "gradient mismatch");

        a.diag_preconditioner(input, buffer_a);
        b.diag_preconditioner(input, buffer_b);
        test_vector_equal(buffer_a, buffer_b, "pre-conditioner mismatch");

        DenseRealVector direction = DenseRealVector::Random(input->size());
        a.hessian_times_direction(input, direction, buffer_a);
        b.hessian_times_direction(input, direction, buffer_b);
        test_vector_equal(buffer_a, buffer_b, "hessian mismatch");

        DenseRealVector buffer_a2(input->size());
        DenseRealVector buffer_b2(input->size());
        a.gradient_and_pre_conditioner(input, buffer_a, buffer_a2);
        b.gradient_and_pre_conditioner(input, buffer_b, buffer_b2);
        test_vector_equal(buffer_a, buffer_b, "gradient mismatch");
        test_vector_equal(buffer_a2, buffer_b2, "pre-conditioner mismatch");
    }
}

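// Verifies that the generic classifier gives identical results for dense and
// sparse representations of the same feature matrix, and that both match the
// hand-optimized Regularized_SquaredHingeSVC reference implementation.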
TEST_CASE("sparse/dense equivalence") {
    int rows, cols;
    real_t pos_cost = 1, neg_cost = 1;

    auto run_test = [&](){
        DenseFeatures features_dense = DenseFeatures::Random(rows, cols);
        SparseFeatures features_sparse = features_dense.sparseView();

        Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1> labels = Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1>::Random(rows);
        for(int i = 0; i < labels.size(); ++i) {
            if(labels.coeff(i) > 0) {
                labels.coeffRef(i) = 1;
            } else {
                labels.coeffRef(i) = -1;
            }
        }

        auto reg_dense = make_squared_hinge(std::make_shared<GenericFeatureMatrix>(features_dense),
                                            std::make_unique<objective::SquaredNormRegularizer>());
        auto reg_sparse = make_squared_hinge(std::make_shared<GenericFeatureMatrix>(features_sparse),
                                             std::make_unique<objective::SquaredNormRegularizer>());

        auto reference = objective::Regularized_SquaredHingeSVC(std::make_shared<GenericFeatureMatrix>(features_sparse),
                                                                std::make_unique<objective::SquaredNormRegularizer>());

        auto do_test = [&](auto& first, auto& second) {
            first.get_label_ref() = labels;
            second.get_label_ref() = labels;

            first.update_costs(pos_cost, neg_cost);
            second.update_costs(pos_cost, neg_cost);

            DenseRealVector weights = DenseRealVector::Random(cols);
            test_equivalence(first, second, HashVector(weights));
        };

        do_test(*reg_dense, *reg_sparse);
        do_test(reference, *reg_sparse);
    };

    SUBCASE("rows > cols") {
        rows = 20;
        cols = 10;
        run_test();
    }
    SUBCASE("cols > rows") {
        rows = 10;
        cols = 20;
        run_test();
    }

    SUBCASE("pos weighted") {
        rows = 15;
        cols = 32;
        pos_cost = 2.0;
        run_test();
    }

    SUBCASE("neg weighted") {
        rows = 15;
        cols = 32;
        neg_cost = 2.0;
        run_test();
    }
    /*
    SUBCASE("large") {
        rows = 1500;
        cols = 3200;
        // this fails, which indicates different numerical stability
        run_test();
    }
    */
}

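// Hand-computed check for the squared hinge: with the weights below we get
// z = Xw = (-1, 2, 2) and margins 1 - y.*z = (0, -1, 3), so only the third
// (negative) instance contributes max(0, 1 - yz)^2 = 9 to the loss, and the
// SquaredNormRegularizer adds 0.5*||w||^2 = 5.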
TEST_CASE("generic squared hinge") {
    SparseFeatures x(3, 5);
    x.insert(0, 3) = 1.0;
    x.insert(1, 0) = 2.0;
    x.insert(2, 1) = 1.0;
    x.insert(2, 2) = 1.0;

    Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1> y(3);
    y << -1, 1, -1;

    auto loss = make_squared_hinge(std::make_shared<GenericFeatureMatrix>(DenseFeatures(x)),
                                   std::make_unique<objective::SquaredNormRegularizer>());
    loss->get_label_ref() = y;

    auto reference = objective::Regularized_SquaredHingeSVC(std::make_shared<GenericFeatureMatrix>(x),
                                                            std::make_unique<objective::SquaredNormRegularizer>());
    reference.get_label_ref() = y;

    DenseRealVector weights(5);
    weights << 1.0, 2.0, 0.0, -1.0, 2.0;

    auto do_check = [&](real_t factor){
        // z = (-1, 2, 2)
        // 1 - yz = (0, -1, 3)
        CHECK_MESSAGE(loss->value(HashVector{weights}) == doctest::Approx(factor * 9.0 + 5), "wrong value");

        DenseRealVector grad(5);
        loss->gradient(HashVector{weights}, grad);
        // dl/dz = (0, 0, 2*3)
        // dl/dw = 6*(0.0, 1.0, 1.0, 0.0, 0.0) + weights (the regularizer 0.5*||w||^2 contributes w)
        DenseRealVector i(5);
        i << 0.0, 1.0, 1.0, 0.0, 0.0;
        DenseRealVector r = factor * 6 * i + weights;
        CHECK_MESSAGE(grad == r, "wrong gradient");

        // also check numerically, via a first-order Taylor expansion along the gradient direction
        real_t old_val = loss->value(HashVector{weights});
        DenseRealVector nw = weights + grad * 1e-4;
        real_t new_val = loss->value(HashVector{nw});
        CHECK(new_val - old_val == doctest::Approx(grad.squaredNorm() * 1e-4).epsilon(1e-4));

        // preconditioner == diagonal of Hessian
        DenseRealVector prec_new(5);
        DenseRealVector prec_old(5);
        loss->diag_preconditioner(HashVector{weights}, prec_new);
        reference.diag_preconditioner(HashVector{weights}, prec_old);
        CHECK_MESSAGE(prec_new == prec_old, "wrong preconditioner");

        loss->hessian_times_direction(HashVector{weights}, i, prec_new);
        reference.hessian_times_direction(HashVector{weights}, i, prec_old);
        CHECK_MESSAGE(prec_new == prec_old, "wrong hessian");

        // g@0
        loss->gradient_at_zero(prec_new);
        reference.gradient_at_zero(prec_old);
        CHECK_MESSAGE(prec_new == prec_old, "g@0 wrong");
    };

    // since the positive example is correct with margin,
    // re-weighting positives does not change the outcome,
    // whereas negatives change the result by a constant factor
    SUBCASE("unweighted") {
        do_check(1.0);
    }
    SUBCASE("positive-reweighted") {
        loss->update_costs(2.0, 1.0);
        reference.update_costs(2.0, 1.0);
        do_check(1.0);
    }
    SUBCASE("negative-reweighted") {
        loss->update_costs(1.0, 2.0);
        reference.update_costs(1.0, 2.0);
        do_check(2.0);
    }
}

#endif