    m_GenericInBuffer = line_interpolation(position);

    return f + m_Regularizer->lookup_on_line(position);

    // total loss: per-instance losses, weighted by the example costs
    calculate_loss(xTw, labels(), m_GenericOutBuffer);
    return m_GenericOutBuffer.dot(costs());
 
void GenericLinearClassifier::hessian_times_direction_unchecked(const HashVector& location,
                                                                const DenseRealVector& direction,
                                                                Eigen::Ref<DenseRealVector> target) {
    // regularizer contribution first, then the data term
    m_Regularizer->hessian_times_direction(location, direction, target);

    const auto& hessian = cached_2nd_derivative(location);
    visit([&](const auto& features) {
        for (int pos = 0; pos < hessian.size(); ++pos) {
            if(real_t h = hessian.coeff(pos); h != 0) {
                real_t factor = features.row(pos).dot(direction);
                target += features.row(pos) * factor * h;
            }
        }
    }, generic_features());
}
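// For dense features, the accumulation loop above is equivalent to the generalized
// Gauss-Newton product Xᵀ diag(h) X d, e.g. (illustrative sketch only, reusing the
// names from the function above):
//
//     DenseRealVector xd = features * direction;                  // X·d
//     target += features.transpose() * hessian.cwiseProduct(xd);  // Xᵀ diag(h) X d
//
// The row-wise loop avoids materializing X·d as a whole and skips rows whose
// second derivative is exactly zero.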
 
void GenericLinearClassifier::gradient_and_pre_conditioner_unchecked(const HashVector& location,
                                                                     Eigen::Ref<DenseRealVector> gradient,
                                                                     Eigen::Ref<DenseRealVector> pre) {
    m_Regularizer->gradient(location, gradient);
    m_Regularizer->diag_preconditioner(location, pre);

    const auto& derivative = cached_derivative(location);
    const auto& hessian = cached_2nd_derivative(location);
    visit([&](const auto& features) {
        // one pass over the feature rows fills both outputs
        for (int pos = 0; pos < derivative.size(); ++pos) {
            if(real_t d = derivative.coeff(pos); d != 0) {
                gradient += features.row(pos) * d;
            }
            if(real_t h = hessian.coeff(pos); h != 0) {
                pre += features.row(pos).cwiseAbs2() * h;
            }
        }
    }, generic_features());
}
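// In matrix notation, with d_i = c_i · ℓ'((Xw)_i, y_i) and h_i = c_i · ℓ''((Xw)_i, y_i)
// (exactly the cost-weighted vectors returned by cached_derivative and
// cached_2nd_derivative), the single pass above computes
//
//     gradient = ∇R(w) + Xᵀ d        pre_j = [diag-precond of R]_j + Σ_i h_i · X_ij²
//
// fusing both computations so the feature matrix is traversed only once.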
 
void GenericLinearClassifier::gradient_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) {
    m_Regularizer->gradient(location, target);

    const auto& derivative = cached_derivative(location);
    visit([&](const auto& features) {
        for (int pos = 0; pos < derivative.size(); ++pos) {
            if(real_t d = derivative.coeff(pos); d != 0) {
                target += features.row(pos) * d;
            }
        }
    }, generic_features());
}
 
void GenericLinearClassifier::gradient_at_zero_unchecked(Eigen::Ref<DenseRealVector> target) {
    m_Regularizer->gradient_at_zero(target);

    m_GenericInBuffer = DenseRealVector::Zero(labels().size());
    calculate_derivative(m_GenericInBuffer, labels(), m_GenericOutBuffer);
    const auto& cost_vector = costs();
    visit([&](const auto& features) {
        for (int pos = 0; pos < m_GenericOutBuffer.size(); ++pos) {
            if(real_t d = m_GenericOutBuffer.coeff(pos); d != 0) {
                target += features.row(pos) * (cost_vector.coeff(pos) * d);
            }
        }
    }, generic_features());
}
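// At w = 0 all margins (X·w)_i vanish, which is why the function above can evaluate
// the derivative on a zero buffer instead of computing X·w. The accumulated result is
//
//     ∇f(0) = ∇R(0) + Σ_i c_i · ℓ'(0, y_i) · x_i = ∇R(0) + Xᵀ (c ⊙ ℓ'(0, y)).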
 
void GenericLinearClassifier::diag_preconditioner_unchecked(const HashVector& location, Eigen::Ref<DenseRealVector> target) {
    m_Regularizer->diag_preconditioner(location, target);

    const auto& hessian = cached_2nd_derivative(location);
    visit([&](const auto& features) {
        for (int pos = 0; pos < hessian.size(); ++pos) {
            if(real_t h = hessian.coeff(pos); h != 0) {
                target += features.row(pos).cwiseAbs2() * h;
            }
        }
    }, generic_features());
}
 
        calculate_derivative(x_times_w(location), labels(), out);

        // inside a lambda that reports the sparsity of the derivative (in percent)
        // for the STAT_GRAD_SPARSITY statistic:
            long nnz = 0;
            for(int i = 0; i < out.size(); ++i) {
                if(out.coeff(i) != 0) ++nnz;
            }
            return static_cast<real_t>(static_cast<double>(100*nnz) / out.size()); });

        // fold the per-example costs into the cached derivative
        out.array() *= costs().array();

        calculate_2nd_derivative(x_times_w(location), labels(), out);
        // the cached second derivative is cost-weighted in the same way
        out.array() *= costs().array();
 
void GenericLinearClassifier::invalidate_labels() {
    m_DerivativeBuffer.invalidate();
    m_SecondDerivativeBuffer.invalidate();
}
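// Sketch of the version-keyed caching idea behind m_DerivativeBuffer and
// m_SecondDerivativeBuffer (hypothetical helper, not the library's actual type):
// a HashVector carries a unique version id, so a cached result can be reused as
// long as the id of the query location matches the id it was computed for.
struct ExampleCachedVector {
    bool Valid = false;
    std::size_t Version = 0;   // id of the HashVector this cache was computed for
    DenseRealVector Data;

    template<class F>
    const DenseRealVector& update(const HashVector& location, F&& recompute) {
        if(!Valid || location.hash() != Version) {   // hash() assumed to return the version id
            recompute(Data);
            Version = location.hash();
            Valid = true;
        }
        return Data;
    }

    void invalidate() { Valid = false; }
};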
 
GenericLinearClassifier::GenericLinearClassifier(std::shared_ptr<const GenericFeatureMatrix> X,
                                                 std::unique_ptr<Objective> regularizer)

        m_SecondDerivativeBuffer(num_instances()),
        m_DerivativeBuffer(num_instances()), m_GenericInBuffer(num_instances()),
        m_GenericOutBuffer(num_instances()), m_Regularizer(std::move(regularizer))
{
    if (!m_Regularizer) {
        THROW_EXCEPTION(std::invalid_argument, "Regularizer cannot be nullptr");
    }
}
 
template<class Phi, class... Args>
std::unique_ptr<GenericLinearClassifier> make_gen_lin_classifier(std::shared_ptr<const GenericFeatureMatrix> X,
                                                                 std::unique_ptr<objective::Objective> regularizer,
                                                                 Args... args) {
    return std::make_unique<objective::GenericMarginClassifier<Phi>>(std::move(X), std::move(regularizer),
            Phi{std::forward<Args>(args)...});
}
 
std::unique_ptr<GenericLinearClassifier> make_squared_hinge(std::shared_ptr<const GenericFeatureMatrix> X,
                                                            std::unique_ptr<Objective> regularizer) {
    return make_gen_lin_classifier<SquaredHingePhi>(std::move(X), std::move(regularizer));
}

std::unique_ptr<GenericLinearClassifier> make_logistic_loss(std::shared_ptr<const GenericFeatureMatrix> X,
                                                            std::unique_ptr<Objective> regularizer) {
    return make_gen_lin_classifier<LogisticPhi>(std::move(X), std::move(regularizer));
}

std::unique_ptr<GenericLinearClassifier> make_huber_hinge(std::shared_ptr<const GenericFeatureMatrix> X,
                                                          std::unique_ptr<Objective> regularizer,
                                                          real_t epsilon) {
    return make_gen_lin_classifier<HuberPhi>(std::move(X), std::move(regularizer), epsilon);
}
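// Illustrative usage of the factories above (hypothetical helper, not part of the
// library): build a squared-hinge objective over random dense features with an L2
// regularizer, attach labels and costs as the tests below do, and evaluate it.
real_t example_make_squared_hinge_usage() {
    auto features = std::make_shared<GenericFeatureMatrix>(DenseFeatures::Random(20, 5));
    auto loss = make_squared_hinge(features, std::make_unique<objective::SquaredNormRegularizer>());

    Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1> labels(20);
    labels.setConstant(1);            // all-positive labels, purely for illustration
    loss->get_label_ref() = labels;
    loss->update_costs(1.0, 1.0);     // uniform costs for positive and negative examples

    DenseRealVector w = DenseRealVector::Zero(5);
    return loss->value(HashVector{w});   // regularized loss at the origin
}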
 
#ifndef DOCTEST_CONFIG_DISABLE

void test_equivalence(objective::Objective& a, objective::Objective& b, const HashVector& input) {
    auto test_vector_equal = [](auto&& u, auto&& v, const char* message){
        REQUIRE(u.size() == v.size());
        for(int i = 0; i < u.size(); ++i) {
            REQUIRE_MESSAGE(u.coeff(i) == doctest::Approx(v.coeff(i)), message);
        }
    };

    CHECK_MESSAGE(a.value(input) == doctest::Approx(b.value(input)), "values differ");

    // gradient_at_zero
    test_vector_equal(buffer_a, buffer_b, "gradient@0 mismatch");

    // gradient
    test_vector_equal(buffer_a, buffer_b, "gradient mismatch");

    // diag_preconditioner
    test_vector_equal(buffer_a, buffer_b, "pre-conditioner mismatch");

    // hessian_times_direction
    test_vector_equal(buffer_a, buffer_b, "hessian mismatch");

    // gradient_and_pre_conditioner must match the individual computations
    test_vector_equal(buffer_a, buffer_b, "gradient mismatch");
    test_vector_equal(buffer_a2, buffer_b2, "pre-conditioner mismatch");
}
 
TEST_CASE("sparse/dense equivalence") {

    real_t pos_cost = 1, neg_cost = 1;

    auto run_test = [&](){
        DenseFeatures features_dense = DenseFeatures::Random(rows, cols);

        Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1> labels = Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1>::Random(rows);
        // binarize the randomly drawn labels to ±1
        for(int i = 0; i < labels.size(); ++i) {
            if(labels.coeff(i) > 0) {
                labels.coeffRef(i) = 1;
            } else {
                labels.coeffRef(i) = -1;
            }
        }

        auto reg_dense = make_squared_hinge(std::make_shared<GenericFeatureMatrix>(features_dense),
                                            std::make_unique<objective::SquaredNormRegularizer>());
        auto reg_sparse = make_squared_hinge(std::make_shared<GenericFeatureMatrix>(features_sparse),
                                             std::make_unique<objective::SquaredNormRegularizer>());

                                                            std::make_unique<objective::SquaredNormRegularizer>());

        auto do_test = [&](auto& first, auto& second) {
            first.get_label_ref() = labels;
            second.get_label_ref() = labels;

            first.update_costs(pos_cost, neg_cost);
            second.update_costs(pos_cost, neg_cost);

        };

        do_test(*reg_dense, *reg_sparse);
        do_test(reference, *reg_sparse);
    };

    SUBCASE("rows > cols") {

    }

    SUBCASE("cols > rows") {

    }

    SUBCASE("pos weighted") {

    }

    SUBCASE("neg weighted") {

    }
}
 
    x.insert(0, 3) = 1.0;
    x.insert(1, 0) = 2.0;
    x.insert(2, 1) = 1.0;
    x.insert(2, 2) = 1.0;

    Eigen::Matrix<std::int8_t, Eigen::Dynamic, 1> y(3);

    auto loss = make_squared_hinge(std::make_shared<GenericFeatureMatrix>(x),
                                   std::make_unique<objective::SquaredNormRegularizer>());
    loss->get_label_ref() = y;

                                                            std::make_unique<objective::SquaredNormRegularizer>());
    reference.get_label_ref() = y;

    weights << 1.0, 2.0, 0.0, -1.0, 2.0;
 
    auto do_check = [&](real_t factor){

        CHECK_MESSAGE(loss->value(HashVector{weights}) == doctest::Approx(factor * 9.0 + 5), "wrong value");

        i << 0.0, 1.0, 1.0, 0.0, 0.0;

        CHECK_MESSAGE(grad == r, "wrong gradient");

        CHECK(new_val - old_val == doctest::Approx(grad.squaredNorm() * 1e-4).epsilon(1e-4));
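        // The check above is a first-order Taylor test: for a small step w → w + ε·grad,
        // the objective changes by approximately ε·gradᵀgrad = ε·‖grad‖². The stepping
        // code is not shown in this listing; ε = 1e-4 is assumed from the tolerance used.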
 
        loss->diag_preconditioner(HashVector{weights}, prec_new);
        reference.diag_preconditioner(HashVector{weights}, prec_old);
        CHECK_MESSAGE(prec_new == prec_old, "wrong preconditioner");

        loss->hessian_times_direction(HashVector{weights}, i, prec_new);
        reference.hessian_times_direction(HashVector{weights}, i, prec_old);
        CHECK_MESSAGE(prec_new == prec_old, "wrong hessian");

        loss->gradient_at_zero(prec_new);
        reference.gradient_at_zero(prec_old);
        CHECK_MESSAGE(prec_new == prec_old, "g@0 wrong");
    };
    SUBCASE("unweighted") {

    }

    SUBCASE("positive-reweighted") {
        loss->update_costs(2.0, 1.0);
        reference.update_costs(2.0, 1.0);

    }

    SUBCASE("negative-reweighted") {
        loss->update_costs(1.0, 2.0);
        reference.update_costs(1.0, 2.0);

    }
}

#endif
 