dismecpp/sparsify_8cpp_source.html

 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis

 // All rights reserved.

 //

 // SPDX-License-Identifier: MIT


 #include <algorithm>
 #include <cmath>
 #include <utility>

 #include "training/postproc.h"
 #include "data/types.h"
 #include "solver/minimizer.h"
 #include "utils/hash_vector.h"
 #include "training/postproc/generic.h"
 #include "stats/collection.h"
 #include "stats/timer.h"


 // Identifiers of the statistics that the sparsification post-processor
 // declares in its constructor and records while processing a weight vector.
 namespace {
     using dismec::stats::stat_id_t;

     // Cutoff that was finally applied to the weight vector.
     constexpr stat_id_t STAT_CUTOFF{0};
     // Percentage of entries that were kept after applying the cutoff.
     constexpr stat_id_t STAT_NNZ{1};
     // Number of iterations of the bisection loop.
     constexpr stat_id_t STAT_BINARY_SEARCH_STEPS{2};
     // Number of objective evaluations spent on finding the initial bracket.
     constexpr stat_id_t STAT_INITIAL_STEPS{3};
     // Time spent in one call to `process`.
     constexpr stat_id_t STAT_DURATION{4};
 }


 namespace dismec::postproc {

     class Sparsify : public PostProcessor {

     public:

         Sparsify(std::shared_ptr<objective::Objective> objective, real_t tolerance) :

             m_Objective(std::move(objective)),

             m_Tolerance(tolerance),

             m_WorkingVector(DenseRealVector(m_Objective->num_variables())) {


             declare_stat(STAT_CUTOFF, {"cutoff", {}});

             declare_stat(STAT_NNZ, {"nnz", "%"});

             declare_stat(STAT_BINARY_SEARCH_STEPS, {"binary_search_steps", {}});

             declare_stat(STAT_INITIAL_STEPS, {"initial_steps", {}});

             declare_stat(STAT_DURATION, {"duration", "µs"});

         }

     private:

         void process(label_id_t label_id, Eigen::Ref<DenseRealVector> weight_vector, solvers::MinimizationResult& result) override;


         std::shared_ptr<objective::Objective> m_Objective;

         real_t m_Tolerance;

         HashVector m_WorkingVector;


         static int make_sparse(Eigen::Ref<DenseRealVector> target, const Eigen::Ref<const DenseRealVector>& source, real_t cutoff) {

             int nnz = 0;

             for(int i = 0; i < target.size(); ++i) {

                 auto w_i = source.coeff(i);

                 bool is_small = abs(w_i) < cutoff;

                 target.coeffRef(i) = is_small ? 0 : w_i;

                 if(!is_small) ++nnz;

             }

             return nnz;

         }


         struct BoundData {

             real_t Cutoff;

             long NNZ;

             real_t Loss;

         };


         struct UpperBoundResult {

             BoundData LowerBound;

             BoundData UpperBound;

         };


         UpperBoundResult find_initial_bounds(Eigen::Ref<DenseRealVector> weight_vector, real_t tolerance, real_t initial_lower);


         real_t m_NumValues = 1;

         real_t m_SumLogVal = std::log(0.02);

         real_t m_SumSqrLog = std::log(0.02) * std::log(0.02);

     };


     /// Culls small entries of `weight_vector` in place.
     ///
     /// An initial bracket for the cutoff is obtained from `find_initial_bounds` and then
     /// narrowed by bisection until the numbers of kept entries at both ends are close.
     /// The lower end of the bracket is applied to `weight_vector`, and the running
     /// log-cutoff statistics are updated with it.
     ///
     /// \param label_id Not used by this implementation.
     /// \param weight_vector Weights to sparsify; modified in place.
     /// \param result Minimization result; only `FinalValue` is read.
     void Sparsify::process(label_id_t label_id, Eigen::Ref<DenseRealVector> weight_vector, solvers::MinimizationResult& result) {
         auto timer = make_timer(STAT_DURATION);
         m_WorkingVector = weight_vector;
         // Largest objective value that a culled vector may have to be accepted.
         real_t tolerance = (1 + m_Tolerance) * result.FinalValue + real_t{1e-5};

         auto [lower, upper] = find_initial_bounds(weight_vector, tolerance, result.FinalValue);

         // Bisect the cutoff until the two ends keep roughly (within ~10%) the same
         // number of entries.
         int count = 0;
         while( (lower.NNZ - upper.NNZ) > upper.NNZ / 10 + 1 ) {
             real_t middle = (upper.Cutoff + lower.Cutoff) / 2;
             // If the midpoint is no longer strictly inside the bracket (bounds are
             // adjacent floating point numbers, or one of them is NaN), no further
             // progress is possible and the loop would never terminate.
             if(!(lower.Cutoff < middle && middle < upper.Cutoff)) {
                 break;
             }
             int nnz = make_sparse(m_WorkingVector.modify(), weight_vector, middle);
             auto new_score = m_Objective->value(m_WorkingVector);
             if(new_score > tolerance) {
                 upper.Cutoff = middle;
                 upper.NNZ = nnz;
                 upper.Loss = new_score;
             } else {
                 lower.Cutoff = middle;
                 lower.NNZ = nnz;
                 lower.Loss = new_score;
             }
             ++count;
         }
         record(STAT_BINARY_SEARCH_STEPS, count);

         // finally, apply the culling to the actual weight vector
         int nnz = make_sparse(weight_vector, weight_vector, lower.Cutoff);

         // Only feed usable cutoffs into the running statistics: log(0) = -inf and
         // log(NaN) = NaN would permanently corrupt the sums and thereby the initial
         // bounds of every subsequently processed weight vector.
         if(lower.Cutoff > 0 && std::isfinite(lower.Cutoff)) {
             m_NumValues += 1;
             real_t log_cutoff = std::log(lower.Cutoff);
             m_SumLogVal += log_cutoff;
             m_SumSqrLog += log_cutoff*log_cutoff;
         }

         record(STAT_CUTOFF, lower.Cutoff);
         // Compute the percentage in floating point; `100 * nnz` as int may overflow.
         record(STAT_NNZ, 100.f * static_cast<float>(nnz) / static_cast<float>(weight_vector.size()));
     }


     /// Determines an initial bracket for the cutoff search.
     ///
     /// Candidate cutoffs are derived from the mean and standard deviation of the
     /// logarithm of previously used cutoffs. Each candidate is applied to a copy of
     /// the weights (in `m_WorkingVector`) and the objective is evaluated.
     ///
     /// \param weight_vector The weights that are to be sparsified; only read here.
     /// \param tolerance Objective value above which a cutoff is considered too large.
     /// \param initial_lower Objective value reported for the trivial lower bound
     ///        (cutoff 0, all entries kept).
     /// \return A pair of evaluated cutoffs; the first has the smaller cutoff of the
     ///         probed candidates. The number of objective evaluations is recorded as
     ///         `STAT_INITIAL_STEPS`.
     Sparsify::UpperBoundResult Sparsify::find_initial_bounds(Eigen::Ref<DenseRealVector> weight_vector, real_t tolerance, real_t initial_lower)
     {
         real_t mean_log = m_SumLogVal / m_NumValues;
         // E[x^2] - E[x]^2 may turn slightly negative due to floating point
         // cancellation; clamp it so that the square root cannot produce NaN.
         real_t variance = m_SumSqrLog / m_NumValues - mean_log*mean_log;
         real_t std_log = std::sqrt(std::max(variance, real_t{0}) + real_t{1e-5});

         int step_count = 0;

         // Applies exp(log_cutoff) to a working copy and evaluates the objective.
         auto check_bound = [&](real_t log_cutoff) {
             real_t cutoff = std::exp(log_cutoff);
             int nnz = make_sparse(m_WorkingVector.modify(), weight_vector, cutoff);
             auto score = m_Objective->value(m_WorkingVector);
             ++step_count;
             return BoundData{cutoff, nnz, score};
         };

         // Records the number of probes and packs the bracket.
         auto finish = [&](const BoundData& lo, const BoundData& hi) {
             record(STAT_INITIAL_STEPS, step_count);
             return UpperBoundResult{lo, hi};
         };

         // Probe exp(mean_log) first, then move by multiples of std_log in log-space
         // in whichever direction is needed to bracket the tolerance.
         auto at_mean = check_bound( mean_log );
         if(at_mean.Loss > tolerance) {
             // ok, mean is an upper bound
             // let's try the lower bound then
             BoundData minus_std = check_bound(mean_log - std_log);
             if(minus_std.Loss > tolerance) {
                 // Even the smaller cutoff is too aggressive: fall back to the trivial
                 // lower bound that keeps every entry.
                 return finish(BoundData{0, weight_vector.size(), initial_lower}, minus_std);
             }
             return finish(minus_std, at_mean);
         }

         // ok, mean is a lower bound
         BoundData plus_std = check_bound(mean_log + std_log);
         if(plus_std.Loss > tolerance) {
             return finish(at_mean, plus_std);
         }

         // one more naive trial:
         BoundData plus_3_std = check_bound(mean_log + 3 * std_log);
         if(plus_3_std.Loss > tolerance) {
             return finish(plus_std, plus_3_std);
         }

         // Last resort: the largest magnitude in the vector. The absolute value is
         // required because make_sparse thresholds on |w|; the signed maximum could be
         // non-positive, whose logarithm is not a usable cutoff.
         BoundData at_max = check_bound( std::log(weight_vector.cwiseAbs().maxCoeff()) );
         return finish(plus_3_std, at_max);
     }

 }


 // Builds a factory that produces `Sparsify` post-processors, each constructed
 // with the given `tolerance`.
 std::shared_ptr<dismec::postproc::PostProcessFactory> dismec::postproc::create_sparsify(real_t tolerance) {
     using SparsifyFactory = GenericPostProcFactory<Sparsify, real_t>;
     auto factory = std::make_shared<SparsifyFactory>(tolerance);
     return factory;
 }

dismec::HashVector
An Eigen vector with versioning information, to implement simple caching of results.
Definition: hash_vector.h:43

dismec::label_id_t
Strong typedef for an int to signify a label id.
Definition: types.h:20

dismec::postproc::PostProcessor
Definition: postproc.h:17

dismec::postproc::Sparsify
Definition: sparsify.cpp:27

dismec::postproc::Sparsify::m_SumLogVal
real_t m_SumLogVal
Definition: sparsify.cpp:74

dismec::postproc::Sparsify::process
void process(label_id_t label_id, Eigen::Ref< DenseRealVector > weight_vector, solvers::MinimizationResult &result) override
Apply post-processing for the weight_vector corresponding to the label label_id.
Definition: sparsify.cpp:80

dismec::postproc::Sparsify::m_NumValues
real_t m_NumValues
Definition: sparsify.cpp:73

dismec::postproc::Sparsify::m_Tolerance
real_t m_Tolerance
Definition: sparsify.cpp:44

dismec::postproc::Sparsify::find_initial_bounds
UpperBoundResult find_initial_bounds(Eigen::Ref< DenseRealVector > weight_vector, real_t tolerance, real_t initial_lower)
Definition: sparsify.cpp:118

dismec::postproc::Sparsify::m_WorkingVector
HashVector m_WorkingVector
Definition: sparsify.cpp:45

dismec::postproc::Sparsify::m_Objective
std::shared_ptr< objective::Objective > m_Objective
Definition: sparsify.cpp:43

dismec::postproc::Sparsify::m_SumSqrLog
real_t m_SumSqrLog
Definition: sparsify.cpp:75

dismec::postproc::Sparsify::make_sparse
static int make_sparse(Eigen::Ref< DenseRealVector > target, const Eigen::Ref< const DenseRealVector > &source, real_t cutoff)
Definition: sparsify.cpp:47

dismec::postproc::Sparsify::Sparsify
Sparsify(std::shared_ptr< objective::Objective > objective, real_t tolerance)
Definition: sparsify.cpp:29

dismec::stats::Tracked::make_timer
auto make_timer(stat_id_t id, Args... args)
Creates a new ScopeTimer using stats::record_scope_time.
Definition: tracked.h:130

dismec::stats::Tracked::declare_stat
void declare_stat(stat_id_t index, StatisticMetaData meta)
Declares a new statistics. This function just forwards all its arguments to the internal StatisticsCo...
Definition: tracked.cpp:16

collection.h

generic.h

hash_vector.h

minimizer.h

anonymous_namespace{sparsify.cpp}::STAT_DURATION
constexpr stat_id_t STAT_DURATION
Definition: sparsify.cpp:22

anonymous_namespace{sparsify.cpp}::STAT_CUTOFF
constexpr stat_id_t STAT_CUTOFF
Definition: sparsify.cpp:18

anonymous_namespace{sparsify.cpp}::STAT_BINARY_SEARCH_STEPS
constexpr stat_id_t STAT_BINARY_SEARCH_STEPS
Definition: sparsify.cpp:20

anonymous_namespace{sparsify.cpp}::STAT_NNZ
constexpr stat_id_t STAT_NNZ
Definition: sparsify.cpp:19

anonymous_namespace{sparsify.cpp}::STAT_INITIAL_STEPS
constexpr stat_id_t STAT_INITIAL_STEPS
Definition: sparsify.cpp:21

dismec::objective
Definition: fwd.h:34

dismec::postproc
Definition: fwd.h:48

dismec::postproc::create_sparsify
FactoryPtr create_sparsify(real_t tolerance)
Definition: sparsify.cpp:167

dismec::stats::stat_id_t
opaque_int_type< detail::stat_id_tag > stat_id_t
An opaque int-like type that is used to identify a statistic in a StatisticsCollection.
Definition: stat_id.h:24

dismec::DenseRealVector
types::DenseVector< real_t > DenseRealVector
Any dense, real values vector.
Definition: matrix_types.h:40

dismec::real_t
float real_t
The default type for floating point values.
Definition: config.h:17

postproc.h

real_t
float real_t
Definition: regularizers.h:11

dismec::postproc::Sparsify::BoundData
Definition: sparsify.cpp:58

dismec::postproc::Sparsify::BoundData::Loss
real_t Loss
Definition: sparsify.cpp:61

dismec::postproc::Sparsify::BoundData::NNZ
long NNZ
Definition: sparsify.cpp:60

dismec::postproc::Sparsify::BoundData::Cutoff
real_t Cutoff
Definition: sparsify.cpp:59

dismec::postproc::Sparsify::UpperBoundResult
Definition: sparsify.cpp:64

dismec::postproc::Sparsify::UpperBoundResult::LowerBound
BoundData LowerBound
Definition: sparsify.cpp:65

dismec::postproc::Sparsify::UpperBoundResult::UpperBound
BoundData UpperBound
Definition: sparsify.cpp:66

dismec::solvers::MinimizationResult
Definition: minimizer.h:24

dismec::solvers::MinimizationResult::FinalValue
double FinalValue
Definition: minimizer.h:27

timer.h

types.h