47 static int make_sparse(Eigen::Ref<DenseRealVector> target,
const Eigen::Ref<const DenseRealVector>& source,
real_t cutoff) {
49 for(
int i = 0; i < target.size(); ++i) {
50 auto w_i = source.coeff(i);
51 bool is_small = abs(w_i) < cutoff;
52 target.coeffRef(i) = is_small ? 0 : w_i;
85 auto [lower, upper] = find_initial_bounds(weight_vector, tolerance, result.FinalValue);
89 while( (lower.NNZ - upper.NNZ) > upper.NNZ / 10 + 1 ) {
90 real_t middle = (upper.Cutoff + lower.Cutoff) / 2;
91 int nnz = make_sparse(m_WorkingVector.modify(), weight_vector, middle);
92 auto new_score = m_Objective->value(m_WorkingVector);
93 if(new_score > tolerance) {
94 upper.Cutoff = middle;
96 upper.Loss = new_score;
98 lower.Cutoff = middle;
100 lower.Loss = new_score;
107 int nnz = make_sparse(weight_vector, weight_vector, lower.Cutoff);
110 real_t log_cutoff = std::log(lower.Cutoff);
111 m_SumLogVal += log_cutoff;
112 m_SumSqrLog += log_cutoff*log_cutoff;
115 record(
STAT_NNZ,
float(100 * nnz) / weight_vector.size());
120 real_t mean_log = m_SumLogVal / m_NumValues;
121 real_t std_log = std::sqrt(m_SumSqrLog / m_NumValues - mean_log*mean_log +
real_t{1e-5});
125 auto check_bound = [&](
real_t log_cutoff) {
126 real_t cutoff = std::exp(log_cutoff);
127 int nnz = make_sparse(m_WorkingVector.modify(), weight_vector, cutoff);
128 auto score = m_Objective->value(m_WorkingVector);
134 auto at_mean = check_bound( mean_log );
135 if(at_mean.Loss > tolerance) {
138 BoundData minus_std = check_bound(mean_log - std_log);
139 if(minus_std.
Loss > tolerance) {
141 return {{0, weight_vector.size(), initial_lower}, minus_std};
144 return {minus_std, at_mean};
148 BoundData plus_std = check_bound(mean_log + std_log);
149 if(plus_std.
Loss > tolerance) {
151 return {at_mean, plus_std};
155 BoundData plus_3_std = check_bound(mean_log + 3 * std_log);
156 if(plus_3_std.
Loss > tolerance) {
158 return {plus_std, plus_3_std};
161 BoundData at_max = check_bound( std::log(weight_vector.maxCoeff()) );
163 return {plus_3_std, at_max};
168 return std::make_shared<GenericPostProcFactory<Sparsify, real_t>>(tolerance);
An Eigen vector with versioning information, to implement simple caching of results.
Strong typedef for an int to signify a label id.
void process(label_id_t label_id, Eigen::Ref< DenseRealVector > weight_vector, solvers::MinimizationResult &result) override
Apply post-processing for the weight_vector corresponding to the label label_id.
UpperBoundResult find_initial_bounds(Eigen::Ref< DenseRealVector > weight_vector, real_t tolerance, real_t initial_lower)
HashVector m_WorkingVector
std::shared_ptr< objective::Objective > m_Objective
static int make_sparse(Eigen::Ref< DenseRealVector > target, const Eigen::Ref< const DenseRealVector > &source, real_t cutoff)
Sparsify(std::shared_ptr< objective::Objective > objective, real_t tolerance)
auto make_timer(stat_id_t id, Args... args)
Creates a new ScopeTimer using stats::record_scope_time.
void declare_stat(stat_id_t index, StatisticMetaData meta)
Declares a new statistic. This function just forwards all its arguments to the internal StatisticsCollection.
constexpr stat_id_t STAT_DURATION
constexpr stat_id_t STAT_CUTOFF
constexpr stat_id_t STAT_BINARY_SEARCH_STEPS
constexpr stat_id_t STAT_NNZ
constexpr stat_id_t STAT_INITIAL_STEPS
FactoryPtr create_sparsify(real_t tolerance)
opaque_int_type< detail::stat_id_tag > stat_id_t
An opaque int-like type that is used to identify a statistic in a StatisticsCollection.
types::DenseVector< real_t > DenseRealVector
Any dense, real-valued vector.
float real_t
The default type for floating-point values.