31     new_sparse.reserve(features.nonZeros() + features.rows());
 
   32     for (
int k=0; k < features.outerSize(); ++k) {
 
   33         new_sparse.startVec(k);
 
   34         for (SparseFeatures::InnerIterator it(features, k); it; ++it)
 
   36             new_sparse.insertBack(it.row(), it.col()) = it.value();
 
   38         new_sparse.insertBack(k, features.cols()) = bias;
 
   40     new_sparse.finalize();
 
   45     DenseFeatures new_features{features.rows(), features.cols() + 1};
 
   46     new_features.leftCols(features.cols()) = features;
 
   47     new_features.col(features.cols()).setConstant(bias);
 
   61     auto start = features.outerIndexPtr()[0];
 
   62     auto end = features.outerIndexPtr()[features.rows()];
 
   64     const auto* indices = features.innerIndexPtr();
 
   65     const auto* values = features.valuePtr();
 
   66     for(
auto index = start; index < end; ++index) {
 
   67         auto col = indices[index];
 
   68         result[col] += values[index];
 
   71     result /= features.rows();
 
   79     for(
int i = 0; i < features.rows(); ++i) {
 
   80         result += features.row(i);
 
   83     result /= features.rows();
 
   93     for(
int i = 0; i < features.rows(); ++i) {
 
   94         real_t norm = features.row(i).norm();
 
   96             features.row(i) /= norm;
 
  102     for(
int i = 0; i < features.rows(); ++i) {
 
  103         real_t norm = features.row(i).norm();
 
  105             features.row(i) /= norm;
 
  115     std::vector<long> counts(features.cols(), 0);
 
  116     assert(features.isCompressed());
 
  120     const auto* last = features.innerIndexPtr() + features.nonZeros();
 
  121     for(
const auto* start = features.innerIndexPtr(); start != last; ++start) {
 
  128     if(!features.isCompressed()) {
 
  129         features.makeCompressed();
 
  135     std::sort(reorder.begin(), reorder.end(), [&](
int a, 
int b){
 
  136         return counts[a] < counts[b];
 
  140     Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> permutation(reorder);
 
  142     features = features * permutation;
 
  147     Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> permutation(features.cols());
 
  148     permutation.setIdentity();
 
  163                 features = features.unaryExpr([](
const real_t& value) { 
return std::log1p(value); });
 
  166                 features = features.unaryExpr([](
const real_t& value) { 
return real_t{1} + std::log(value); });
 
  169                 features = features.unaryExpr([](
const real_t& value) { 
return std::sqrt(value); });
 
  185     if(!features.isCompressed()) {
 
  186         features.makeCompressed();
 
  189     std::ranlux24 rng(seed);
 
  190     std::uniform_int_distribution<int> mapping(0, buckets - 1);
 
  191     Eigen::MatrixXi hash = Eigen::MatrixXi::NullaryExpr(features.cols(), repeats, [&](){
 
  196     for (
int k=0; k < features.rows(); ++k) {
 
  199         for (SparseFeatures::InnerIterator it(features, k); it; ++it)
 
  201             for(
int j = 0; j < repeats; ++j) {
 
  202                 new_row.coeffRef(hash.coeff(it.col(), j) + j * buckets) += it.value();
 
  207         for(
int i = 0; i < new_row.size(); ++i) {
 
  208             if(new_row.coeff(i) > 0) {
 
  209                 result.insertBack(k, i) = new_row.coeff(i);
 
  216     features = std::move(result);
 
  221     new_features.reserve(2 * source.nonZeros() * 
double(shortlist.size()) / 
double(source.rows()));
 
  223     for (
auto row : shortlist) {
 
  224         new_features.startVec(new_row);
 
  225         for (SparseFeatures::InnerIterator it(source, row); it; ++it)
 
  227             new_features.insertBack(new_row, it.col()) = it.value();
 
  231     new_features.finalize();
 
  238     for (
auto row : shortlist) {
 
  239         new_features.row(new_row) = source.row(row);
 
  250     test.insert(3, 2) = 2.0;
 
  251     test.insert(1, 3) = -1.0;
 
  252     test.insert(0, 4) = 5.0;
 
  253     test.insert(2, 2) = 2.0;
 
  254     test.insert(2, 3) = 4.0;
 
  262     CHECK(dense_test.leftCols(Eigen::fix<5>) == dense_ext.leftCols(Eigen::fix<5>));
 
  263     CHECK(dense_ext.col(Eigen::fix<5>) == DenseFeatures::Ones(5, 1));
 
  268     test.insert(3, 2) = 2.0;
 
  269     test.insert(1, 3) = -1.0;
 
  270     test.insert(2, 2) = 2.0;
 
  271     test.insert(1, 2) = 2.0;
 
  272     test.insert(2, 3) = 4.0;
 
  273     test.insert(2, 0) = -4.0;
 
  279     expected.insert(3, 3) = 2.0;
 
  280     expected.insert(1, 2) = -1.0;
 
  281     expected.insert(2, 3) = 2.0;
 
  282     expected.insert(1, 3) = 2.0;
 
  283     expected.insert(2, 2) = 4.0;
 
  284     expected.insert(2, 1) = -4.0;
 
  288     CHECK(test.toDense() == expected.toDense());
 
std::shared_ptr< GenericFeatureMatrix > edit_features()
Get a shared pointer to mutable feature data. Use with care.
outer_const< T, dense_vector_h > DenseVector
auto visit(F &&f, Variants &&... variants)
Main namespace in which all types, classes, and functions are defined.
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
void normalize_instances(DatasetBase &data)
void augment_features_with_bias(DatasetBase &data, real_t bias=1)
void transform_features(DatasetBase &data, DatasetTransform transform)
types::DenseVector< real_t > DenseRealVector
Any dense, real-valued vector.
std::vector< long > count_features(const SparseFeatures &features)
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
void hash_sparse_features(SparseFeatures &features, unsigned seed, int buckets, int repeats)
Eigen::PermutationMatrix< Eigen::Dynamic, Eigen::Dynamic, int > sort_features_by_frequency(DatasetBase &data)
SparseFeatures shortlist_features(const SparseFeatures &source, const std::vector< long > &shortlist)
DenseRealVector get_mean_feature(const GenericFeatureMatrix &features)
float real_t
The default type for floating point values.