DiSMEC++
transform.h
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #ifndef DISMEC_SRC_DATA_TRANSFORM_H
7 #define DISMEC_SRC_DATA_TRANSFORM_H
8 
9 #include "data/types.h"
10 #include "matrix_types.h"
11 
12 namespace dismec {
13 
// Declaration only; the definition lives in transform.cpp.
// `data` is taken by non-const reference, so the dataset may be modified in place.
// `bias` defaults to 1 when the caller omits it. Nothing is returned.
// NOTE(review): presumably adds a constant feature with value `bias` to every
// instance -- confirm against the definition in transform.cpp.
14  void augment_features_with_bias(DatasetBase& data, real_t bias = 1);
17 
21 
// Declaration only; the definition lives in transform.cpp.
// Reads a sparse feature matrix (const reference, not modified) and returns a
// vector of `long` by value.
// NOTE(review): presumably one entry per feature column, holding how many
// instances contain that feature -- confirm against the definition.
22  std::vector<long> count_features(const SparseFeatures& features);
23 
// Overload set: one entry point for a whole dataset, one for a sparse feature
// matrix, and one for a dense feature matrix. Every overload takes its argument
// by non-const reference and returns nothing, so any change happens in place.
// NOTE(review): presumably rescales each instance (row) to unit norm; which norm
// is used and how all-zero rows are treated is not visible here -- confirm.
24  void normalize_instances(DatasetBase& data);
25  void normalize_instances(SparseFeatures& features);
26  void normalize_instances(DenseFeatures& features);
27 
// Overload set for a whole dataset, a sparse feature matrix, and a dense feature
// matrix. Each overload takes its argument by non-const reference and returns an
// Eigen permutation matrix with dynamic size and `int` indices.
// NOTE(review): presumably reorders the feature columns in place by how often
// each feature occurs and returns the permutation that was applied; the sort
// direction is not visible here -- confirm against transform.cpp.
28  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> sort_features_by_frequency(DatasetBase& data);
29  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> sort_features_by_frequency(SparseFeatures& features);
30  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> sort_features_by_frequency(DenseFeatures& features);
31 
// Declaration only; the definition lives in transform.cpp.
// Operates on a sparse feature matrix passed by non-const reference and returns
// nothing. Takes an unsigned `seed` and two `int` parameters, `buckets` and
// `repeats`.
// NOTE(review): presumably a feature-hashing step that maps each feature into
// one of `buckets` slots, `repeats` times, with `seed` fixing the hash -- confirm.
// NOTE(review): behaviour for non-positive `buckets`/`repeats` is not visible here.
32  void hash_sparse_features(SparseFeatures& features, unsigned seed, int buckets, int repeats);
33 
// Overload pair for sparse and dense feature matrices. `source` and `shortlist`
// are const references (not modified); the result is a new matrix of the same
// kind as `source`, returned by value.
// NOTE(review): presumably the result keeps only the feature columns whose
// indices appear in `shortlist` -- confirm ordering and bounds handling in
// transform.cpp.
34  SparseFeatures shortlist_features(const SparseFeatures& source, const std::vector<long>& shortlist);
35  DenseFeatures shortlist_features(const DenseFeatures& source, const std::vector<long>& shortlist);
36 
// Selector for the transformation requested from transform_features().
// The trailing comment on each enumerator gives the formula applied to a value x.
37  enum class DatasetTransform {
38  IDENTITY, // x
39  ONE_PLUS_LOG, // 1 + log(x)
40  LOG_ONE_PLUS, // log(1+x)
41  SQRT, // NOTE(review): presumably sqrt(x) -- confirm in transform.cpp
42  };
43 
// Overload set for a whole dataset, a sparse feature matrix, and a dense feature
// matrix. Each overload takes its target by non-const reference, takes the
// requested DatasetTransform by value, and returns nothing.
// NOTE(review): presumably applies the selected function to every stored feature
// value in place; handling of values outside a function's domain (e.g. log of a
// non-positive number) is not visible here -- confirm in transform.cpp.
44  void transform_features(DatasetBase& data, DatasetTransform transform);
45  void transform_features(SparseFeatures& features, DatasetTransform transform);
46  void transform_features(DenseFeatures& features, DatasetTransform transform);
47 }
48 
49 #endif //DISMEC_SRC_DATA_TRANSFORM_H
Main namespace in which all types, classes, and functions are defined.
Definition: app.h:15
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
Definition: matrix_types.h:58
void normalize_instances(DatasetBase &data)
Definition: transform.cpp:88
void augment_features_with_bias(DatasetBase &data, real_t bias=1)
Definition: transform.cpp:25
DatasetTransform
Definition: transform.h:37
void transform_features(DatasetBase &data, DatasetTransform transform)
Definition: transform.cpp:152
types::DenseVector< real_t > DenseRealVector
Any dense, real-valued vector.
Definition: matrix_types.h:40
std::vector< long > count_features(const SparseFeatures &features)
Definition: transform.cpp:114
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
Definition: matrix_types.h:50
void hash_sparse_features(SparseFeatures &features, unsigned seed, int buckets, int repeats)
Definition: transform.cpp:183
Eigen::PermutationMatrix< Eigen::Dynamic, Eigen::Dynamic, int > sort_features_by_frequency(DatasetBase &data)
Definition: transform.cpp:110
SparseFeatures shortlist_features(const SparseFeatures &source, const std::vector< long > &shortlist)
Definition: transform.cpp:219
DenseRealVector get_mean_feature(const GenericFeatureMatrix &features)
Definition: transform.cpp:52
types::GenericMatrix< DenseFeatures, SparseFeatures > GenericFeatureMatrix
Definition: matrix_types.h:60
float real_t
Definition: regularizers.h:11