DiSMEC++
|
#include <data.h>
Public Member Functions | |
virtual | ~DatasetBase ()=default |
DatasetBase (const DatasetBase &)=default | |
DatasetBase (DatasetBase &&)=default | |
DatasetBase & | operator= (DatasetBase &&)=default |
DatasetBase & | operator= (const DatasetBase &)=default |
std::shared_ptr< const GenericFeatureMatrix > | get_features () const |
get a shared pointer to the (immutable) feature data More... | |
std::shared_ptr< GenericFeatureMatrix > | edit_features () |
get a shared pointer to mutable feature data. Use with care. More... | |
long | num_features () const noexcept |
Get the total number of features, i.e. the number of columns in the feature matrix. More... | |
long | num_examples () const noexcept |
Get the total number of instances, i.e. the number of rows in the feature matrix. More... | |
virtual long | num_labels () const noexcept=0 |
virtual long | num_positives (label_id_t id) const |
virtual long | num_negatives (label_id_t id) const |
std::shared_ptr< const BinaryLabelVector > | get_labels (label_id_t id) const |
virtual void | get_labels (label_id_t id, Eigen::Ref< BinaryLabelVector > target) const =0 |
Protected Member Functions | |
DatasetBase (SparseFeatures x) | |
DatasetBase (DenseFeatures x) | |
Protected Attributes | |
std::shared_ptr< GenericFeatureMatrix > | m_Features |
|
virtual default |
|
default |
|
default |
|
explicit protected |
|
explicit protected |
std::shared_ptr< GenericFeatureMatrix > DatasetBase::edit_features | ( | ) |
get a shared pointer to mutable feature data. Use with care.
Definition at line 43 of file data.cpp.
References m_Features.
Referenced by dismec::augment_features_with_bias(), dismec::normalize_instances(), anonymous_namespace{py_data.cpp}::set_features_dense(), anonymous_namespace{py_data.cpp}::set_features_sparse(), dismec::sort_features_by_frequency(), and dismec::transform_features().
std::shared_ptr< const GenericFeatureMatrix > DatasetBase::get_features | ( | ) | const |
get a shared pointer to the (immutable) feature data
Definition at line 39 of file data.cpp.
References m_Features.
Referenced by anonymous_namespace{py_data.cpp}::get_features(), join_data(), and dismec::io::save_xmc_dataset().
std::shared_ptr< const BinaryLabelVector > DatasetBase::get_labels | ( | label_id_t | id | ) | const |
Gets the label vector (encoded as dense vector with elements from {-1, 1}) for the id'th class. Throws std::out_of_bounds, if id is not in [0, num_labels()).
Definition at line 21 of file data.cpp.
References num_examples().
Referenced by anonymous_namespace{py_data.cpp}::get_labels(), num_positives(), dismec::CascadeTraining::update_minimizer(), dismec::CascadeTraining::update_objective(), and dismec::DiSMECTraining::update_objective().
|
pure virtual |
Gets the label vector (encoded as dense vector with elements from {-1, 1}) for the id'th class. The weights will be put into the given target buffer. Throws std::out_of_bounds, if id is not in [0, num_labels()).
Implemented in dismec::MultiLabelData, and dismec::BinaryData.
|
noexcept |
Get the total number of instances, i.e. the number of rows in the feature matrix.
Definition at line 52 of file data.cpp.
References m_Features.
Referenced by dismec::prediction::FullPredictionTaskGenerator::FullPredictionTaskGenerator(), get_labels(), num_negatives(), dismec::MultiLabelData::num_negatives(), dismec::prediction::FullPredictionTaskGenerator::num_tasks(), dismec::prediction::TopKPredictionTaskGenerator::num_tasks(), dismec::PropensityModel::PropensityModel(), dismec::io::save_xmc_dataset(), dismec::prediction::TopKPredictionTaskGenerator::TopKPredictionTaskGenerator(), dismec::CascadeTraining::update_minimizer(), and dismec::DiSMECTraining::update_minimizer().
|
noexcept |
Get the total number of features, i.e. the number of columns in the feature matrix.
Definition at line 48 of file data.cpp.
References m_Features.
Referenced by dismec::TrainingSpec::num_features(), dismec::prediction::PredictionBase::PredictionBase(), and dismec::io::save_xmc_dataset().
|
pure virtual noexcept |
Gets the total number of different labels in the dataset. TODO call this num_classes instead?
Implemented in dismec::MultiLabelData, and dismec::BinaryData.
Referenced by dismec::prediction::FullPredictionTaskGenerator::FullPredictionTaskGenerator(), dismec::prediction::PredictionBase::PredictionBase(), and dismec::prediction::TopKPredictionTaskGenerator::TopKPredictionTaskGenerator().
|
virtual |
Gets the number of instances where label id is absent (=-1). Throws std::out_of_bounds, if id is not in [0, num_labels()).
Reimplemented in dismec::MultiLabelData.
Definition at line 17 of file data.cpp.
References num_examples(), and num_positives().
Referenced by anonymous_namespace{py_data.cpp}::num_negatives().
|
virtual |
Gets the number of instances where label id is present (=+1). Throws std::out_of_bounds, if id is not in [0, num_labels()).
Reimplemented in dismec::MultiLabelData.
Definition at line 13 of file data.cpp.
References get_labels().
Referenced by dismec::PropensityModel::get_propensity(), num_negatives(), anonymous_namespace{py_data.cpp}::num_positives(), dismec::CascadeTraining::update_minimizer(), and dismec::DiSMECTraining::update_minimizer().
|
default |
|
default |
|
protected |
Definition at line 60 of file data.h.
Referenced by edit_features(), get_features(), num_examples(), and num_features().