9 #include "spdlog/spdlog.h"
27 weight_callback(buffer);
40 read_callback(buffer);
56 throw std::runtime_error(
"Error while writing weights");
74 bool col_major =
false;
78 binary_dump(target, data.data(), data.data() + data.size());
87 if(info.DataType != data_type_string<real_t>()) {
88 THROW_ERROR(
"Mismatch in data type, got {} but expected {}", info.DataType, data_type_string<real_t>());
98 if(info.ColumnMajor) {
99 THROW_ERROR(
"Weight data is required to be in row-major format");
103 binary_load(source, data.data(), data.data() + data.size());
114 throw std::invalid_argument(
"Threshold cannot be negative");
120 for(
int j = 0; j < data.size(); ++j)
122 if(std::abs(data.coeff(j)) > threshold) {
123 target << j <<
':' << data.coeff(j) <<
' ';
131 if(nnz > 0.25 * entries) {
132 spdlog::warn(
"Saved model in sparse mode, but sparsity is only {}%. "
133 "Consider increasing the threshold or saving as dense data.",
134 100 - (100 * nnz) / entries);
136 spdlog::info(
"Saved model in sparse mode. Only {:2.2}% of weights exceeded threshold.",
double(100 * nnz) / entries);
141 Eigen::SparseVector<real_t> sparse_vec;
143 std::string line_buffer;
147 if(!std::getline(source, line_buffer)) {
148 THROW_ERROR(
"Input operation failed when trying to read weights for label {} out of {}",
151 sparse_vec.setZero();
154 if (index >= num_features || index < 0) {
155 THROW_ERROR(
"Encountered index {:5} with value {} for weights of label {:6}. Number of features "
156 "was specified as {}.", index, value, label.to_index(), num_features);
158 sparse_vec.insertBack(index) = value;
160 }
catch (
const std::exception& error) {
161 THROW_ERROR(
"Error while parsing weights for label {:6}: {}", label.to_index(), error.what());
172 using ::model::DenseModel;
173 using ::model::PartialModelSpec;
183 weights << 1, 0, 0, 2,
186 DenseModel model(std::make_shared<DenseModel::WeightMatrix>(weights), PartialModelSpec{
label_id_t{1}, 4, 6});
187 DenseModel reconstruct(2, PartialModelSpec{
label_id_t{1}, 4, 6});
188 std::stringstream target;
190 std::string expected_dense =
"1 0\n"
196 std::string expected_sparse =
"0:1 \n"
201 SUBCASE(
"save dense txt") {
203 std::string result = target.str();
204 CHECK(result == expected_dense);
207 SUBCASE(
"save sparse txt") {
209 std::string result = target.str();
210 CHECK(result == expected_sparse);
213 SUBCASE(
"load dense txt") {
214 target.str(expected_dense);
216 CHECK(model.get_raw_weights() == reconstruct.get_raw_weights());
219 SUBCASE(
"load sparse txt") {
220 target.str(expected_sparse);
222 CHECK(model.get_raw_weights() == reconstruct.get_raw_weights());
230 weights << 1, 0, 0, 2,
233 DenseModel model(std::make_shared<DenseModel::WeightMatrix>(weights), PartialModelSpec{
label_id_t{1}, 4, 6});
234 DenseModel reconstruct(2, PartialModelSpec{
label_id_t{1}, 4, 6});
236 std::stringbuf target;
238 target.pubseekpos(0);
242 CHECK(model.get_raw_weights() == reconstruct.get_raw_weights());
Strong typedef for an int to signify a label id.
A model combines a set of weights with some meta-information about these weights.
label_id_t labels_end() const noexcept
virtual long num_features() const =0
How many weights are in each weight vector, i.e. how many features should the input have.
long num_labels() const noexcept
How many labels are in the underlying dataset.
long contained_labels() const noexcept
How many labels are in this submodel.
void set_weights_for_label(label_id_t label, const WeightVectorIn &weights)
Sets the weights for a label.
void get_weights_for_label(label_id_t label, Eigen::Ref< DenseRealVector > target) const
Gets the weights for the given label as a dense vector.
label_id_t labels_begin() const noexcept
Building blocks for IO procedures that are used by multiple IO subsystems.
Eigen::Matrix< real_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > WeightMatrix
void load_weights(Model &target, F &&read_callback)
Basic scaffold for loading weights.
void save_weights(const Model &model, F &&weight_callback)
Basic scaffold for saving weights.
Namespace for all model-related IO functions.
void load_sparse_weights_txt(std::istream &source, Model &target)
Loads sparse weights from plain-text format.
void save_dense_weights_npy(std::streambuf &target, const Model &model)
Saves the dense weights in a npy file.
void save_as_sparse_weights_txt(std::ostream &target, const Model &model, double threshold)
Saves the weights in sparse plain-text format, culling small weights.
void load_dense_weights_txt(std::istream &source, Model &target)
Loads weights saved by io::model::save_dense_weights_txt.
void save_dense_weights_txt(std::ostream &target, const Model &model)
Saves the dense weights in a plain-text format.
void load_dense_weights_npy(std::streambuf &target, Model &model)
Loads dense weights from a npy file.
std::ostream & write_vector_as_text(std::ostream &stream, const Eigen::Ref< const DenseRealVector > &data)
Writes the given vector as space-separated human-readable numbers.
std::istream & read_vector_from_text(std::istream &stream, Eigen::Ref< DenseRealVector > data)
Reads the given vector as space-separated human-readable numbers.
void binary_dump(std::streambuf &target, const T *begin, const T *end)
std::string make_npy_description(std::string_view dtype_desc, bool column_major, std::size_t size)
Creates a string with the data description dictionary for (1 dimensional) arrays.
void binary_load(std::streambuf &target, T *begin, T *end)
void write_npy_header(std::streambuf &target, std::string_view description)
Writes the header for a npy file.
void parse_sparse_vector_from_text(const char *feature_part, F &&callback)
Parses sparse features given in index:value text format.
NpyHeaderData parse_npy_header(std::streambuf &source)
Parses the header of the npy file given by source.
Main namespace in which all types, classes, and functions are defined.
types::DenseVector< real_t > DenseRealVector
Any dense, real-valued vector.
TEST_CASE("save/load weights as plain text")