12 #include <spdlog/spdlog.h>
13 #include <spdlog/stopwatch.h>
24 std::string line_buffer;
25 std::getline(features, line_buffer);
29 for(
int row = 0; row < header.
NumRows; ++row) {
37 spdlog::stopwatch timer;
42 if(label_data.NumRows != feature_matrix.rows()) {
43 THROW_ERROR(
"Mismatch between number of examples in feature file ({}) and in label file ({})",
44 feature_matrix.rows(), label_data.NumRows);
47 spdlog::info(
"Finished loading dataset with {} examples in {:.3}s.", label_data.NumCols, timer);
49 return MultiLabelData(std::move(feature_matrix), std::move(label_data.NonZeros));
53 std::fstream features_file(features, std::fstream::in);
54 if (!features_file.is_open()) {
55 throw std::runtime_error(fmt::format(
"Cannot open input file {}", features.c_str()));
57 std::fstream labels_file(labels, std::fstream::in);
58 if (!labels_file.is_open()) {
59 throw std::runtime_error(fmt::format(
"Cannot open input file {}", labels.c_str()));
71 std::stringstream features;
72 std::stringstream labels;
75 "1.0 2.5 -1.0 3.5 4.4\n"
76 "-1.0 0.0 0.5 2.5 1.5\n"
77 "0.0 5.4\t 3.4 2.5 1.6\n");
87 auto df = ds.get_features()->dense();
88 REQUIRE(df.rows() == 3);
89 REQUIRE(df.cols() == 5);
90 float true_features[] = {1.0, 2.5, -1.0, 3.5, 4.4, -1.0, 0.0, 0.5, 2.5, 1.5, 0.0, 5.4, 3.4, 2.5, 1.6};
91 for(
int i = 0; i < df.size(); ++i) {
92 CHECK(df.coeff(i) == true_features[i]);
96 const auto& l0 = ds.get_label_instances(
label_id_t{0});
97 REQUIRE(l0.size() == 2);
101 const auto& l1 = ds.get_label_instances(
label_id_t{1});
102 REQUIRE(l1.size() == 1);
105 const auto& l2 = ds.get_label_instances(
label_id_t{2});
106 REQUIRE(l2.size() == 1);
Strong typedef for an int to signify a label id.
Building blocks for I/O procedures that are used by multiple I/O subsystems.
DenseFeatures load_features(std::istream &features)
MultiLabelData read_slice_dataset(std::istream &features, std::istream &labels)
Reads a dataset given in slice format.
std::istream & read_vector_from_text(std::istream &stream, Eigen::Ref< DenseRealVector > data)
Reads the given vector as space-separated human-readable numbers.
MatrixHeader parse_header(const std::string &content)
LoLBinarySparse read_binary_matrix_as_lol(std::istream &source)
bool is_npy(std::istream &target)
Check whether the stream is an npy file.
types::DenseRowMajor< real_t > load_matrix_from_npy(std::istream &source)
Loads a matrix from a numpy array.
Main namespace in which all types, classes, and functions are defined.
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
TEST_CASE("small dataset")