12 #include <spdlog/spdlog.h> 
   13 #include <spdlog/stopwatch.h> 
   24         std::string line_buffer;
 
   25         std::getline(features, line_buffer);
 
   29         for(
int row = 0; row < header.
NumRows; ++row) {
 
   37     spdlog::stopwatch timer;
 
   42     if(label_data.NumRows != feature_matrix.rows()) {
 
   43         THROW_ERROR(
"Mismatch between number of examples in feature file ({}) and in label file ({})",
 
   44                     feature_matrix.rows(), label_data.NumRows);
 
   47     spdlog::info(
"Finished loading dataset with {} examples in {:.3}s.", label_data.NumCols, timer);
 
   49     return MultiLabelData(std::move(feature_matrix), std::move(label_data.NonZeros));
 
   53     std::fstream features_file(features, std::fstream::in);
 
   54     if (!features_file.is_open()) {
 
   55         throw std::runtime_error(fmt::format(
"Cannot open input file {}", features.c_str()));
 
   57     std::fstream labels_file(labels, std::fstream::in);
 
   58     if (!labels_file.is_open()) {
 
   59         throw std::runtime_error(fmt::format(
"Cannot open input file {}", labels.c_str()));
 
   71     std::stringstream features;
 
   72     std::stringstream labels;
 
   75                  "1.0  2.5  -1.0  3.5  4.4\n" 
   76                  "-1.0 0.0   0.5  2.5  1.5\n" 
   77                  "0.0   5.4\t 3.4   2.5 1.6\n");
 
   87     auto df = ds.get_features()->dense();
 
   88     REQUIRE(df.rows() == 3);
 
   89     REQUIRE(df.cols() == 5);
 
   90     float true_features[] = {1.0, 2.5, -1.0, 3.5, 4.4, -1.0, 0.0, 0.5, 2.5, 1.5, 0.0, 5.4, 3.4, 2.5, 1.6};
 
   91     for(
int i = 0; i < df.size(); ++i) {
 
   92         CHECK(df.coeff(i) == true_features[i]);
 
   96     const auto& l0 = ds.get_label_instances(
label_id_t{0});
 
   97     REQUIRE(l0.size() == 2);
 
  101     const auto& l1 = ds.get_label_instances(
label_id_t{1});
 
  102     REQUIRE(l1.size() == 1);
 
  105     const auto& l2 = ds.get_label_instances(
label_id_t{2});
 
  106     REQUIRE(l2.size() == 1);
 
Strong typedef for an int to signify a label id.
Building blocks for I/O procedures that are used by multiple I/O subsystems.
DenseFeatures load_features(std::istream &features)
MultiLabelData read_slice_dataset(std::istream &features, std::istream &labels)
Reads a dataset given in slice format.
std::istream & read_vector_from_text(std::istream &stream, Eigen::Ref< DenseRealVector > data)
Reads the given vector as space-separated human-readable numbers.
MatrixHeader parse_header(const std::string &content)
LoLBinarySparse read_binary_matrix_as_lol(std::istream &source)
bool is_npy(std::istream &target)
Check whether the stream is a npy file.
types::DenseRowMajor< real_t > load_matrix_from_npy(std::istream &source)
Loads a matrix from a numpy array.
Main namespace in which all types, classes, and functions are defined.
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
TEST_CASE("small dataset")