DiSMEC++
prediction.cpp
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #include "io/prediction.h"
7 #include "io/common.h"
8 #include "io/numpy.h"
9 #include <fstream>
10 
11 using namespace dismec;
13 
14 void prediction::save_sparse_predictions(const path& target_file,
15  const PredictionMatrix& values,
16  const IndexMatrix& indices) {
17  std::fstream file(target_file, std::fstream::out);
18  save_sparse_predictions(file, values, indices);
19 }
20 
21 void prediction::save_sparse_predictions(std::ostream& target,
22  const PredictionMatrix& values,
23  const IndexMatrix& indices) {
24  if(values.rows() != indices.rows()) {
25  throw std::invalid_argument(fmt::format("Inconsistent number of rows of values ({}) and indices ({}).",
26  values.rows(), indices.rows()));
27  }
28  if(values.cols() != indices.cols()) {
29  throw std::invalid_argument(fmt::format("Inconsistent number of columns of values ({}) and indices ({}).",
30  values.rows(), indices.rows()));
31  }
32 
33  long last_col = values.cols() - 1;
34 
35  // write the header
36  target << values.rows() << " " << values.cols() << "\n";
38  for(int row = 0; row < values.rows(); ++row) {
39  for(int col = 0; col < last_col; ++col) {
40  target << indices.coeff(row, col) << ":" << values.coeff(row, col) << " ";
41  }
42  target << indices.coeff(row, last_col) << ":" << values.coeff(row, last_col) << '\n';
43  }
44 }
45 
46 std::pair<IndexMatrix, PredictionMatrix> prediction::read_sparse_prediction(std::istream& source) {
47  std::string line_buffer;
48  long rows, cols;
49  {
50  if(!std::getline(source, line_buffer)) {
51  throw std::runtime_error("Error while reading header");
52  }
53  std::stringstream parsing(line_buffer);
54  parsing >> rows >> cols;
55  if(parsing.bad()) {
56  throw std::runtime_error("Error while parsing header");
57  }
58 
59  if(rows <= 0) {
60  throw std::runtime_error(fmt::format("Invalid number of rows {} specified.", rows));
61  }
62 
63  if(cols <= 0) {
64  throw std::runtime_error(fmt::format("Invalid number of columns {} specified.", cols));
65  }
66  }
67 
68  IndexMatrix indices(rows, cols);
69  PredictionMatrix values(rows, cols);
70  long current_row = 0;
71 
72  for(; current_row < rows; ++current_row) {
73  if(!std::getline(source, line_buffer)) {
74  throw std::runtime_error(fmt::format("Error while reading predictions for instance {}", current_row));
75  }
76  long k = 0;
77  parse_sparse_vector_from_text(line_buffer.c_str(), [&](long index, double value)
78  {
79  if(k >= cols) {
80  THROW_ERROR("Got more predictions than expected ({}) for instance {}", cols, current_row);
81  }
82  indices.coeffRef(current_row, k) = index;
83  values.coeffRef(current_row, k) = value;
84  ++k;
85  });
86  if(k != cols) {
87  THROW_ERROR("Expected {} columns, but got only {}", cols, k);
88  }
89  }
90 
91  if(current_row != rows) {
92  THROW_ERROR("Expected {} rows, but got only {}", rows, current_row);
93  }
94 
95  return {std::move(indices), std::move(values)};
96 }
97 
98 std::pair<IndexMatrix, PredictionMatrix> prediction::read_sparse_prediction(const path& source) {
99  std::fstream stream(source, std::fstream::in);
100  return read_sparse_prediction(stream);
101 }
102 
103 void prediction::save_dense_predictions_as_txt(const path& target, const PredictionMatrix & values) {
104  std::fstream file(target, std::fstream::out);
105  save_dense_predictions_as_txt(file, values);
106 }
107 
108 void prediction::save_dense_predictions_as_txt(std::ostream& target, const PredictionMatrix& values) {
109  target << values.rows() << " " << values.cols() << "\n";
110  for(int row = 0; row < values.rows(); ++row) {
111  io::write_vector_as_text(target, values.row(row)) << '\n';
112  }
113 }
114 void prediction::save_dense_predictions_as_npy(const path& target, const PredictionMatrix & values) {
115  std::fstream file(target, std::fstream::out);
116  save_dense_predictions_as_npy(file, values);
117 }
118 
119 void prediction::save_dense_predictions_as_npy(std::ostream& target, const PredictionMatrix& values) {
120  io::save_matrix_to_npy(target, values);
121 }
122 
123 #include "doctest.h"
124 
129 TEST_CASE("save_load_sparse_predictions")
130 {
131  IndexMatrix indices(2, 3);
132  PredictionMatrix values(2, 3);
133  indices.row(0) << 0, 2, 1;
134  indices.row(1) << 1, 31, 2;
135  values.row(0) << 0.5, 1.5, 0.9;
136  values.row(1) << 1.5, 0.9, 0.4;
137  std::string as_text =
138  "2 3\n"
139  "0:0.5 2:1.5 1:0.9\n"
140  "1:1.5 31:0.9 2:0.4\n";
141 
142  SUBCASE("save") {
143  std::stringstream target;
144  prediction::save_sparse_predictions(target, values, indices);
145  CHECK(target.str() == as_text);
146  }
147 
148  SUBCASE("load") {
149  std::stringstream source(as_text);
150  auto loaded = prediction::read_sparse_prediction(source);
151  CHECK(loaded.first == indices);
152  CHECK(loaded.second == values);
153  }
154  SUBCASE("load changed whitespace") {
155  std::stringstream source("2 3\t\n"
156  "0:0.5 2: 1.5 1:0.9\n"
157  "1:1.5\t31:0.9 2:0.4");
158  auto loaded = prediction::read_sparse_prediction(source);
159  CHECK(loaded.first == indices);
160  CHECK(loaded.second == values);
161  }
162 }
163 
168 TEST_CASE("save_sparse_predictions checking") {
169  IndexMatrix indices(2, 3);
170  std::stringstream target;
171  SUBCASE("mismatched rows") {
172  PredictionMatrix values(3, 3);
173  CHECK_THROWS(prediction::save_sparse_predictions(target, values, indices));
174  }
175  SUBCASE("mismatched columns") {
176  PredictionMatrix values(2, 2);
177  CHECK_THROWS(prediction::save_sparse_predictions(target, values, indices));
178  }
179 }
180 
184 TEST_CASE("read_sparse_prediction check") {
185  std::stringstream source;
186  SUBCASE("missing header") {
187  source.str("1:2.0 4:1.0");
188  CHECK_THROWS(prediction::read_sparse_prediction(source));
189  }
190  SUBCASE("invalid rows") {
191  source.str("-5 4\n");
192  CHECK_THROWS(prediction::read_sparse_prediction(source));
193  }
194  SUBCASE("invalid columns") {
195  source.str("2 0\n");
196  CHECK_THROWS(prediction::read_sparse_prediction(source));
197  }
198  SUBCASE("too many columns") {
199  source.str("1 2\n1:5.0 2:0.5 3:5.2");
200  CHECK_THROWS(prediction::read_sparse_prediction(source));
201  }
202  SUBCASE("too few columns") {
203  source.str("1 2\n1:5.0");
204  CHECK_THROWS(prediction::read_sparse_prediction(source));
205  }
206  SUBCASE("too few rows") {
207  source.str("2 2\n1:5.0 2:0.5");
208  CHECK_THROWS(prediction::read_sparse_prediction(source));
209  }
210  // too many rows is not really an error that we can diagnose at this point.
211  // if we are reading a file, we know it is wrong, but when reading from a
212  // stream there might just be other data following. Therefore, this isn't checked
213  // here
214 }
215 
216 
217 
221 TEST_CASE("save_dense_predictions")
222 {
223  PredictionMatrix values(2, 3);
224  values.row(0) << 0.5, 1.5, 0.9;
225  values.row(1) << 1.5, 0.9, 0.4;
226  std::string as_text =
227  "2 3\n"
228  "0.5 1.5 0.9\n"
229  "1.5 0.9 0.4\n";
230 
231  std::stringstream target;
233  CHECK(target.str() == as_text);
234 }
building blocks for io procedures that are used by multiple io subsystems
#define THROW_ERROR(...)
Definition: common.h:23
TEST_CASE("save_load_sparse_predictions")
Definition: prediction.cpp:129
void save_dense_predictions_as_txt(const path &target, const PredictionMatrix &values)
Saves predictions as a dense txt matrix.
void save_dense_predictions_as_npy(const path &target, const PredictionMatrix &values)
Saves predictions as a dense npy file.
std::pair< IndexMatrix, PredictionMatrix > read_sparse_prediction(std::istream &source)
Reads sparse predictions as saved by save_sparse_predictions().
void save_sparse_predictions(const path &target, const PredictionMatrix &values, const IndexMatrix &indices)
Saves sparse predictions as a text file.
std::ostream & write_vector_as_text(std::ostream &stream, const Eigen::Ref< const DenseRealVector > &data)
Writes the given vector as space-separated human-readable numbers.
Definition: common.cpp:21
void parse_sparse_vector_from_text(const char *feature_part, F &&callback)
parses sparse features given in index:value text format.
Definition: common.h:52
void save_matrix_to_npy(std::ostream &source, const types::DenseRowMajor< real_t > &)
Saves a matrix to a numpy array.
Main namespace in which all types, classes, and functions are defined.
Definition: app.h:15
types::DenseRowMajor< long > IndexMatrix
Matrix used for indices in sparse predictions.
Definition: matrix_types.h:81
types::DenseRowMajor< real_t > PredictionMatrix
Dense matrix in Row Major format used for predictions.
Definition: matrix_types.h:75