DiSMEC++
common.h
Go to the documentation of this file.
1 // Copyright (c) 2021, Aalto University, developed by Erik Schultheis
2 // All rights reserved.
3 //
4 // SPDX-License-Identifier: MIT
5 
6 #ifndef DISMEC_COMMON_H
7 #define DISMEC_COMMON_H
8 
9 #include <stdexcept>
10 #include "matrix_types.h"
11 #include "spdlog/fmt/fmt.h"
12 #include "utils/throw_error.h"
13 #include "utils/conversion.h"
14 
// Convenience macro: forwards all of its arguments to THROW_EXCEPTION with std::runtime_error
// as the exception type. Call sites in this file pass a "{}"-style format string followed by
// the values to insert (THROW_EXCEPTION itself is not defined in this file -- presumably it
// comes from utils/throw_error.h; confirm there how the message is formatted).
22 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
23 #define THROW_ERROR(...) THROW_EXCEPTION(std::runtime_error, __VA_ARGS__)
24 
25 namespace dismec::io {
// Helpers used by the parsing routines further down in this header.
26  namespace detail {
// Returns a std::string for the character `c`. It is used below to show the offending
// character in parse-error messages. Only declared here (defined in common.cpp) --
// presumably it renders non-printable characters in a readable form; TODO confirm.
29  std::string print_char(char c);
30  }
31 
/// Thin wrapper around std::strtol (base 10) with a const-correct end pointer.
///
/// \param string NUL-terminated text to parse; leading whitespace is skipped by std::strtol.
/// \param out Receives a pointer to the first character that was not consumed. If no
///            conversion could be performed, `*out == string`.
/// \return The parsed number (0 if nothing could be converted). Range errors are reported
///         by std::strtol through `errno`; this function does not reset or inspect it.
inline long parse_long(const char* string, const char** out) {
    char* parse_end = nullptr;
    const long parsed = std::strtol(string, &parse_end, 10); // NOLINT(cppcoreguidelines-avoid-magic-numbers)
    // strtol only offers a non-const end pointer; hand it back as pointer-to-const
    *out = parse_end;
    return parsed;
}
40 
51  template<class F>
52  void parse_sparse_vector_from_text(const char* feature_part, F&& callback) {
53  const char* start = feature_part;
54  while(*feature_part) {
55  const char* result = nullptr;
56  errno = 0;
57  long index = parse_long(feature_part, &result);
58  if (result == feature_part) {
59  // parsing failed -- either, wrong format, or we have reached some trailing spaces
60  // we verify this here explicitly, again using IELF to keep this out of the hot code path
61  bool is_error = [&](){
62  for(const char* scan = feature_part; *scan; ++scan) {
63  if(!std::isspace(*scan)) {
64  return true;
65  }
66  }
67  return false;
68  }();
69  if(!is_error) {
70  return;
71  }
72  THROW_ERROR("Error parsing feature. Missing feature index.");
73  } else if(*result != ':') {
74  THROW_ERROR("Error parsing feature index. Expected ':' at position {}, got '{}'", (result - start), detail::print_char(*result));
75  } else if(errno != 0) {
76  THROW_ERROR("Error parsing feature index. Errno={}: '{}'", errno, strerror(errno));
77  }
78 
79  errno = 0;
80  char* after_feature = nullptr;
81  double value = std::strtod(result+1, &after_feature);
82  if(result + 1 == after_feature) {
83  THROW_ERROR("Error parsing feature: Missing feature value.");
84  } else if(errno != 0) {
85  THROW_ERROR("Error parsing feature value. Errno={}: '{}'", errno, strerror(errno));
86  }
87 
88  feature_part = after_feature;
89  callback(index, value);
90  }
91  }
92 
/// Writes the given vector as space-separated human-readable numbers.
/// `stream` is the output stream, `data` a read-only view of the vector to write.
/// Returns a std::ostream reference -- presumably `stream` itself; confirm in common.cpp.
98  std::ostream& write_vector_as_text(std::ostream& stream, const Eigen::Ref<const DenseRealVector>& data);
99 
/// Reads the given vector as space-separated human-readable numbers.
/// `stream` is the input stream, `data` a writable view of the vector that receives the values.
/// NOTE(review): presumably `data` must already have the expected size, since an Eigen::Ref
/// is passed by value -- confirm in common.cpp.
/// Returns a std::istream reference -- presumably `stream` itself; confirm in common.cpp.
106  std::istream& read_vector_from_text(std::istream& stream, Eigen::Ref<DenseRealVector> data);
107 
108  // binary read and write functions
109  template<class T>
110  void binary_dump(std::streambuf& target, const T* begin, const T* end) {
111  static_assert(std::is_pod_v<T>, "Can only binary dump POD types");
112  std::streamsize num_bytes = (end - begin) * ssizeof<T>;
113  auto wrote = target.sputn(reinterpret_cast<const char*>(begin), num_bytes);
114  if(num_bytes != wrote) {
115  THROW_ERROR("Expected to write {} bytes, but wrote only {}", num_bytes, wrote);
116  }
117  }
118 
119  template<class T>
120  void binary_load(std::streambuf& target, T* begin, T* end) {
121  static_assert(std::is_pod_v<T>, "Can only binary load POD types");
122  std::streamsize num_bytes = (end - begin) * ssizeof<T>;
123  auto read = target.sgetn(reinterpret_cast<char*>(begin), num_bytes);
124  if(num_bytes != read) {
125  THROW_ERROR("Expected to read {} bytes, but got only {}", num_bytes, read);
126  }
127  }
128 
/// Collects the rows and columns parsed from a plain-text matrix file.
/// Returned by parse_header(). The members carry no default initializers, so a
/// default-constructed MatrixHeader holds indeterminate values.
130  struct MatrixHeader {
// number of rows announced by the header
131  long NumRows;
// number of columns announced by the header
132  long NumCols;
133  };
134 
/// Extracts a MatrixHeader (NumRows, NumCols) from the text in `content`.
/// Only declared here; defined in common.cpp. NOTE(review): the expected text layout and
/// the error behaviour for malformed input are not visible from this header -- confirm there.
137  MatrixHeader parse_header(const std::string& content);
138 
/// Binary Sparse Matrix in List-of-Lists format.
/// Returned by read_binary_matrix_as_lol().
struct LoLBinarySparse {
    /// number of rows of the matrix
    long NumRows;
    /// number of columns of the matrix
    long NumCols;
    /// Index lists of the non-zero entries.
    /// NOTE(review): presumably one inner list per row (or per column) that holds the indices
    /// of the non-zeros along the other axis -- confirm against read_binary_matrix_as_lol.
    std::vector<std::vector<long>> NonZeros;
};
148 
/// Reads a binary sparse matrix from `source` and returns it in list-of-lists form.
/// Only declared here; defined in common.cpp. NOTE(review): the expected input format and
/// the behaviour on malformed data are not visible from this header -- confirm there.
151  LoLBinarySparse read_binary_matrix_as_lol(std::istream& source);
152 }
153 
154 #endif //DISMEC_COMMON_H
#define THROW_ERROR(...)
Definition: common.h:23
std::string print_char(char c)
Definition: common.cpp:10
std::ostream & write_vector_as_text(std::ostream &stream, const Eigen::Ref< const DenseRealVector > &data)
Writes the given vector as space-separated human-readable numbers.
Definition: common.cpp:21
std::istream & read_vector_from_text(std::istream &stream, Eigen::Ref< DenseRealVector > data)
Reads the given vector as space-separated human-readable numbers.
Definition: common.cpp:37
void binary_dump(std::streambuf &target, const T *begin, const T *end)
Definition: common.h:110
MatrixHeader parse_header(const std::string &content)
Definition: common.cpp:49
void binary_load(std::streambuf &target, T *begin, T *end)
Definition: common.h:120
LoLBinarySparse read_binary_matrix_as_lol(std::istream &source)
Definition: common.cpp:76
long parse_long(const char *string, const char **out)
Definition: common.h:34
void parse_sparse_vector_from_text(const char *feature_part, F &&callback)
parses sparse features given in index:value text format.
Definition: common.h:52
Binary Sparse Matrix in List-of-Lists format.
Definition: common.h:143
std::vector< std::vector< long > > NonZeros
Definition: common.h:146
Collects the rows and columns parsed from a plain-text matrix file.
Definition: common.h:130