// PY_PROPERTY(function): shorthand for binding a getter-only property.
// Expands to a `def_property(...)` call, so it must follow a `.` in a class-binding
// chain (e.g. `.PY_PROPERTY(num_examples)`).
//  - The property name is the stringified macro argument (`#function`).
//  - The getter is a lambda that takes a `const DatasetBase&` and returns the
//    result of calling the member function of the same name on it.
//  - `nullptr` is passed in the setter position, i.e. no setter is supplied.
15 #define PY_PROPERTY(function) \
16 def_property(#function, [](const DatasetBase& pds){ return pds.function(); } , nullptr)
39 if(one_based_indexing) {
50 PyDataSet load_slice(
const std::filesystem::path& features_file,
const std::filesystem::path& labels_file) {
57 py::class_<DatasetBase, PyDataSet>(m,
"DataSet")
62 {
return std::make_shared<MultiLabelData>(std::move(features), std::move(
positives)); }),
63 py::kw_only(), py::arg(
"sparse_features"), py::arg(
"positives")
66 {
return std::make_shared<MultiLabelData>(std::move(features), std::move(
positives)); }),
67 py::kw_only(), py::arg(
"dense_features"), py::arg(
"positives")
70 .PY_PROPERTY(num_examples)
71 .PY_PROPERTY(num_labels)
74 .def(
"get_labels",
get_labels, py::arg(
"label_id"))
78 py::arg(
"sparse_features"))
81 py::arg(
"dense_features"));
85 py::arg(
"source_file"), py::kw_only(),
86 py::arg(
"one_based_index") =
false,
87 py::call_guard<py::gil_scoped_release>());
90 py::arg(
"file_name"), py::arg(
"dataset"),
91 py::kw_only(), py::arg(
"precision") = 4,
92 py::call_guard<py::gil_scoped_release>()
96 py::kw_only(),py::arg(
"features"), py::arg(
"labels"),
97 py::call_guard<py::gil_scoped_release>()
std::shared_ptr< T > wrap_shared(T &&source)
std::shared_ptr< dismec::DatasetBase > PyDataSet
virtual long num_negatives(label_id_t id) const
std::shared_ptr< const BinaryLabelVector > get_labels(label_id_t id) const
virtual long num_positives(label_id_t id) const
std::shared_ptr< const GenericFeatureMatrix > get_features() const
Get a shared pointer to the (immutable) feature data.
std::shared_ptr< GenericFeatureMatrix > edit_features()
Get a shared pointer to mutable feature data. Use with care.
Strong typedef for an int to signify a label id.
auto num_positives(const DatasetBase &ds, long label)
auto get_features(const DatasetBase &ds)
auto get_labels(const DatasetBase &ds, long id)
auto set_features_sparse(DatasetBase &ds, SparseFeatures features)
void save_xmc(const std::filesystem::path &target_file, const DatasetBase &ds, int precision)
PyDataSet load_xmc(const std::filesystem::path &source_file, bool one_based_indexing)
auto num_negatives(const DatasetBase &ds, long label)
PyDataSet load_slice(const std::filesystem::path &features_file, const std::filesystem::path &labels_file)
auto set_features_dense(DatasetBase &ds, DenseFeatures features)
constexpr double precision(const ConfusionMatrixBase< T > &matrix)
constexpr T positives(const ConfusionMatrixBase< T > &matrix)
MultiLabelData read_xmc_dataset(const std::filesystem::path &source, IndexMode mode=IndexMode::ZERO_BASED)
Reads a dataset given in the extreme multilabel classification format.
MultiLabelData read_slice_dataset(std::istream &features, std::istream &labels)
Reads a dataset given in slice format.
void save_xmc_dataset(std::ostream &target, const MultiLabelData &data)
Saves the given dataset in XMC format.
@ ONE_BASED
labels and feature indices are 1, 2, ..., num
@ ZERO_BASED
labels and feature indices are 0, 1, ..., num - 1
Main namespace in which all types, classes, and functions are defined.
types::DenseRowMajor< real_t > DenseFeatures
Dense Feature Matrix in Row Major format.
types::SparseRowMajor< real_t > SparseFeatures
Sparse Feature Matrix in Row Major format.
types::GenericMatrix< DenseFeatures, SparseFeatures > GenericFeatureMatrix
#define PY_PROPERTY(function)
void register_dataset(pybind11::module_ &m)