Dataset ======= .. cpp:class:: pandas::Dataset xarray-style dataset: a collection of named data variables together with their coordinates and attributes, declared in ``pd_xarray.h`` and returned by ``DataFrame::to_xarray()``. Example ------- .. code-block:: cpp #include "pd_xarray.h" using namespace pandas; // Use Dataset Dataset obj; // ... operations ... Constructors ------------ .. list-table:: :widths: 55 25 20 :header-rows: 1 * - Signature - Location - Example * - ``Dataset(const std::map& data_vars, const std::map& coords = {}, const std::map& attrs = {})`` - pd_xarray.h:467 - Indexing / Selection -------------------- .. list-table:: :widths: 40 20 15 25 :header-rows: 1 * - Signature - Return Type - Location - Example * - ``const std::map& attrs() const`` - const std::map& - pd_xarray.h:503 - :ref:`View <example-dataset-attrs-0>` * - ``std::map& attrs()`` - std::map& - pd_xarray.h:504 - :ref:`View <example-dataset-attrs-1>` Data Manipulation ----------------- .. list-table:: :widths: 40 20 15 25 :header-rows: 1 * - Signature - Return Type - Location - Example * - ``void drop_var(const std::string& name)`` - void - pd_xarray.h:574 - Statistics ---------- .. list-table:: :widths: 40 20 15 25 :header-rows: 1 * - Signature - Return Type - Location - Example * - ``std::vector var_names() const`` - std::vector - pd_xarray.h:514 - :ref:`View <example-dataset-var_names-2>` I/O --- .. list-table:: :widths: 40 20 15 25 :header-rows: 1 * - Signature - Return Type - Location - Example * - ``std::string to_string() const`` - std::string - pd_xarray.h:625 - :ref:`View <example-dataset-to_string-3>` Other Methods ------------- .. 
list-table:: :widths: 40 20 15 25 :header-rows: 1 * - Signature - Return Type - Location - Example * - ``void compute_dims()`` - void - pd_xarray.h:718 - * - ``const Coordinate& coord(const std::string& name) const`` - const Coordinate& - pd_xarray.h:593 - :ref:`View <example-dataset-coord-4>` * - ``const std::map& coords() const`` - const std::map& - pd_xarray.h:492 - :ref:`View <example-dataset-coords-5>` * - ``std::map& coords()`` - std::map& - pd_xarray.h:493 - :ref:`View <example-dataset-coords-6>` * - ``const std::map& data_vars() const`` - const std::map& - pd_xarray.h:486 - * - ``std::map& data_vars()`` - std::map& - pd_xarray.h:487 - * - ``std::vector dim_names() const`` - std::vector - pd_xarray.h:526 - * - ``const std::map& dims() const`` - const std::map& - pd_xarray.h:498 - :ref:`View <example-dataset-dims-7>` * - ``bool empty() const { return data_vars_.empty(); }`` - bool - pd_xarray.h:538 - :ref:`View <example-dataset-empty-8>` * - ``bool has_coord(const std::string& name) const`` - bool - pd_xarray.h:586 - :ref:`View <example-dataset-has_coord-9>` * - ``bool has_var(const std::string& name) const`` - bool - pd_xarray.h:547 - :ref:`View <example-dataset-has_var-10>` * - ``size_t nvars() const { return data_vars_.size(); }`` - size_t - pd_xarray.h:509 - :ref:`View <example-dataset-nvars-11>` * - ``void recompute_dims()`` - void - pd_xarray.h:614 - * - ``void set_coord(const std::string& name, const Coordinate& coord)`` - void - pd_xarray.h:604 - * - ``void set_var(const std::string& name, const DataArray& var)`` - void - pd_xarray.h:566 - Code Examples ------------- The following examples are extracted from the test suite. .. _example-dataset-attrs-0: .. dropdown:: attrs (pd_test_1_all.cpp:16361) :class-title: example-dropdown .. 
code-block:: cpp :linenos: :lineno-start: 16351 :emphasize-lines: 11 // ===================================================================== // Series Attrs Integration Tests // ===================================================================== void pd_test_ndframe_series_attrs() { std::cout << "========= series attrs integration =============================" << std::endl; pandas::Series s({1.0, 2.0, 3.0}); // Test setting attrs on Series s.attrs().set("source", std::string("test_data")); s.attrs().set("timestamp", 1234567890); bool passed = s.attrs().get("source") == "test_data"; if (!passed) { std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl; throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source"); } passed = s.attrs().get("timestamp") == 1234567890; if (!passed) { .. _example-dataset-attrs-1: .. dropdown:: attrs (pd_test_1_all.cpp:16361) :class-title: example-dropdown .. code-block:: cpp :linenos: :lineno-start: 16351 :emphasize-lines: 11 // ===================================================================== // Series Attrs Integration Tests // ===================================================================== void pd_test_ndframe_series_attrs() { std::cout << "========= series attrs integration =============================" << std::endl; pandas::Series s({1.0, 2.0, 3.0}); // Test setting attrs on Series s.attrs().set("source", std::string("test_data")); s.attrs().set("timestamp", 1234567890); bool passed = s.attrs().get("source") == "test_data"; if (!passed) { std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl; throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source"); } passed = s.attrs().get("timestamp") == 1234567890; if (!passed) { .. _example-dataset-var_names-2: .. dropdown:: var_names (pd_test_2_all.cpp:17312) :class-title: example-dropdown .. 
code-block:: cpp :linenos: :lineno-start: 17302 :emphasize-lines: 11 pandas::DataFrame df(data); pandas::Dataset ds = df.to_xarray(); // Verify single variable bool passed = (ds.nvars() == 1 && ds.has_var("value")); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_single_column() : expected single 'value' variable" << std::endl; throw std::runtime_error("pd_test_to_xarray_single_column failed"); } // Verify var_names() auto names = ds.var_names(); passed = (names.size() == 1 && names[0] == "value"); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_single_column() : wrong var_names" << std::endl; throw std::runtime_error("pd_test_to_xarray_single_column failed: wrong var_names"); } std::cout << " -> tests passed" << std::endl; } .. _example-dataset-to_string-3: .. dropdown:: to_string (pd_test_1_all.cpp:2693) :class-title: example-dropdown .. code-block:: cpp :linenos: :lineno-start: 2683 :emphasize-lines: 11 pandas::PeriodArray arr_m(std::vector{ "2020-01", "NaT", "2025-06" }, "M"); // Year auto years = arr_m.year(); auto y0 = years[0]; if (!y0.has_value() || y0.value() != 2020) { std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl; throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]"); } auto y1 = years[1]; if (y1.has_value()) { std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl; throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA"); } auto y2 = years[2]; .. _example-dataset-coord-4: .. dropdown:: coord (pd_test_2_all.cpp:17025) :class-title: example-dropdown .. 
code-block:: cpp :linenos: :lineno-start: 17015 :emphasize-lines: 11 // Create DataFrame with RangeIndex (default) std::map> data = { {"val", {1.0, 2.0, 3.0}} }; pandas::DataFrame df(data); pandas::Dataset ds = df.to_xarray(); // Get index coordinate const pandas::Coordinate& idx_coord = ds.coord("index"); // Verify coordinate size bool passed = (idx_coord.size() == 3); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_coordinate_values() : wrong coordinate size" << std::endl; throw std::runtime_error("pd_test_to_xarray_coordinate_values failed: wrong size"); } // Verify coordinate values (0, 1, 2 for RangeIndex) passed = (idx_coord.get_value_str(0) == "0" && .. _example-dataset-coords-5: .. dropdown:: coords (pd_test_3_all.cpp:14912) :class-title: example-dropdown .. code-block:: cpp :linenos: :lineno-start: 14902 :emphasize-lines: 11 /** * Test Series.to_xarray() coordinates */ void pd_test_series_to_xarray_coordinates() { std::cout << "========= Series.to_xarray() coordinates ==============="; pandas::Series s({100.0, 200.0, 300.0}, "prices"); pandas::DataArray da = s.to_xarray(); // Verify coordinate exists auto& coords = da.coords(); if (coords.find("index") == coords.end()) { std::cout << " [FAIL] : should have 'index' coordinate" << std::endl; throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord"); } // Verify coordinate size matches Series const pandas::Coordinate& coord = coords.at("index"); if (coord.size() != 3) { std::cout << " [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl; throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size"); .. _example-dataset-coords-6: .. dropdown:: coords (pd_test_3_all.cpp:14912) :class-title: example-dropdown .. 
code-block:: cpp :linenos: :lineno-start: 14902 :emphasize-lines: 11 /** * Test Series.to_xarray() coordinates */ void pd_test_series_to_xarray_coordinates() { std::cout << "========= Series.to_xarray() coordinates ==============="; pandas::Series s({100.0, 200.0, 300.0}, "prices"); pandas::DataArray da = s.to_xarray(); // Verify coordinate exists auto& coords = da.coords(); if (coords.find("index") == coords.end()) { std::cout << " [FAIL] : should have 'index' coordinate" << std::endl; throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord"); } // Verify coordinate size matches Series const pandas::Coordinate& coord = coords.at("index"); if (coord.size() != 3) { std::cout << " [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl; throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size"); .. _example-dataset-dims-7: .. dropdown:: dims (pd_test_2_all.cpp:16997) :class-title: example-dropdown .. code-block:: cpp :linenos: :lineno-start: 16987 :emphasize-lines: 11 // Create DataFrame with 4 rows std::map> data = { {"x", {10, 20, 30, 40}}, {"y", {100, 200, 300, 400}} }; pandas::DataFrame df(data); pandas::Dataset ds = df.to_xarray(); // Verify dimensions auto dims = ds.dims(); bool passed = (dims.size() == 1 && dims.count("index") == 1 && dims.at("index") == 4); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_dimensions() : wrong dimensions" << std::endl; std::cout << " Expected: index: 4" << std::endl; std::cout << " Got: "; for (const auto& [name, size] : dims) { std::cout << name << ": " << size << " "; } std::cout << std::endl; throw std::runtime_error("pd_test_to_xarray_dimensions failed"); .. _example-dataset-empty-8: .. dropdown:: empty (pd_test_1_all.cpp:941) :class-title: example-dropdown .. 
code-block:: cpp :linenos: :lineno-start: 931 :emphasize-lines: 11 #include "../pandas/pd_config.h" namespace dataframe_tests { namespace dataframe_tests_config { void pd_test_config_version() { std::cout << "========= df_config: version info ======================= "; const char* version = pandas::DataFrameInfo::version(); if (version == nullptr || std::string(version).empty()) { std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl; throw std::runtime_error("pd_test_config_version failed: version is null or empty"); } std::cout << "-> tests passed" << std::endl; } void pd_test_config_na_repr() { std::cout << "========= df_config: NA representation ======================= "; const char* na_repr = pandas::DataFrameConfig::get_na_repr(); if (na_repr == nullptr) { .. _example-dataset-has_coord-9: .. dropdown:: has_coord (pd_test_2_all.cpp:16975) :class-title: example-dropdown .. code-block:: cpp :linenos: :lineno-start: 16965 :emphasize-lines: 11 } // Verify variable names passed = ds.has_var("A") && ds.has_var("B"); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl; throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables"); } // Verify coordinate exists passed = ds.has_coord("index"); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl; throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate"); } std::cout << " -> tests passed" << std::endl; } void pd_test_to_xarray_dimensions() { std::cout << "========= to_xarray dimensions ========================="; .. _example-dataset-has_var-10: .. dropdown:: has_var (pd_test_2_all.cpp:16968) :class-title: example-dropdown .. 
code-block:: cpp :linenos: :lineno-start: 16958 :emphasize-lines: 11 // Verify Dataset has correct structure bool passed = (ds.nvars() == 2); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_basic() : expected 2 variables, got " << ds.nvars() << std::endl; throw std::runtime_error("pd_test_to_xarray_basic failed: wrong number of variables"); } // Verify variable names passed = ds.has_var("A") && ds.has_var("B"); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl; throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables"); } // Verify coordinate exists passed = ds.has_coord("index"); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl; throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate"); .. _example-dataset-nvars-11: .. dropdown:: nvars (pd_test_2_all.cpp:16960) :class-title: example-dropdown .. code-block:: cpp :linenos: :lineno-start: 16950 :emphasize-lines: 11 {"A", {1.0, 2.0, 3.0}}, {"B", {4.0, 5.0, 6.0}} }; pandas::DataFrame df(data); // Convert to xarray Dataset pandas::Dataset ds = df.to_xarray(); // Verify Dataset has correct structure bool passed = (ds.nvars() == 2); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_basic() : expected 2 variables, got " << ds.nvars() << std::endl; throw std::runtime_error("pd_test_to_xarray_basic failed: wrong number of variables"); } // Verify variable names passed = ds.has_var("A") && ds.has_var("B"); if (!passed) { std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;