Dataset#

class pandas::Dataset#

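xarray-style container that groups named data variables (`data_vars`) with coordinates (`coords`), dimension sizes (`dims`), and string key/value attributes (`attrs`). Declared in `pd_xarray.h`; `DataFrame::to_xarray()` returns a `Dataset`.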

Example#

#include <pandas/pandas.h>
using namespace pandas;

// Use Dataset
Dataset obj;
// ... operations ...

Constructors#

Signature

Location

Example

Dataset(const std::map<std::string, DataVarVariant>& data_vars, const std::map<std::string, Coordinate>& coords = {}, const std::map<std::string, std::string>& attrs = {})

pd_xarray.h:467

Indexing / Selection#

Signature

Return Type

Location

Example

const std::map<std::string, std::string>& attrs() const

const std::map<std::string, std::string>&

pd_xarray.h:503

View

std::map<std::string, std::string>& attrs()

std::map<std::string, std::string>&

pd_xarray.h:504

View

Data Manipulation#

Signature

Return Type

Location

Example

void drop_var(const std::string& name)

void

pd_xarray.h:574

Statistics#

Signature

Return Type

Location

Example

std::vector<std::string> var_names() const

std::vector<std::string>

pd_xarray.h:514

View

I/O#

Signature

Return Type

Location

Example

std::string to_string() const

std::string

pd_xarray.h:625

View

Other Methods#

Signature

Return Type

Location

Example

void compute_dims()

void

pd_xarray.h:718

const Coordinate& coord(const std::string& name) const

const Coordinate&

pd_xarray.h:593

View

const std::map<std::string, Coordinate>& coords() const

const std::map<std::string, Coordinate>&

pd_xarray.h:492

View

std::map<std::string, Coordinate>& coords()

std::map<std::string, Coordinate>&

pd_xarray.h:493

View

const std::map<std::string, DataVarVariant>& data_vars() const

const std::map<std::string, DataVarVariant>&

pd_xarray.h:486

std::map<std::string, DataVarVariant>& data_vars()

std::map<std::string, DataVarVariant>&

pd_xarray.h:487

std::vector<std::string> dim_names() const

std::vector<std::string>

pd_xarray.h:526

const std::map<std::string, size_t>& dims() const

const std::map<std::string, size_t>&

pd_xarray.h:498

View

bool empty() const

bool

pd_xarray.h:538

View

bool has_coord(const std::string& name) const

bool

pd_xarray.h:586

View

bool has_var(const std::string& name) const

bool

pd_xarray.h:547

View

size_t nvars() const

size_t

pd_xarray.h:509

View

void recompute_dims()

void

pd_xarray.h:614

void set_coord(const std::string& name, const Coordinate& coord)

void

pd_xarray.h:604

template<typename T> void set_var(const std::string& name, const DataArray<T>& var)

void

pd_xarray.h:566

Code Examples#

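The following examples are extracted from the test suite. Snippets appear to be matched by method name, so some of them exercise a same-named method on a different type (for example `attrs()` on `Series`, `coords()` on `DataArray`, `std::to_string`, and `std::string::empty`) rather than on `Dataset`.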

attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
var_names (pd_test_2_all.cpp:17312)
17302            pandas::DataFrame df(data);
17303            pandas::Dataset ds = df.to_xarray();
17304
17305            // Verify single variable
17306            bool passed = (ds.nvars() == 1 && ds.has_var("value"));
17307            if (!passed) {
17308                std::cout << "  [FAIL] : in pd_test_to_xarray_single_column() : expected single 'value' variable" << std::endl;
17309                throw std::runtime_error("pd_test_to_xarray_single_column failed");
17310            }
17311
17312            // Verify var_names()
17313            auto names = ds.var_names();
17314            passed = (names.size() == 1 && names[0] == "value");
17315            if (!passed) {
17316                std::cout << "  [FAIL] : in pd_test_to_xarray_single_column() : wrong var_names" << std::endl;
17317                throw std::runtime_error("pd_test_to_xarray_single_column failed: wrong var_names");
17318            }
17319
17320            std::cout << " -> tests passed" << std::endl;
17321        }
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
coord (pd_test_2_all.cpp:17025)
17015            // Create DataFrame with RangeIndex (default)
17016            std::map<std::string, std::vector<numpy::float64>> data = {
17017                {"val", {1.0, 2.0, 3.0}}
17018            };
17019
17020            pandas::DataFrame df(data);
17021            pandas::Dataset ds = df.to_xarray();
17022
17023            // Get index coordinate
17024            const pandas::Coordinate& idx_coord = ds.coord("index");
17025
17026            // Verify coordinate size
17027            bool passed = (idx_coord.size() == 3);
17028            if (!passed) {
17029                std::cout << "  [FAIL] : in pd_test_to_xarray_coordinate_values() : wrong coordinate size" << std::endl;
17030                throw std::runtime_error("pd_test_to_xarray_coordinate_values failed: wrong size");
17031            }
17032
17033            // Verify coordinate values (0, 1, 2 for RangeIndex)
17034            passed = (idx_coord.get_value_str(0) == "0" &&
coords (pd_test_3_all.cpp:14912)
14902/**
14903 * Test Series.to_xarray() coordinates
14904 */
14905void pd_test_series_to_xarray_coordinates() {
14906    std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908    pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909    pandas::DataArray<double> da = s.to_xarray();
14910
14911    // Verify coordinate exists
14912    auto& coords = da.coords();
14913    if (coords.find("index") == coords.end()) {
14914        std::cout << "  [FAIL] : should have 'index' coordinate" << std::endl;
14915        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916    }
14917
14918    // Verify coordinate size matches Series
14919    const pandas::Coordinate& coord = coords.at("index");
14920    if (coord.size() != 3) {
14921        std::cout << "  [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
coords (pd_test_3_all.cpp:14912)
14902/**
14903 * Test Series.to_xarray() coordinates
14904 */
14905void pd_test_series_to_xarray_coordinates() {
14906    std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908    pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909    pandas::DataArray<double> da = s.to_xarray();
14910
14911    // Verify coordinate exists
14912    auto& coords = da.coords();
14913    if (coords.find("index") == coords.end()) {
14914        std::cout << "  [FAIL] : should have 'index' coordinate" << std::endl;
14915        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916    }
14917
14918    // Verify coordinate size matches Series
14919    const pandas::Coordinate& coord = coords.at("index");
14920    if (coord.size() != 3) {
14921        std::cout << "  [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
dims (pd_test_2_all.cpp:16997)
16987            // Create DataFrame with 4 rows
16988            std::map<std::string, std::vector<numpy::int64>> data = {
16989                {"x", {10, 20, 30, 40}},
16990                {"y", {100, 200, 300, 400}}
16991            };
16992
16993            pandas::DataFrame df(data);
16994            pandas::Dataset ds = df.to_xarray();
16995
16996            // Verify dimensions
16997            auto dims = ds.dims();
16998            bool passed = (dims.size() == 1 && dims.count("index") == 1 && dims.at("index") == 4);
16999            if (!passed) {
17000                std::cout << "  [FAIL] : in pd_test_to_xarray_dimensions() : wrong dimensions" << std::endl;
17001                std::cout << "    Expected: index: 4" << std::endl;
17002                std::cout << "    Got: ";
17003                for (const auto& [name, size] : dims) {
17004                    std::cout << name << ": " << size << " ";
17005                }
17006                std::cout << std::endl;
17007                throw std::runtime_error("pd_test_to_xarray_dimensions failed");
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937    void pd_test_config_version() {
938        std::cout << "========= df_config: version info ======================= ";
939        const char* version = pandas::DataFrameInfo::version();
940        if (version == nullptr || std::string(version).empty()) {
941            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943        }
944        std::cout << "-> tests passed" << std::endl;
945    }
946
947    void pd_test_config_na_repr() {
948        std::cout << "========= df_config: NA representation ======================= ";
949        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950        if (na_repr == nullptr) {
has_coord (pd_test_2_all.cpp:16975)
16965            }
16966
16967            // Verify variable names
16968            passed = ds.has_var("A") && ds.has_var("B");
16969            if (!passed) {
16970                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;
16971                throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables");
16972            }
16973
16974            // Verify coordinate exists
16975            passed = ds.has_coord("index");
16976            if (!passed) {
16977                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl;
16978                throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate");
16979            }
16980
16981            std::cout << " -> tests passed" << std::endl;
16982        }
16983
16984        void pd_test_to_xarray_dimensions() {
16985            std::cout << "========= to_xarray dimensions =========================";
has_var (pd_test_2_all.cpp:16968)
16959            // Verify Dataset has correct structure
16960            bool passed = (ds.nvars() == 2);
16961            if (!passed) {
16962                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : expected 2 variables, got "
16963                          << ds.nvars() << std::endl;
16964                throw std::runtime_error("pd_test_to_xarray_basic failed: wrong number of variables");
16965            }
16966
16967            // Verify variable names
16968            passed = ds.has_var("A") && ds.has_var("B");
16969            if (!passed) {
16970                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;
16971                throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables");
16972            }
16973
16974            // Verify coordinate exists
16975            passed = ds.has_coord("index");
16976            if (!passed) {
16977                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl;
16978                throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate");
nvars (pd_test_2_all.cpp:16960)
16950                {"A", {1.0, 2.0, 3.0}},
16951                {"B", {4.0, 5.0, 6.0}}
16952            };
16953
16954            pandas::DataFrame df(data);
16955
16956            // Convert to xarray Dataset
16957            pandas::Dataset ds = df.to_xarray();
16958
16959            // Verify Dataset has correct structure
16960            bool passed = (ds.nvars() == 2);
16961            if (!passed) {
16962                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : expected 2 variables, got "
16963                          << ds.nvars() << std::endl;
16964                throw std::runtime_error("pd_test_to_xarray_basic failed: wrong number of variables");
16965            }
16966
16967            // Verify variable names
16968            passed = ds.has_var("A") && ds.has_var("B");
16969            if (!passed) {
16970                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;