Dataset#
-
class pandas::Dataset#
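xarray-style container declared in `pd_xarray.h` and returned by `pandas::DataFrame::to_xarray()`. It holds named data variables together with their coordinates and dimension sizes.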
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use Dataset
Dataset obj;
// ... operations ...
Constructors#
| Signature | Location | Example |
|---|---|---|
| | pd_xarray.h:467 | |
Indexing / Selection#
Data Manipulation#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `void` | pd_xarray.h:574 | |
Statistics#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<std::string>` | pd_xarray.h:514 | |
I/O#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::string` | pd_xarray.h:625 | |
Other Methods#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `void` | pd_xarray.h:718 | |
| | `const Coordinate&` | pd_xarray.h:593 | |
| | `const std::map<std::string, Coordinate>&` | pd_xarray.h:492 | |
| | `std::map<std::string, Coordinate>&` | pd_xarray.h:493 | |
| | `const std::map<std::string, DataVarVariant>&` | pd_xarray.h:486 | |
| | `std::map<std::string, DataVarVariant>&` | pd_xarray.h:487 | |
| | `std::vector<std::string>` | pd_xarray.h:526 | |
| | `const std::map<std::string, size_t>&` | pd_xarray.h:498 | |
| | `bool` | pd_xarray.h:538 | |
| | `bool` | pd_xarray.h:586 | |
| | `bool` | pd_xarray.h:547 | |
| | `size_t` | pd_xarray.h:509 | |
| | `void` | pd_xarray.h:614 | |
| | `void` | pd_xarray.h:604 | |
| | `void` | pd_xarray.h:566 | |
Code Examples#
The following examples are extracted from the test suite.
attrs (pd_test_1_all.cpp:16361) — note: this excerpt calls attrs() on a pandas::Series<double>, not on a Dataset.
16351 // =====================================================================
16352 // Series Attrs Integration Tests
16353 // =====================================================================
16354
16355 void pd_test_ndframe_series_attrs() {
16356 std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358 pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360 // Test setting attrs on Series
16361 s.attrs().set("source", std::string("test_data"));
16362 s.attrs().set("timestamp", 1234567890);
16363
16364 bool passed = s.attrs().get<std::string>("source") == "test_data";
16365 if (!passed) {
16366 std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367 throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368 }
16369
16370 passed = s.attrs().get<int>("timestamp") == 1234567890;
16371 if (!passed) {
attrs (pd_test_1_all.cpp:16361) — note: this excerpt calls attrs() on a pandas::Series<double>, not on a Dataset.
16351 // =====================================================================
16352 // Series Attrs Integration Tests
16353 // =====================================================================
16354
16355 void pd_test_ndframe_series_attrs() {
16356 std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358 pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360 // Test setting attrs on Series
16361 s.attrs().set("source", std::string("test_data"));
16362 s.attrs().set("timestamp", 1234567890);
16363
16364 bool passed = s.attrs().get<std::string>("source") == "test_data";
16365 if (!passed) {
16366 std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367 throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368 }
16369
16370 passed = s.attrs().get<int>("timestamp") == 1234567890;
16371 if (!passed) {
var_names (pd_test_2_all.cpp:17312)
17302 pandas::DataFrame df(data);
17303 pandas::Dataset ds = df.to_xarray();
17304
17305 // Verify single variable
17306 bool passed = (ds.nvars() == 1 && ds.has_var("value"));
17307 if (!passed) {
17308 std::cout << " [FAIL] : in pd_test_to_xarray_single_column() : expected single 'value' variable" << std::endl;
17309 throw std::runtime_error("pd_test_to_xarray_single_column failed");
17310 }
17311
17312 // Verify var_names()
17313 auto names = ds.var_names();
17314 passed = (names.size() == 1 && names[0] == "value");
17315 if (!passed) {
17316 std::cout << " [FAIL] : in pd_test_to_xarray_single_column() : wrong var_names" << std::endl;
17317 throw std::runtime_error("pd_test_to_xarray_single_column failed: wrong var_names");
17318 }
17319
17320 std::cout << " -> tests passed" << std::endl;
17321 }
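to_string (pd_test_1_all.cpp:2693) — note: the matched line calls std::to_string inside a PeriodArray test; it does not call a Dataset method.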
2683 pandas::PeriodArray arr_m(std::vector<std::string>{
2684 "2020-01",
2685 "NaT",
2686 "2025-06"
2687 }, "M");
2688
2689 // Year
2690 auto years = arr_m.year();
2691 auto y0 = years[0];
2692 if (!y0.has_value() || y0.value() != 2020) {
2693 std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695 }
2696
2697 auto y1 = years[1];
2698 if (y1.has_value()) {
2699 std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701 }
2702
2703 auto y2 = years[2];
coord (pd_test_2_all.cpp:17025)
17015 // Create DataFrame with RangeIndex (default)
17016 std::map<std::string, std::vector<numpy::float64>> data = {
17017 {"val", {1.0, 2.0, 3.0}}
17018 };
17019
17020 pandas::DataFrame df(data);
17021 pandas::Dataset ds = df.to_xarray();
17022
17023 // Get index coordinate
17024 const pandas::Coordinate& idx_coord = ds.coord("index");
17025
17026 // Verify coordinate size
17027 bool passed = (idx_coord.size() == 3);
17028 if (!passed) {
17029 std::cout << " [FAIL] : in pd_test_to_xarray_coordinate_values() : wrong coordinate size" << std::endl;
17030 throw std::runtime_error("pd_test_to_xarray_coordinate_values failed: wrong size");
17031 }
17032
17033 // Verify coordinate values (0, 1, 2 for RangeIndex)
17034 passed = (idx_coord.get_value_str(0) == "0" &&
coords (pd_test_3_all.cpp:14912) — note: this excerpt calls coords() on a pandas::DataArray<double>, not on a Dataset.
14902/**
14903 * Test Series.to_xarray() coordinates
14904 */
14905void pd_test_series_to_xarray_coordinates() {
14906 std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908 pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909 pandas::DataArray<double> da = s.to_xarray();
14910
14911 // Verify coordinate exists
14912 auto& coords = da.coords();
14913 if (coords.find("index") == coords.end()) {
14914 std::cout << " [FAIL] : should have 'index' coordinate" << std::endl;
14915 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916 }
14917
14918 // Verify coordinate size matches Series
14919 const pandas::Coordinate& coord = coords.at("index");
14920 if (coord.size() != 3) {
14921 std::cout << " [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
coords (pd_test_3_all.cpp:14912) — note: this excerpt calls coords() on a pandas::DataArray<double>, not on a Dataset.
14902/**
14903 * Test Series.to_xarray() coordinates
14904 */
14905void pd_test_series_to_xarray_coordinates() {
14906 std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908 pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909 pandas::DataArray<double> da = s.to_xarray();
14910
14911 // Verify coordinate exists
14912 auto& coords = da.coords();
14913 if (coords.find("index") == coords.end()) {
14914 std::cout << " [FAIL] : should have 'index' coordinate" << std::endl;
14915 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916 }
14917
14918 // Verify coordinate size matches Series
14919 const pandas::Coordinate& coord = coords.at("index");
14920 if (coord.size() != 3) {
14921 std::cout << " [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
dims (pd_test_2_all.cpp:16997)
16987 // Create DataFrame with 4 rows
16988 std::map<std::string, std::vector<numpy::int64>> data = {
16989 {"x", {10, 20, 30, 40}},
16990 {"y", {100, 200, 300, 400}}
16991 };
16992
16993 pandas::DataFrame df(data);
16994 pandas::Dataset ds = df.to_xarray();
16995
16996 // Verify dimensions
16997 auto dims = ds.dims();
16998 bool passed = (dims.size() == 1 && dims.count("index") == 1 && dims.at("index") == 4);
16999 if (!passed) {
17000 std::cout << " [FAIL] : in pd_test_to_xarray_dimensions() : wrong dimensions" << std::endl;
17001 std::cout << " Expected: index: 4" << std::endl;
17002 std::cout << " Got: ";
17003 for (const auto& [name, size] : dims) {
17004 std::cout << name << ": " << size << " ";
17005 }
17006 std::cout << std::endl;
17007 throw std::runtime_error("pd_test_to_xarray_dimensions failed");
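empty (pd_test_1_all.cpp:941) — note: this excerpt is a config version test; its only empty() call is on a std::string (line 940), not on a Dataset.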
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937 void pd_test_config_version() {
938 std::cout << "========= df_config: version info ======================= ";
939 const char* version = pandas::DataFrameInfo::version();
940 if (version == nullptr || std::string(version).empty()) {
941 std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942 throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943 }
944 std::cout << "-> tests passed" << std::endl;
945 }
946
947 void pd_test_config_na_repr() {
948 std::cout << "========= df_config: NA representation ======================= ";
949 const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950 if (na_repr == nullptr) {
has_coord (pd_test_2_all.cpp:16975)
16965 }
16966
16967 // Verify variable names
16968 passed = ds.has_var("A") && ds.has_var("B");
16969 if (!passed) {
16970 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;
16971 throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables");
16972 }
16973
16974 // Verify coordinate exists
16975 passed = ds.has_coord("index");
16976 if (!passed) {
16977 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl;
16978 throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate");
16979 }
16980
16981 std::cout << " -> tests passed" << std::endl;
16982 }
16983
16984 void pd_test_to_xarray_dimensions() {
16985 std::cout << "========= to_xarray dimensions =========================";
has_var (pd_test_2_all.cpp:16968)
16959 // Verify Dataset has correct structure
16960 bool passed = (ds.nvars() == 2);
16961 if (!passed) {
16962 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : expected 2 variables, got "
16963 << ds.nvars() << std::endl;
16964 throw std::runtime_error("pd_test_to_xarray_basic failed: wrong number of variables");
16965 }
16966
16967 // Verify variable names
16968 passed = ds.has_var("A") && ds.has_var("B");
16969 if (!passed) {
16970 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;
16971 throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables");
16972 }
16973
16974 // Verify coordinate exists
16975 passed = ds.has_coord("index");
16976 if (!passed) {
16977 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl;
16978 throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate");
nvars (pd_test_2_all.cpp:16960)
16950 {"A", {1.0, 2.0, 3.0}},
16951 {"B", {4.0, 5.0, 6.0}}
16952 };
16953
16954 pandas::DataFrame df(data);
16955
16956 // Convert to xarray Dataset
16957 pandas::Dataset ds = df.to_xarray();
16958
16959 // Verify Dataset has correct structure
16960 bool passed = (ds.nvars() == 2);
16961 if (!passed) {
16962 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : expected 2 variables, got "
16963 << ds.nvars() << std::endl;
16964 throw std::runtime_error("pd_test_to_xarray_basic failed: wrong number of variables");
16965 }
16966
16967 // Verify variable names
16968 passed = ds.has_var("A") && ds.has_var("B");
16969 if (!passed) {
16970 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;