DataArray#
-
class pandas::DataArray#
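Labeled array class template (`DataArray<T>`, declared in `pd_xarray.h`) that pairs `numpy::NDArray<T>` values with named dimensions, `Coordinate` objects keyed by name, string-to-string attributes, and an optional name. `Series::to_xarray()` returns a `DataArray`.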
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use DataArray
DataArray<double> obj;  // DataArray is a class template; supply the element type
// ... operations ...
Constructors#
Signature |
Location |
Example |
|---|---|---|
|
pd_xarray.h:166 |
|
|
pd_xarray.h:200 |
Indexing / Selection#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
T |
pd_xarray.h:320 |
|
|
const std::map<std::string, std::string>& |
pd_xarray.h:239 |
|
|
std::map<std::string, std::string>& |
pd_xarray.h:240 |
I/O#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::string |
pd_xarray.h:338 |
Other Methods#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
const Coordinate& |
pd_xarray.h:298 |
|
|
const std::map<std::string, Coordinate>& |
pd_xarray.h:233 |
|
|
std::map<std::string, Coordinate>& |
pd_xarray.h:234 |
|
|
const std::vector<std::string>& |
pd_xarray.h:228 |
|
|
bool |
pd_xarray.h:282 |
|
|
bool |
pd_xarray.h:291 |
|
|
std::optional<std::string> |
pd_xarray.h:245 |
|
|
size_t |
pd_xarray.h:255 |
|
|
void |
pd_xarray.h:309 |
|
|
void |
pd_xarray.h:250 |
|
|
size_t |
pd_xarray.h:265 |
|
|
std::map<std::string, size_t> |
pd_xarray.h:270 |
|
|
const numpy::NDArray<T>& |
pd_xarray.h:222 |
|
|
numpy::NDArray<T>& |
pd_xarray.h:223 |
Code Examples#
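The following examples are extracted from the test suite. They are matched by method name only, so most of them exercise a same-named method on another type (for example `DataFrame`, `Series`, `Dataset`, `Index`, `BooleanArray`, or `std::to_string`) rather than `DataArray` itself; the `coords` example is the one that uses `pandas::DataArray<double>` directly.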
at (pd_test_1_all.cpp:6581)
6571 // Test isna/notna with float data
6572 {
6573 std::map<std::string, std::vector<numpy::float64>> float_data;
6574 float_data["X"] = {1.0, std::nan(""), 3.0};
6575 float_data["Y"] = {4.0, 5.0, std::nan("")};
6576 pandas::DataFrame df_na(float_data);
6577
6578 auto na_mask = df_na.isna();
6579 // Row 1, col 0 (X) should be NA
6580 if (!na_mask.getElementAt({1, 0})) {
6581 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583 }
6584 // Row 2, col 1 (Y) should be NA
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
attrs (pd_test_1_all.cpp:16361)
16351 // =====================================================================
16352 // Series Attrs Integration Tests
16353 // =====================================================================
16354
16355 void pd_test_ndframe_series_attrs() {
16356 std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358 pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360 // Test setting attrs on Series
16361 s.attrs().set("source", std::string("test_data"));
16362 s.attrs().set("timestamp", 1234567890);
16363
16364 bool passed = s.attrs().get<std::string>("source") == "test_data";
16365 if (!passed) {
16366 std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367 throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368 }
16369
16370 passed = s.attrs().get<int>("timestamp") == 1234567890;
16371 if (!passed) {
attrs (pd_test_1_all.cpp:16361)
16351 // =====================================================================
16352 // Series Attrs Integration Tests
16353 // =====================================================================
16354
16355 void pd_test_ndframe_series_attrs() {
16356 std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358 pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360 // Test setting attrs on Series
16361 s.attrs().set("source", std::string("test_data"));
16362 s.attrs().set("timestamp", 1234567890);
16363
16364 bool passed = s.attrs().get<std::string>("source") == "test_data";
16365 if (!passed) {
16366 std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367 throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368 }
16369
16370 passed = s.attrs().get<int>("timestamp") == 1234567890;
16371 if (!passed) {
to_string (pd_test_1_all.cpp:2693)
2683 pandas::PeriodArray arr_m(std::vector<std::string>{
2684 "2020-01",
2685 "NaT",
2686 "2025-06"
2687 }, "M");
2688
2689 // Year
2690 auto years = arr_m.year();
2691 auto y0 = years[0];
2692 if (!y0.has_value() || y0.value() != 2020) {
2693 std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695 }
2696
2697 auto y1 = years[1];
2698 if (y1.has_value()) {
2699 std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701 }
2702
2703 auto y2 = years[2];
coord (pd_test_2_all.cpp:17024)
17015 // Create DataFrame with RangeIndex (default)
17016 std::map<std::string, std::vector<numpy::float64>> data = {
17017 {"val", {1.0, 2.0, 3.0}}
17018 };
17019
17020 pandas::DataFrame df(data);
17021 pandas::Dataset ds = df.to_xarray();
17022
17023 // Get index coordinate
17024 const pandas::Coordinate& idx_coord = ds.coord("index");
17025
17026 // Verify coordinate size
17027 bool passed = (idx_coord.size() == 3);
17028 if (!passed) {
17029 std::cout << " [FAIL] : in pd_test_to_xarray_coordinate_values() : wrong coordinate size" << std::endl;
17030 throw std::runtime_error("pd_test_to_xarray_coordinate_values failed: wrong size");
17031 }
17032
17033 // Verify coordinate values (0, 1, 2 for RangeIndex)
17034 passed = (idx_coord.get_value_str(0) == "0" &&
coords (pd_test_3_all.cpp:14912)
14902 /**
14903 * Test Series.to_xarray() coordinates
14904 */
14905 void pd_test_series_to_xarray_coordinates() {
14906 std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908 pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909 pandas::DataArray<double> da = s.to_xarray();
14910
14911 // Verify coordinate exists
14912 auto& coords = da.coords();
14913 if (coords.find("index") == coords.end()) {
14914 std::cout << " [FAIL] : should have 'index' coordinate" << std::endl;
14915 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916 }
14917
14918 // Verify coordinate size matches Series
14919 const pandas::Coordinate& coord = coords.at("index");
14920 if (coord.size() != 3) {
14921 std::cout << " [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
coords (pd_test_3_all.cpp:14912)
14902 /**
14903 * Test Series.to_xarray() coordinates
14904 */
14905 void pd_test_series_to_xarray_coordinates() {
14906 std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908 pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909 pandas::DataArray<double> da = s.to_xarray();
14910
14911 // Verify coordinate exists
14912 auto& coords = da.coords();
14913 if (coords.find("index") == coords.end()) {
14914 std::cout << " [FAIL] : should have 'index' coordinate" << std::endl;
14915 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916 }
14917
14918 // Verify coordinate size matches Series
14919 const pandas::Coordinate& coord = coords.at("index");
14920 if (coord.size() != 3) {
14921 std::cout << " [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922 throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
dims (pd_test_2_all.cpp:16997)
16987 // Create DataFrame with 4 rows
16988 std::map<std::string, std::vector<numpy::int64>> data = {
16989 {"x", {10, 20, 30, 40}},
16990 {"y", {100, 200, 300, 400}}
16991 };
16992
16993 pandas::DataFrame df(data);
16994 pandas::Dataset ds = df.to_xarray();
16995
16996 // Verify dimensions
16997 auto dims = ds.dims();
16998 bool passed = (dims.size() == 1 && dims.count("index") == 1 && dims.at("index") == 4);
16999 if (!passed) {
17000 std::cout << " [FAIL] : in pd_test_to_xarray_dimensions() : wrong dimensions" << std::endl;
17001 std::cout << " Expected: index: 4" << std::endl;
17002 std::cout << " Got: ";
17003 for (const auto& [name, size] : dims) {
17004 std::cout << name << ": " << size << " ";
17005 }
17006 std::cout << std::endl;
17007 throw std::runtime_error("pd_test_to_xarray_dimensions failed");
empty (pd_test_1_all.cpp:941)
931 #include "../pandas/pd_config.h"
932
933 namespace dataframe_tests {
934
935 namespace dataframe_tests_config {
936
937 void pd_test_config_version() {
938 std::cout << "========= df_config: version info ======================= ";
939 const char* version = pandas::DataFrameInfo::version();
940 if (version == nullptr || std::string(version).empty()) {
941 std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942 throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943 }
944 std::cout << "-> tests passed" << std::endl;
945 }
946
947 void pd_test_config_na_repr() {
948 std::cout << "========= df_config: NA representation ======================= ";
949 const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950 if (na_repr == nullptr) {
has_coord (pd_test_2_all.cpp:16975)
16965 }
16966
16967 // Verify variable names
16968 passed = ds.has_var("A") && ds.has_var("B");
16969 if (!passed) {
16970 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;
16971 throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables");
16972 }
16973
16974 // Verify coordinate exists
16975 passed = ds.has_coord("index");
16976 if (!passed) {
16977 std::cout << " [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl;
16978 throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate");
16979 }
16980
16981 std::cout << " -> tests passed" << std::endl;
16982 }
16983
16984 void pd_test_to_xarray_dimensions() {
16985 std::cout << "========= to_xarray dimensions =========================";
name (pd_test_1_all.cpp:295)
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
293
294 pandas::BooleanArray arr;
295 if (arr.dtype().name() != "boolean") {
296 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298 }
299
300 if (arr.dtype().kind() != "b") {
301 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303 }
304
305 std::cout << " -> tests passed" << std::endl;
ndim (pd_test_1_all.cpp:6195)
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
6199
6200 // Test empty
6201 if (df.empty()) {
6202 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203 throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204 }
set_name (pd_test_1_all.cpp:11798)
11788 throw std::runtime_error("pd_test_index_vector_constructor failed");
11789 }
11790
11791 std::cout << " -> tests passed" << std::endl;
11792 }
11793
11794 void pd_test_index_copy_constructor() {
11795 std::cout << "========= copy constructor ============================";
11796
11797 pandas::Index<numpy::int64> idx1{1, 2, 3};
11798 idx1.set_name("original");
11799
11800 pandas::Index<numpy::int64> idx2(idx1);
11801
11802 bool passed = (idx2.size() == 3);
11803 passed = passed && (idx2.name().value() == "original");
11804 passed = passed && idx2.equals(idx1);
11805
11806 if (!passed) {
11807 std::cout << " [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808 throw std::runtime_error("pd_test_index_copy_constructor failed");
size (pd_test_1_all.cpp:22)
12 #include "../pandas/pd_boolean_array.h"
13
14 namespace dataframe_tests {
15
16 namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
values (pd_test_1_all.cpp:364)
354 pandas::CategoricalArray arr1;
355 if (arr1.size() != 0) {
356 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357 throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358 }
359 if (arr1.ordered()) {
360 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361 throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362 }
363
364 // Constructor from values (infer categories)
365 std::vector<std::optional<std::string>> values = {
366 std::optional<std::string>("a"),
367 std::optional<std::string>("b"),
368 std::optional<std::string>("a"),
369 std::optional<std::string>("c")
370 };
371 pandas::CategoricalArray arr2(values);
372 if (arr2.size() != 4) {
373 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374 throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");
values (pd_test_1_all.cpp:364)
354 pandas::CategoricalArray arr1;
355 if (arr1.size() != 0) {
356 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357 throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358 }
359 if (arr1.ordered()) {
360 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361 throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362 }
363
364 // Constructor from values (infer categories)
365 std::vector<std::optional<std::string>> values = {
366 std::optional<std::string>("a"),
367 std::optional<std::string>("b"),
368 std::optional<std::string>("a"),
369 std::optional<std::string>("c")
370 };
371 pandas::CategoricalArray arr2(values);
372 if (arr2.size() != 4) {
373 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374 throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");