DataArray

template <typename T> class pandas::DataArray

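Labeled N-dimensional array (xarray-style): wraps a numpy::NDArray<T> together with named dimensions, per-dimension coordinates, string attributes, and an optional name.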

Example

#include <pandas/pandas.h>
using namespace pandas;

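// Build a 1-D DataArray from a vector; the dimension name defaults to "dim_0"
DataArray<double> obj(std::vector<double>{1.0, 2.0, 3.0});
// ... operations ...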

Constructors

Signature

Location

Example

DataArray(const numpy::NDArray<T>& data, const std::vector<std::string>& dims = {}, const std::map<std::string, Coordinate>& coords = {}, const std::map<std::string, std::string>& attrs = {}, const std::optional<std::string>& name = std::nullopt)

pd_xarray.h:166

DataArray(const std::vector<T>& data, const std::string& dim_name = "dim_0", const std::optional<Coordinate>& coord = std::nullopt, const std::optional<std::string>& name = std::nullopt)

pd_xarray.h:200

Indexing / Selection

Signature

Return Type

Location

Example

T at(const std::vector<size_t>& indices) const

T

pd_xarray.h:320

View

const std::map<std::string, std::string>& attrs() const

const std::map<std::string, std::string>&

pd_xarray.h:239

View

std::map<std::string, std::string>& attrs()

std::map<std::string, std::string>&

pd_xarray.h:240

View

I/O

Signature

Return Type

Location

Example

std::string to_string() const

std::string

pd_xarray.h:338

View

Other Methods

Signature

Return Type

Location

Example

const Coordinate& coord(const std::string& name) const

const Coordinate&

pd_xarray.h:298

View

const std::map<std::string, Coordinate>& coords() const

const std::map<std::string, Coordinate>&

pd_xarray.h:233

View

std::map<std::string, Coordinate>& coords()

std::map<std::string, Coordinate>&

pd_xarray.h:234

View

const std::vector<std::string>& dims() const

const std::vector<std::string>&

pd_xarray.h:228

View

bool empty() const { return data_.getSize() == 0; }

bool

pd_xarray.h:282

View

bool has_coord(const std::string& name) const

bool

pd_xarray.h:291

View

std::optional<std::string> name() const

std::optional<std::string>

pd_xarray.h:245

View

size_t ndim() const { return dims_.size(); }

size_t

pd_xarray.h:255

View

void set_coord(const std::string& name, const Coordinate& coord)

void

pd_xarray.h:309

void set_name(const std::string& name)

void

pd_xarray.h:250

View

size_t size() const { return data_.getSize(); }

size_t

pd_xarray.h:265

View

std::map<std::string, size_t> sizes() const

std::map<std::string, size_t>

pd_xarray.h:270

const numpy::NDArray<T>& values() const

const numpy::NDArray<T>&

pd_xarray.h:222

View

numpy::NDArray<T>& values()

numpy::NDArray<T>&

pd_xarray.h:223

View

Code Examples

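The following examples are extracted automatically from the test suite by matching method names. Each excerpt is labeled with the method name and the source location of the match; several of them exercise a same-named method on a different class (for example DataFrame, Series, Dataset, or Index) rather than on DataArray itself.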

at (pd_test_1_all.cpp:6581)
6571            // Test isna/notna with float data
6572            {
6573                std::map<std::string, std::vector<numpy::float64>> float_data;
6574                float_data["X"] = {1.0, std::nan(""), 3.0};
6575                float_data["Y"] = {4.0, 5.0, std::nan("")};
6576                pandas::DataFrame df_na(float_data);
6577
6578                auto na_mask = df_na.isna();
6579                // Row 1, col 0 (X) should be NA
6580                if (!na_mask.getElementAt({1, 0})) {
6581                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583                }
6584                // Row 2, col 1 (Y) should be NA
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
coord (pd_test_2_all.cpp:17025)
17015            // Create DataFrame with RangeIndex (default)
17016            std::map<std::string, std::vector<numpy::float64>> data = {
17017                {"val", {1.0, 2.0, 3.0}}
17018            };
17019
17020            pandas::DataFrame df(data);
17021            pandas::Dataset ds = df.to_xarray();
17022
17023            // Get index coordinate
17024            const pandas::Coordinate& idx_coord = ds.coord("index");
17025
17026            // Verify coordinate size
17027            bool passed = (idx_coord.size() == 3);
17028            if (!passed) {
17029                std::cout << "  [FAIL] : in pd_test_to_xarray_coordinate_values() : wrong coordinate size" << std::endl;
17030                throw std::runtime_error("pd_test_to_xarray_coordinate_values failed: wrong size");
17031            }
17032
17033            // Verify coordinate values (0, 1, 2 for RangeIndex)
17034            passed = (idx_coord.get_value_str(0) == "0" &&
coords (pd_test_3_all.cpp:14912)
14902/**
14903 * Test Series.to_xarray() coordinates
14904 */
14905void pd_test_series_to_xarray_coordinates() {
14906    std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908    pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909    pandas::DataArray<double> da = s.to_xarray();
14910
14911    // Verify coordinate exists
14912    auto& coords = da.coords();
14913    if (coords.find("index") == coords.end()) {
14914        std::cout << "  [FAIL] : should have 'index' coordinate" << std::endl;
14915        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916    }
14917
14918    // Verify coordinate size matches Series
14919    const pandas::Coordinate& coord = coords.at("index");
14920    if (coord.size() != 3) {
14921        std::cout << "  [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
coords (pd_test_3_all.cpp:14912)
14902/**
14903 * Test Series.to_xarray() coordinates
14904 */
14905void pd_test_series_to_xarray_coordinates() {
14906    std::cout << "========= Series.to_xarray() coordinates ===============";
14907
14908    pandas::Series<double> s({100.0, 200.0, 300.0}, "prices");
14909    pandas::DataArray<double> da = s.to_xarray();
14910
14911    // Verify coordinate exists
14912    auto& coords = da.coords();
14913    if (coords.find("index") == coords.end()) {
14914        std::cout << "  [FAIL] : should have 'index' coordinate" << std::endl;
14915        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: missing coord");
14916    }
14917
14918    // Verify coordinate size matches Series
14919    const pandas::Coordinate& coord = coords.at("index");
14920    if (coord.size() != 3) {
14921        std::cout << "  [FAIL] : coordinate size should be 3, got " << coord.size() << std::endl;
14922        throw std::runtime_error("pd_test_series_to_xarray_coordinates failed: coord size");
dims (pd_test_2_all.cpp:16997)
16987            // Create DataFrame with 4 rows
16988            std::map<std::string, std::vector<numpy::int64>> data = {
16989                {"x", {10, 20, 30, 40}},
16990                {"y", {100, 200, 300, 400}}
16991            };
16992
16993            pandas::DataFrame df(data);
16994            pandas::Dataset ds = df.to_xarray();
16995
16996            // Verify dimensions
16997            auto dims = ds.dims();
16998            bool passed = (dims.size() == 1 && dims.count("index") == 1 && dims.at("index") == 4);
16999            if (!passed) {
17000                std::cout << "  [FAIL] : in pd_test_to_xarray_dimensions() : wrong dimensions" << std::endl;
17001                std::cout << "    Expected: index: 4" << std::endl;
17002                std::cout << "    Got: ";
17003                for (const auto& [name, size] : dims) {
17004                    std::cout << name << ": " << size << " ";
17005                }
17006                std::cout << std::endl;
17007                throw std::runtime_error("pd_test_to_xarray_dimensions failed");
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937    void pd_test_config_version() {
938        std::cout << "========= df_config: version info ======================= ";
939        const char* version = pandas::DataFrameInfo::version();
940        if (version == nullptr || std::string(version).empty()) {
941            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943        }
944        std::cout << "-> tests passed" << std::endl;
945    }
946
947    void pd_test_config_na_repr() {
948        std::cout << "========= df_config: NA representation ======================= ";
949        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950        if (na_repr == nullptr) {
has_coord (pd_test_2_all.cpp:16975)
16965            }
16966
16967            // Verify variable names
16968            passed = ds.has_var("A") && ds.has_var("B");
16969            if (!passed) {
16970                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : missing variable A or B" << std::endl;
16971                throw std::runtime_error("pd_test_to_xarray_basic failed: missing variables");
16972            }
16973
16974            // Verify coordinate exists
16975            passed = ds.has_coord("index");
16976            if (!passed) {
16977                std::cout << "  [FAIL] : in pd_test_to_xarray_basic() : missing index coordinate" << std::endl;
16978                throw std::runtime_error("pd_test_to_xarray_basic failed: missing index coordinate");
16979            }
16980
16981            std::cout << " -> tests passed" << std::endl;
16982        }
16983
16984        void pd_test_to_xarray_dimensions() {
16985            std::cout << "========= to_xarray dimensions =========================";
name (pd_test_1_all.cpp:295)
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
293
294        pandas::BooleanArray arr;
295        if (arr.dtype().name() != "boolean") {
296            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298        }
299
300        if (arr.dtype().kind() != "b") {
301            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303        }
304
305        std::cout << " -> tests passed" << std::endl;
ndim (pd_test_1_all.cpp:6195)
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
6199
6200            // Test empty
6201            if (df.empty()) {
6202                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203                throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204            }
set_name (pd_test_1_all.cpp:11798)
11788                throw std::runtime_error("pd_test_index_vector_constructor failed");
11789            }
11790
11791            std::cout << " -> tests passed" << std::endl;
11792        }
11793
11794        void pd_test_index_copy_constructor() {
11795            std::cout << "========= copy constructor ============================";
11796
11797            pandas::Index<numpy::int64> idx1{1, 2, 3};
11798            idx1.set_name("original");
11799
11800            pandas::Index<numpy::int64> idx2(idx1);
11801
11802            bool passed = (idx2.size() == 3);
11803            passed = passed && (idx2.name().value() == "original");
11804            passed = passed && idx2.equals(idx1);
11805
11806            if (!passed) {
11807                std::cout << "  [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808                throw std::runtime_error("pd_test_index_copy_constructor failed");
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
values (pd_test_1_all.cpp:364)
354        pandas::CategoricalArray arr1;
355        if (arr1.size() != 0) {
356            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358        }
359        if (arr1.ordered()) {
360            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362        }
363
364        // Constructor from values (infer categories)
365        std::vector<std::optional<std::string>> values = {
366            std::optional<std::string>("a"),
367            std::optional<std::string>("b"),
368            std::optional<std::string>("a"),
369            std::optional<std::string>("c")
370        };
371        pandas::CategoricalArray arr2(values);
372        if (arr2.size() != 4) {
373            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374            throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");
values (pd_test_1_all.cpp:364)
354        pandas::CategoricalArray arr1;
355        if (arr1.size() != 0) {
356            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358        }
359        if (arr1.ordered()) {
360            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362        }
363
364        // Constructor from values (infer categories)
365        std::vector<std::optional<std::string>> values = {
366            std::optional<std::string>("a"),
367            std::optional<std::string>("b"),
368            std::optional<std::string>("a"),
369            std::optional<std::string>("c")
370        };
371        pandas::CategoricalArray arr2(values);
372        if (arr2.size() != 4) {
373            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374            throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");