SparseArray#

class pandas::SparseArray#

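Extension array type for sparse data: only the values that differ from a fill value are stored explicitly (see sp_values, sp_index and fill_value below).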

Example#

#include <pandas/pandas.h>
using namespace pandas;

// Use SparseArray
SparseArray<numpy::float64> obj;  // SparseArray is a class template: the element type must be specified
// ... operations ...

Constructors#

Signature

Location

Example

SparseArray(const numpy::NDArray<T>& dense, T fill_value)

pd_sparse_array.h:81

explicit SparseArray(const numpy::NDArray<T>& dense)

pd_sparse_array.h:94

SparseArray(const numpy::NDArray<T>& sp_values, const numpy::NDArray<numpy::int64>& sp_index, T fill_value, size_t length)

pd_sparse_array.h:110

SparseArray(const std::vector<T>& values, T fill_value)

pd_sparse_array.h:125

Construction#

Signature

Return Type

Location

Example

static SparseArray<T> from_dense(const numpy::NDArray<T>& dense, T fill_value)

static SparseArray<T>

pd_sparse_array.h:808

View

static SparseArray<T> from_dense(const numpy::NDArray<T>& dense)

static SparseArray<T>

pd_sparse_array.h:815

View

Indexing / Selection#

Signature

Return Type

Location

Example

T at(size_t index) const

T

pd_sparse_array.h:269

View

Missing Data#

Signature

Return Type

Location

Example

SparseArray<T> fillna(T value) const

SparseArray<T>

pd_sparse_array.h:378

View

numpy::NDArray<numpy::bool_> isna() const

numpy::NDArray<numpy::bool_>

pd_sparse_array.h:334

View

numpy::NDArray<numpy::bool_> notna() const

numpy::NDArray<numpy::bool_>

pd_sparse_array.h:365

View

Statistics#

Signature

Return Type

Location

Example

size_t count() const

size_t

pd_sparse_array.h:425

View

std::optional<T> max() const

std::optional<T>

pd_sparse_array.h:667

View

double mean() const

double

pd_sparse_array.h:595

View

std::optional<T> min() const

std::optional<T>

pd_sparse_array.h:630

View

std::optional<double> std(int ddof = 1) const

std::optional<double>

pd_sparse_array.h:704

View

T sum() const

T

pd_sparse_array.h:570

View

std::optional<double> var(int ddof = 1) const

std::optional<double>

pd_sparse_array.h:713

View

Comparison#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::bool_> compare_op(const SparseArray<T>& other, Op op) const

numpy::NDArray<numpy::bool_>

pd_sparse_array.h:1043

size_t len() const

size_t

pd_sparse_array.h:195

View

Combining#

Signature

Return Type

Location

Example

static SparseArray<T> concat(const std::vector<SparseArray<T>>& arrays)

static SparseArray<T>

pd_sparse_array.h:822

View

I/O#

Signature

Return Type

Location

Example

numpy::NDArray<T> to_dense() const

numpy::NDArray<T>

pd_sparse_array.h:298

View

std::string to_string() const

std::string

pd_sparse_array.h:903

View

Conversion#

Signature

Return Type

Location

Example

SparseArray<T> copy() const

SparseArray<T>

pd_sparse_array.h:318

View

Type Checking#

Signature

Return Type

Location

Example

bool is_fill(size_t index) const

bool

pd_sparse_array.h:276

View

bool is_fill_value(T val) const

bool

pd_sparse_array.h:48

Other Methods#

Signature

Return Type

Location

Example

bool all() const

bool

pd_sparse_array.h:778

View

bool any() const

bool

pd_sparse_array.h:752

View

SparseArray<T> binary_op(const SparseArray<T>& other, Op op) const

SparseArray<T>

pd_sparse_array.h:1010

build_from_dense(dense)

pd_sparse_array.h:87

build_from_dense(dense)

pd_sparse_array.h:100

build_from_dense(dense)

pd_sparse_array.h:135

void build_from_dense(const numpy::NDArray<T>& dense)

void

pd_sparse_array.h:947

double density() const

double

pd_sparse_array.h:235

View

dtype_type dtype() const

dtype_type

pd_sparse_array.h:145

View

bool empty() const

bool

pd_sparse_array.h:188

View

T fill_value() const

T

pd_sparse_array.h:220

View

T find_value_at(size_t index) const

T

pd_sparse_array.h:988

size_t nbytes() const

size_t

pd_sparse_array.h:159

View

size_t nbytes_dense() const

size_t

pd_sparse_array.h:167

constexpr int ndim() const

constexpr int

pd_sparse_array.h:174

View

size_t npoints() const

size_t

pd_sparse_array.h:227

View

op(dense1.getElementAt({i}), dense2.getElementAt({i})))

pd_sparse_array.h:1022

op(fill_value_, scalar), length_)

pd_sparse_array.h:1039

std::string repr() const

std::string

pd_sparse_array.h:934

View

numpy::NDArray<numpy::bool_> scalar_compare(T scalar, Op op) const

numpy::NDArray<numpy::bool_>

pd_sparse_array.h:1060

SparseArray<T> scalar_op(T scalar, Op op) const

SparseArray<T>

pd_sparse_array.h:1031

std::vector<size_t> shape() const

std::vector<size_t>

pd_sparse_array.h:181

View

size_t size() const

size_t

pd_sparse_array.h:152

View

const numpy::NDArray<numpy::int64>& sp_index() const

const numpy::NDArray<numpy::int64>&

pd_sparse_array.h:213

View

const numpy::NDArray<T>& sp_values() const

const numpy::NDArray<T>&

pd_sparse_array.h:206

View

std::string sparse_dtype_footer() const

std::string

pd_sparse_array.h:890

double sparsity() const

double

pd_sparse_array.h:244

View

void validate_sparse_data()

void

pd_sparse_array.h:968

Internal Methods#

1 internal method (prefixed with underscore)

Code Examples#

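The following examples are extracted automatically from the test suite by method name. Some of them therefore show a same-named method of a different class (for example CategoricalArray, BooleanArray, DataFrame or Series) rather than SparseArray itself.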

from_dense (pd_test_1_all.cpp:3164)
3154    }
3155
3156    void pd_test_sparse_array_from_dense() {
3157        std::cout << "========= SparseArray: from_dense ======================= ";
3158
3159        numpy::NDArray<numpy::float64> dense(std::vector<size_t>{10});
3160        for (size_t i = 0; i < 10; ++i) {
3161            dense.setElementAt({i}, (i == 3 || i == 7) ? 5.0 : 0.0);
3162        }
3163
3164        auto sparse = pandas::SparseArray<numpy::float64>::from_dense(dense, 0.0);
3165
3166        if (sparse.size() != 10) {
3167            std::cout << "  [FAIL] : in pd_test_sparse_array_from_dense() : size != 10" << std::endl;
3168            throw std::runtime_error("pd_test_sparse_array_from_dense failed: size != 10");
3169        }
3170
3171        if (sparse.npoints() != 2) {
3172            std::cout << "  [FAIL] : in pd_test_sparse_array_from_dense() : npoints != 2" << std::endl;
3173            throw std::runtime_error("pd_test_sparse_array_from_dense failed: npoints != 2");
3174        }
from_dense (pd_test_1_all.cpp:3164)
3154    }
3155
3156    void pd_test_sparse_array_from_dense() {
3157        std::cout << "========= SparseArray: from_dense ======================= ";
3158
3159        numpy::NDArray<numpy::float64> dense(std::vector<size_t>{10});
3160        for (size_t i = 0; i < 10; ++i) {
3161            dense.setElementAt({i}, (i == 3 || i == 7) ? 5.0 : 0.0);
3162        }
3163
3164        auto sparse = pandas::SparseArray<numpy::float64>::from_dense(dense, 0.0);
3165
3166        if (sparse.size() != 10) {
3167            std::cout << "  [FAIL] : in pd_test_sparse_array_from_dense() : size != 10" << std::endl;
3168            throw std::runtime_error("pd_test_sparse_array_from_dense failed: size != 10");
3169        }
3170
3171        if (sparse.npoints() != 2) {
3172            std::cout << "  [FAIL] : in pd_test_sparse_array_from_dense() : npoints != 2" << std::endl;
3173            throw std::runtime_error("pd_test_sparse_array_from_dense failed: npoints != 2");
3174        }
at (pd_test_1_all.cpp:6581)
6571            // Test isna/notna with float data
6572            {
6573                std::map<std::string, std::vector<numpy::float64>> float_data;
6574                float_data["X"] = {1.0, std::nan(""), 3.0};
6575                float_data["Y"] = {4.0, 5.0, std::nan("")};
6576                pandas::DataFrame df_na(float_data);
6577
6578                auto na_mask = df_na.isna();
6579                // Row 1, col 0 (X) should be NA
6580                if (!na_mask.getElementAt({1, 0})) {
6581                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583                }
6584                // Row 2, col 1 (Y) should be NA
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
isna (pd_test_1_all.cpp:524)
514            throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515        }
516
517        // Test count (non-NA)
518        if (arr.count() != 2) {
519            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520            throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
notna (pd_test_1_all.cpp:6595)
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593                }
6594
6595                auto notna_mask = df_na.notna();
6596                if (notna_mask.getElementAt({1, 0})) {
6597                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598                    throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599                }
6600            }
6601
6602            // Test fillna
6603            {
6604                std::map<std::string, std::vector<numpy::float64>> float_data;
6605                float_data["X"] = {1.0, std::nan(""), 3.0};
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
mean (pd_test_1_all.cpp:282)
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
std (pd_test_1_all.cpp:4526)
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
4535
4536        void pd_test_aggregation_series_quantile() {
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
var (pd_test_1_all.cpp:20890)
20880                throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881            }
20882
20883            std::cout << " -> tests passed" << std::endl;
20884        }
20885
20886        void pd_test_expanding_var() {
20887            std::cout << "========= Expanding var =========================";
20888
20889            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890            auto result = s.expanding().var();
20891
20892            // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893            bool passed = std::isnan(result[0]) &&
20894                          std::abs(result[1] - 0.5) < 0.001 &&
20895                          std::abs(result[2] - 1.0) < 0.001 &&
20896                          std::abs(result[3] - 1.6667) < 0.001 &&
20897                          std::abs(result[4] - 2.5) < 0.001;
20898            if (!passed) {
20899                std::cout << "  [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
len (pd_test_3_all.cpp:20867)
20857    auto title_result = s.str().title();
20858    if (title_result[0] != "Hello World" || title_result[1] != "Hello World" || title_result[2] != "Hello World") {
20859        std::cout << "  [FAIL] : title() failed" << std::endl;
20860        throw std::runtime_error("pd_test_str_capitalize_title: title() failed");
20861    }
20862
20863    std::cout << " -> tests passed" << std::endl;
20864}
20865
20866// ============================================================================
20867// Test str().len()
20868// ============================================================================
20869
20870void pd_test_str_len() {
20871    std::cout << "========= Series.str().len() ============================";
20872
20873    pandas::Series<std::string> s({"a", "bb", "ccc", ""});
20874
20875    auto lens = s.str().len();
20876    if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877        std::cout << "  [FAIL] : len() failed" << std::endl;
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710    std::cout << "========= concat factory ==============================";
17711
17712    std::vector<int64_t> ordinals1 = {0, 1};
17713    std::vector<int64_t> ordinals2 = {2, 3};
17714    pandas::PeriodIndex idx1(ordinals1, "D");
17715    pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717    pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719    bool passed = (concatenated.size() == 4);
17720    if (!passed) {
17721        std::cout << "  [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722        throw std::runtime_error("pd_test_period_index_concat failed");
17723    }
17724
17725    std::cout << " -> tests passed" << std::endl;
17726}
to_dense (pd_test_1_all.cpp:3272)
3262        std::cout << " -> tests passed" << std::endl;
3263    }
3264
3265    void pd_test_sparse_array_to_dense() {
3266        std::cout << "========= SparseArray: to_dense ======================= ";
3267
3268        std::vector<numpy::float64> data = {0.0, 1.0, 0.0, 2.0, 0.0};
3269        pandas::SparseArray<numpy::float64> arr(data, 0.0);
3270
3271        auto dense = arr.to_dense();
3272        if (dense.getSize() != 5) {
3273            std::cout << "  [FAIL] : in pd_test_sparse_array_to_dense() : dense size != 5" << std::endl;
3274            throw std::runtime_error("pd_test_sparse_array_to_dense failed: dense size != 5");
3275        }
3276
3277        if (dense.getElementAt({0}) != 0.0 ||
3278            dense.getElementAt({1}) != 1.0 ||
3279            dense.getElementAt({2}) != 0.0 ||
3280            dense.getElementAt({3}) != 2.0 ||
3281            dense.getElementAt({4}) != 0.0) {
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
is_fill (pd_test_1_all.cpp:3314)
3304            throw std::runtime_error("pd_test_sparse_array_element_access failed: arr[3] != 10.0");
3305        }
3306
3307        // Access fill values
3308        if (arr[0] != 0.0) {
3309            std::cout << "  [FAIL] : in pd_test_sparse_array_element_access() : arr[0] != fill_value" << std::endl;
3310            throw std::runtime_error("pd_test_sparse_array_element_access failed: arr[0] != fill_value");
3311        }
3312
3313        // Test is_fill
3314        if (!arr.is_fill(0)) {
3315            std::cout << "  [FAIL] : in pd_test_sparse_array_element_access() : is_fill(0) should be true" << std::endl;
3316            throw std::runtime_error("pd_test_sparse_array_element_access failed: is_fill(0) should be true");
3317        }
3318
3319        if (arr.is_fill(1)) {
3320            std::cout << "  [FAIL] : in pd_test_sparse_array_element_access() : is_fill(1) should be false" << std::endl;
3321            throw std::runtime_error("pd_test_sparse_array_element_access failed: is_fill(1) should be false");
3322        }
3323
3324        std::cout << " -> tests passed" << std::endl;
all (pd_test_1_all.cpp:247)
237        pandas::BooleanArray has_true({
238            std::optional<bool>(false),
239            std::optional<bool>(true)
240        });
241        any_result = has_true.any();
242        if (!any_result.has_value() || !any_result.value()) {
243            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245        }
246
247        // Test all()
248        pandas::BooleanArray all_true({
249            std::optional<bool>(true),
250            std::optional<bool>(true)
251        });
252        auto all_result = all_true.all();
253        if (!all_result.has_value() || !all_result.value()) {
254            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255            throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256        }
any (pd_test_1_all.cpp:226)
216            std::cout << "  [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217            throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218        }
219
220        std::cout << " -> tests passed" << std::endl;
221    }
222
223    void pd_test_boolean_array_reductions() {
224        std::cout << "========= BooleanArray: reductions ======================= ";
225
226        // Test any()
227        pandas::BooleanArray all_false({
228            std::optional<bool>(false),
229            std::optional<bool>(false)
230        });
231        auto any_result = all_false.any();
232        if (!any_result.has_value() || any_result.value()) {
233            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235        }
density (pd_test_1_all.cpp:3247)
3237            std::cout << "  [FAIL] : in pd_test_sparse_array_fill_value_property() : default float fill_value should be NaN" << std::endl;
3238            throw std::runtime_error("pd_test_sparse_array_fill_value_property failed: default float fill_value should be NaN");
3239        }
3240
3241        std::cout << " -> tests passed" << std::endl;
3242    }
3243
3244    void pd_test_sparse_array_density() {
3245        std::cout << "========= SparseArray: density ======================= ";
3246
3247        // 20% density (2 non-fill out of 10)
3248        std::vector<numpy::float64> data = {0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0};
3249        pandas::SparseArray<numpy::float64> arr(data, 0.0);
3250
3251        double density = arr.density();
3252        if (std::abs(density - 0.2) > 0.001) {
3253            std::cout << "  [FAIL] : in pd_test_sparse_array_density() : density != 0.2, got " << density << std::endl;
3254            throw std::runtime_error("pd_test_sparse_array_density failed: density != 0.2");
3255        }
3256
3257        double sparsity = arr.sparsity();
dtype (pd_test_1_all.cpp:295)
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
293
294        pandas::BooleanArray arr;
295        if (arr.dtype().name() != "boolean") {
296            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298        }
299
300        if (arr.dtype().kind() != "b") {
301            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303        }
304
305        std::cout << " -> tests passed" << std::endl;
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937    void pd_test_config_version() {
938        std::cout << "========= df_config: version info ======================= ";
939        const char* version = pandas::DataFrameInfo::version();
940        if (version == nullptr || std::string(version).empty()) {
941            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943        }
944        std::cout << "-> tests passed" << std::endl;
945    }
946
947    void pd_test_config_na_repr() {
948        std::cout << "========= df_config: NA representation ======================= ";
949        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950        if (na_repr == nullptr) {
fill_value (pd_test_1_all.cpp:3229)
3219        std::cout << " -> tests passed" << std::endl;
3220    }
3221
3222    void pd_test_sparse_array_fill_value_property() {
3223        std::cout << "========= SparseArray: fill_value property ======================= ";
3224
3225        std::vector<numpy::int64> data = {-1, 5, -1, 10, -1};
3226        pandas::SparseArray<numpy::int64> arr(data, static_cast<numpy::int64>(-1));
3227
3228        if (arr.fill_value() != -1) {
3229            std::cout << "  [FAIL] : in pd_test_sparse_array_fill_value_property() : fill_value != -1" << std::endl;
3230            throw std::runtime_error("pd_test_sparse_array_fill_value_property failed: fill_value != -1");
3231        }
3232
3233        // Test default fill_value for float (NaN)
3234        pandas::SparseArray<numpy::float64> arr_float;
3235        if (!std::isnan(arr_float.fill_value())) {
3236            std::cout << "  [FAIL] : in pd_test_sparse_array_fill_value_property() : default float fill_value should be NaN" << std::endl;
3237            throw std::runtime_error("pd_test_sparse_array_fill_value_property failed: default float fill_value should be NaN");
3238        }
nbytes (pd_test_1_all.cpp:6214)
6204            }
6205
6206            // Test empty DataFrame
6207            pandas::DataFrame empty_df;
6208            if (!empty_df.empty()) {
6209                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210                throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211            }
6212
6213            // Test nbytes > 0 for non-empty
6214            if (df.nbytes() == 0) {
6215                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
ndim (pd_test_1_all.cpp:6195)
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
6199
6200            // Test empty
6201            if (df.empty()) {
6202                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203                throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204            }
npoints (pd_test_1_all.cpp:3171)
3161            dense.setElementAt({i}, (i == 3 || i == 7) ? 5.0 : 0.0);
3162        }
3163
3164        auto sparse = pandas::SparseArray<numpy::float64>::from_dense(dense, 0.0);
3165
3166        if (sparse.size() != 10) {
3167            std::cout << "  [FAIL] : in pd_test_sparse_array_from_dense() : size != 10" << std::endl;
3168            throw std::runtime_error("pd_test_sparse_array_from_dense failed: size != 10");
3169        }
3170
3171        if (sparse.npoints() != 2) {
3172            std::cout << "  [FAIL] : in pd_test_sparse_array_from_dense() : npoints != 2" << std::endl;
3173            throw std::runtime_error("pd_test_sparse_array_from_dense failed: npoints != 2");
3174        }
3175
3176        std::cout << " -> tests passed" << std::endl;
3177    }
3178
3179    void pd_test_sparse_array_sp_values_property() {
3180        std::cout << "========= SparseArray: sp_values property ======================= ";
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
shape (pd_test_1_all.cpp:6188)
6178            std::cout << "========= properties =======================";
6179
6180            std::map<std::string, std::vector<numpy::float64>> data;
6181            data["A"] = {1.0, 2.0, 3.0, 4.0};
6182            data["B"] = {5.0, 6.0, 7.0, 8.0};
6183            data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
sp_index (pd_test_1_all.cpp:3207)
3198        std::cout << " -> tests passed" << std::endl;
3199    }
3200
3201    void pd_test_sparse_array_sp_index_property() {
3202        std::cout << "========= SparseArray: sp_index property ======================= ";
3203
3204        std::vector<numpy::float64> data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0};
3205        pandas::SparseArray<numpy::float64> arr(data, 0.0);
3206
3207        const auto& sp_idx = arr.sp_index();
3208        if (sp_idx.getSize() != 3) {
3209            std::cout << "  [FAIL] : in pd_test_sparse_array_sp_index_property() : sp_index size != 3" << std::endl;
3210            throw std::runtime_error("pd_test_sparse_array_sp_index_property failed: sp_index size != 3");
3211        }
3212
3213        if (sp_idx.getElementAt({0}) != 1 ||
3214            sp_idx.getElementAt({1}) != 3 ||
3215            sp_idx.getElementAt({2}) != 5) {
3216            std::cout << "  [FAIL] : in pd_test_sparse_array_sp_index_property() : sp_index content mismatch" << std::endl;
3217            throw std::runtime_error("pd_test_sparse_array_sp_index_property failed: sp_index content mismatch");
sp_values (pd_test_1_all.cpp:3185)
3176        std::cout << " -> tests passed" << std::endl;
3177    }
3178
3179    void pd_test_sparse_array_sp_values_property() {
3180        std::cout << "========= SparseArray: sp_values property ======================= ";
3181
3182        std::vector<numpy::float64> data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0};
3183        pandas::SparseArray<numpy::float64> arr(data, 0.0);
3184
3185        const auto& sp_vals = arr.sp_values();
3186        if (sp_vals.getSize() != 3) {
3187            std::cout << "  [FAIL] : in pd_test_sparse_array_sp_values_property() : sp_values size != 3" << std::endl;
3188            throw std::runtime_error("pd_test_sparse_array_sp_values_property failed: sp_values size != 3");
3189        }
3190
3191        if (sp_vals.getElementAt({0}) != 1.0 ||
3192            sp_vals.getElementAt({1}) != 2.0 ||
3193            sp_vals.getElementAt({2}) != 3.0) {
3194            std::cout << "  [FAIL] : in pd_test_sparse_array_sp_values_property() : sp_values content mismatch" << std::endl;
3195            throw std::runtime_error("pd_test_sparse_array_sp_values_property failed: sp_values content mismatch");
sparsity (pd_test_1_all.cpp:3257)
3247        // 20% density (2 non-fill out of 10)
3248        std::vector<numpy::float64> data = {0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0};
3249        pandas::SparseArray<numpy::float64> arr(data, 0.0);
3250
3251        double density = arr.density();
3252        if (std::abs(density - 0.2) > 0.001) {
3253            std::cout << "  [FAIL] : in pd_test_sparse_array_density() : density != 0.2, got " << density << std::endl;
3254            throw std::runtime_error("pd_test_sparse_array_density failed: density != 0.2");
3255        }
3256
3257        double sparsity = arr.sparsity();
3258        if (std::abs(sparsity - 0.8) > 0.001) {
3259            std::cout << "  [FAIL] : in pd_test_sparse_array_density() : sparsity != 0.8, got " << sparsity << std::endl;
3260            throw std::runtime_error("pd_test_sparse_array_density failed: sparsity != 0.8");
3261        }
3262
3263        std::cout << " -> tests passed" << std::endl;
3264    }
3265
3266    void pd_test_sparse_array_to_dense() {
3267        std::cout << "========= SparseArray: to_dense ======================= ";