CategoricalArray#

class pandas::CategoricalArray#

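Extension array type for categorical data. Values are stored as numpy::int32 codes that index into a list of string categories; a code of -1 marks a missing (NA) value. The categories may optionally be treated as ordered.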

Example#

#include <pandas/pandas.h>
using namespace pandas;

// Use CategoricalArray
CategoricalArray obj;
// ... operations ...

Constructors#

Signature

Location

Example

CategoricalArray(const numpy::NDArray<numpy::int32>& codes, const std::vector<std::string>& categories, bool ordered = false, bool copy = true)

pd_categorical_array.h:103

View

CategoricalArray(const std::vector<std::optional<std::string>>& values, const std::optional<std::vector<std::string>>& categories = std::nullopt, std::optional<bool> ordered = std::nullopt, const std::optional<CategoricalDtype>& dtype = std::nullopt, bool fastpath = false, bool copy = true)

pd_categorical_array.h:126

View

explicit CategoricalArray(const std::vector<std::optional<std::string>>& values, bool ordered)

pd_categorical_array.h:233

View

CategoricalArray(const std::vector<std::optional<std::string>>& values, const std::vector<std::string>& categories, bool ordered = false)

pd_categorical_array.h:251

View

Construction#

Signature

Return Type

Location

Example

static CategoricalArray from_codes( const std::vector<numpy::int32>& codes, const std::vector<std::string>& categories, bool ordered = false, const std::string& dtype = "", bool validate = true)

static CategoricalArray

pd_categorical_array.h:275

View

static CategoricalArray from_sequence( const std::vector<std::optional<std::string>>& values, bool ordered = false)

static CategoricalArray

pd_categorical_array.h:304

Indexing / Selection#

Signature

Return Type

Location

Example

numpy::int32 get_code(ssize_t index) const

numpy::int32

pd_categorical_array.h:519

CategoricalArray take(const std::vector<numpy::int64>& indices, std::optional<int> axis = std::nullopt, bool allow_fill = false, const std::string& fill_value = "") const

CategoricalArray

pd_categorical_array.h:1305

View

Data Manipulation#

Signature

Return Type

Location

Example

CategoricalArray dropna() const

CategoricalArray

pd_categorical_array.h:595

View

CategoricalArray insert(size_t loc, const std::optional<std::string>& item) const

CategoricalArray

pd_categorical_array.h:1594

View

CategoricalArray rename_categories(const std::vector<std::string>& new_categories) const

CategoricalArray

pd_categorical_array.h:740

View

CategoricalArray rename_categories(const std::unordered_map<std::string, std::string>& mapping) const

CategoricalArray

pd_categorical_array.h:760

View

Missing Data#

Signature

Return Type

Location

Example

CategoricalArray fillna(const std::string& value, const std::string& method = "", std::optional<size_t> limit = std::nullopt, bool copy = true) const

CategoricalArray

pd_categorical_array.h:566

View

CategoricalArray interpolate( const std::string& method = "pad", int axis = 0, const std::optional<std::vector<size_t>>& index = std::nullopt, std::optional<size_t> limit = std::nullopt, const std::string& limit_direction = "forward", const std::optional<std::string>& limit_area = std::nullopt, bool copy = true ) const

CategoricalArray

pd_categorical_array.h:1643

View

numpy::NDArray<numpy::bool_> isna() const

numpy::NDArray<numpy::bool_>

pd_categorical_array.h:539

View

numpy::NDArray<numpy::bool_> isnull() const

numpy::NDArray<numpy::bool_>

pd_categorical_array.h:1798

View

numpy::NDArray<numpy::bool_> notna() const

numpy::NDArray<numpy::bool_>

pd_categorical_array.h:550

View

numpy::NDArray<numpy::bool_> notnull() const

numpy::NDArray<numpy::bool_>

pd_categorical_array.h:1884

View

Statistics#

Signature

Return Type

Location

Example

size_t count() const

size_t

pd_categorical_array.h:609

View

std::map<std::string, std::string> describe() const

std::map<std::string, std::string>

pd_categorical_array.h:1480

View

std::optional<std::string> max(bool skipna = true) const

std::optional<std::string>

pd_categorical_array.h:918

View

std::optional<std::string> min(bool skipna = true) const

std::optional<std::string>

pd_categorical_array.h:884

View

std::pair<std::vector<std::string>, std::vector<numpy::int64>> value_counts() const

std::pair<std::vector<std::string>, std::vector<numpy::int64>>

pd_categorical_array.h:999

View

Aggregation#

Signature

Return Type

Location

Example

CategoricalArray map(const std::unordered_map<std::string, std::string>& mapper, const std::string& na_action = "") const

CategoricalArray

pd_categorical_array.h:1808

View

CategoricalArray map(Func func, const std::string& na_action = "") const

CategoricalArray

pd_categorical_array.h:1852

View

Arithmetic#

Signature

Return Type

Location

Example

CategoricalArray add_categories(const std::vector<std::string>& new_categories) const

CategoricalArray

pd_categorical_array.h:638

View

Comparison#

Signature

Return Type

Location

Example

bool equals(const CategoricalArray& other) const

bool

pd_categorical_array.h:1575

View

size_t len() const

size_t

pd_categorical_array.h:447

View

Sorting#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::int64> argsort(bool ascending = true, const std::string& na_position = "last", const std::string& kind = "quicksort") const

numpy::NDArray<numpy::int64>

pd_categorical_array.h:1180

View

size_t searchsorted(const std::string& value, const std::string& side = "left", std::optional<numpy::NDArray<numpy::int64>> sorter = std::nullopt) const

size_t

pd_categorical_array.h:1970

View

CategoricalArray sort_values(bool ascending = true, const std::string& na_position = "last", bool inplace = false) const

CategoricalArray

pd_categorical_array.h:2067

View

Reshaping#

Signature

Return Type

Location

Example

CategoricalArray T() const

CategoricalArray

pd_categorical_array.h:2162

View

CategoricalArray swapaxes(int axis1, int axis2) const

CategoricalArray

pd_categorical_array.h:2094

View

CategoricalArray transpose() const

CategoricalArray

pd_categorical_array.h:2155

View

Combining#

Signature

Return Type

Location

Example

static CategoricalArray concat(const std::vector<CategoricalArray>& arrays)

static CategoricalArray

pd_categorical_array.h:314

View

static CategoricalArray concat_merge(const std::vector<CategoricalArray>& arrays)

static CategoricalArray

pd_categorical_array.h:351

View

Time Series#

Signature

Return Type

Location

Example

CategoricalArray shift(int64_t periods = 1, const std::optional<std::string>& fill_value = std::nullopt) const

CategoricalArray

pd_categorical_array.h:2031

View

I/O#

Signature

Return Type

Location

Example

std::vector<std::optional<std::string>> to_list() const

std::vector<std::optional<std::string>>

pd_categorical_array.h:2105

View

numpy::NDArray<U> to_numpy(bool copy = true, U na_value = U{-1}) const

numpy::NDArray<U>

pd_categorical_array.h:2136

View

std::string to_string() const

std::string

pd_categorical_array.h:2212

View

std::vector<std::optional<std::string>> tolist() const

std::vector<std::optional<std::string>>

pd_categorical_array.h:2124

View

Conversion#

Signature

Return Type

Location

Example

std::vector<std::optional<T>> astype(const std::string& dtype, bool copy = true) const

std::vector<std::optional<T>>

pd_categorical_array.h:1365

View

numpy::NDArray<numpy::int32> astype_codes() const

numpy::NDArray<numpy::int32>

pd_categorical_array.h:1418

View

CategoricalArray copy() const

CategoricalArray

pd_categorical_array.h:1298

View

CategoricalArray view() const

CategoricalArray

pd_categorical_array.h:2174

View

Set Operations#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::bool_> duplicated(const std::string& keep = "first") const

numpy::NDArray<numpy::bool_>

pd_categorical_array.h:1517

View

numpy::NDArray<numpy::bool_> isin(const std::vector<std::string>& values) const

numpy::NDArray<numpy::bool_>

pd_categorical_array.h:1770

View

CategoricalArray unique() const

CategoricalArray

pd_categorical_array.h:951

View

Type Checking#

Signature

Return Type

Location

Example

bool is_na(ssize_t index) const

bool

pd_categorical_array.h:527

View

Other Methods#

Signature

Return Type

Location

Example

std::optional<size_t> argmax(std::optional<int> axis = std::nullopt, bool skipna = true) const

std::optional<size_t>

pd_categorical_array.h:1258

View

std::optional<size_t> argmin(std::optional<int> axis = std::nullopt, bool skipna = true) const

std::optional<size_t>

pd_categorical_array.h:1220

View

CategoricalArray as_ordered() const

CategoricalArray

pd_categorical_array.h:865

View

CategoricalArray as_unordered() const

CategoricalArray

pd_categorical_array.h:872

View

std::unordered_map<std::string, numpy::int32> build_category_map() const

std::unordered_map<std::string, numpy::int32>

pd_categorical_array.h:59

const std::vector<std::string>& categories() const

const std::vector<std::string>&

pd_categorical_array.h:473

View

const std::string& categories_dtype_str() const

const std::string&

pd_categorical_array.h:492

void check_for_ordered(const std::string& op) const

void

pd_categorical_array.h:1427

View

check_for_ordered("searchsorted")

pd_categorical_array.h:1974

View

check_for_ordered("sort_values")

pd_categorical_array.h:2071

View

const numpy::NDArray<numpy::int32>& codes() const

const numpy::NDArray<numpy::int32>&

pd_categorical_array.h:454

View

std::vector<numpy::int32> codes_vector() const

std::vector<numpy::int32>

pd_categorical_array.h:461

CategoricalArray delete_(size_t loc, std::optional<int> axis = std::nullopt) const

CategoricalArray

pd_categorical_array.h:1438

View

CategoricalArray delete_(const std::vector<size_t>& locs) const

CategoricalArray

pd_categorical_array.h:1461

View

CategoricalDtype dtype() const

CategoricalDtype

pd_categorical_array.h:401

View

bool empty() const

bool

pd_categorical_array.h:440

View

std::pair<IntegerArray<numpy::int64>, CategoricalArray> factorize() const

std::pair<IntegerArray<numpy::int64>, CategoricalArray>

pd_categorical_array.h:969

View

bool has_na() const

bool

pd_categorical_array.h:622

View

std::vector<std::string> internal_get_values() const

std::vector<std::string>

pd_categorical_array.h:2198

size_t memory_usage(bool deep = false) const

size_t

pd_categorical_array.h:1865

View

const std::optional<std::string>& name() const

const std::optional<std::string>&

pd_categorical_array.h:484

View

size_t nbytes() const

size_t

pd_categorical_array.h:415

View

constexpr int ndim() const

constexpr int

pd_categorical_array.h:426

View

bool ordered() const

bool

pd_categorical_array.h:480

View

CategoricalArray ravel() const

CategoricalArray

pd_categorical_array.h:1892

View

CategoricalArray remove_categories(const std::vector<std::string>& removals) const

CategoricalArray

pd_categorical_array.h:657

View

CategoricalArray remove_unused_categories() const

CategoricalArray

pd_categorical_array.h:823

View

CategoricalArray reorder_categories(const std::vector<std::string>& new_categories, std::optional<bool> ordered = std::nullopt) const

CategoricalArray

pd_categorical_array.h:783

View

CategoricalArray repeat(size_t repeats, std::optional<int> axis = std::nullopt) const

CategoricalArray

pd_categorical_array.h:1901

View

CategoricalArray repeat(const std::vector<size_t>& repeats) const

CategoricalArray

pd_categorical_array.h:1921

View

std::string repr() const

std::string

pd_categorical_array.h:2238

View

CategoricalArray reshape(const std::vector<size_t>& new_shape) const

CategoricalArray

pd_categorical_array.h:1950

View

CategoricalArray set_categories(const std::vector<std::string>& new_categories, std::optional<bool> ordered = std::nullopt, bool rename = false) const

CategoricalArray

pd_categorical_array.h:700

View

void set_categories_dtype(const std::string& dtype)

void

pd_categorical_array.h:496

void set_name(const std::string& name)

void

pd_categorical_array.h:488

View

CategoricalArray set_ordered(bool value) const

CategoricalArray

pd_categorical_array.h:2021

View

std::vector<size_t> shape() const

std::vector<size_t>

pd_categorical_array.h:433

View

size_t size() const

size_t

pd_categorical_array.h:408

View

CategoricalArray slice(size_t start, size_t stop, size_t step = 1) const

CategoricalArray

pd_categorical_array.h:1342

View

void validate_codes() const

void

pd_categorical_array.h:70

Internal Methods#

1 internal method (prefixed with underscore)

Code Examples#

The following examples are extracted from the test suite.

CategoricalArray (pd_test_3_all.cpp:28514)
28504static int cgo_check(bool cond, const char* msg) {
28505    if (!cond) { std::cout << "    FAIL: " << msg << std::endl; return 1; }
28506    return 0;
28507}
28508
28509static pandas::CategoricalArray make_abc() {
28510    std::vector<std::optional<std::string>> v{
28511        std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512    };
28513    return pandas::CategoricalArray(v, false);
28514}
28515
28516void pd_test_cat_rename_dict() {
28517    std::cout << "  -- pd_test_cat_rename_dict --" << std::endl;
28518    int fail = 0;
28519    auto arr = make_abc();
28520    std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521    auto r = arr.rename_categories(m);
28522    const auto& cats = r.categories();
28523    fail += cgo_check(cats.size() == 3, "size==3");
CategoricalArray (pd_test_3_all.cpp:28514)
28504static int cgo_check(bool cond, const char* msg) {
28505    if (!cond) { std::cout << "    FAIL: " << msg << std::endl; return 1; }
28506    return 0;
28507}
28508
28509static pandas::CategoricalArray make_abc() {
28510    std::vector<std::optional<std::string>> v{
28511        std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512    };
28513    return pandas::CategoricalArray(v, false);
28514}
28515
28516void pd_test_cat_rename_dict() {
28517    std::cout << "  -- pd_test_cat_rename_dict --" << std::endl;
28518    int fail = 0;
28519    auto arr = make_abc();
28520    std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521    auto r = arr.rename_categories(m);
28522    const auto& cats = r.categories();
28523    fail += cgo_check(cats.size() == 3, "size==3");
CategoricalArray (pd_test_3_all.cpp:28514)
28504static int cgo_check(bool cond, const char* msg) {
28505    if (!cond) { std::cout << "    FAIL: " << msg << std::endl; return 1; }
28506    return 0;
28507}
28508
28509static pandas::CategoricalArray make_abc() {
28510    std::vector<std::optional<std::string>> v{
28511        std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512    };
28513    return pandas::CategoricalArray(v, false);
28514}
28515
28516void pd_test_cat_rename_dict() {
28517    std::cout << "  -- pd_test_cat_rename_dict --" << std::endl;
28518    int fail = 0;
28519    auto arr = make_abc();
28520    std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521    auto r = arr.rename_categories(m);
28522    const auto& cats = r.categories();
28523    fail += cgo_check(cats.size() == 3, "size==3");
CategoricalArray (pd_test_3_all.cpp:28514)
28504static int cgo_check(bool cond, const char* msg) {
28505    if (!cond) { std::cout << "    FAIL: " << msg << std::endl; return 1; }
28506    return 0;
28507}
28508
28509static pandas::CategoricalArray make_abc() {
28510    std::vector<std::optional<std::string>> v{
28511        std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512    };
28513    return pandas::CategoricalArray(v, false);
28514}
28515
28516void pd_test_cat_rename_dict() {
28517    std::cout << "  -- pd_test_cat_rename_dict --" << std::endl;
28518    int fail = 0;
28519    auto arr = make_abc();
28520    std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521    auto r = arr.rename_categories(m);
28522    const auto& cats = r.categories();
28523    fail += cgo_check(cats.size() == 3, "size==3");
from_codes (pd_test_1_all.cpp:403)
393        std::cout << " -> tests passed" << std::endl;
394    }
395
396    void pd_test_categorical_array_from_codes() {
397        std::cout << "========= CategoricalArray: from_codes ======================= ";
398
399        std::vector<std::string> cats = {"a", "b", "c"};
400        std::vector<numpy::int32> codes = {0, 1, 2, 0, 1, -1};  // -1 is NA
401
402        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, false);
403
404        if (arr.size() != 6) {
405            std::cout << "  [FAIL] : in pd_test_categorical_array_from_codes() : size != 6" << std::endl;
406            throw std::runtime_error("pd_test_categorical_array_from_codes failed: size != 6");
407        }
408
409        // Check that code=-1 creates NA
410        if (!arr.is_na(5)) {
411            std::cout << "  [FAIL] : in pd_test_categorical_array_from_codes() : code -1 should be NA" << std::endl;
412            throw std::runtime_error("pd_test_categorical_array_from_codes failed: code -1 should be NA");
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897    std::cout << "========= inherited take ==============================";
5898
5899    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900    pandas::CategoricalIndex idx(arr);
5901
5902    std::vector<size_t> indices = {0, 2, 3};
5903    pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905    bool passed = (taken.size() == 3);
5906    if (!passed) {
5907        std::cout << "  [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908        throw std::runtime_error("pd_test_categorical_index_take failed");
5909    }
5910
5911    std::cout << " -> tests passed" << std::endl;
5912}
dropna (pd_test_1_all.cpp:531)
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
rename_categories (pd_test_1_all.cpp:655)
645    void pd_test_categorical_array_rename_categories() {
646        std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648        std::vector<std::string> cats = {"a", "b"};
649        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
650        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652        // Rename categories
653        std::vector<std::string> new_names = {"alpha", "beta"};
654        pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656        // Check categories are renamed
657        const std::vector<std::string>& result_cats = result.categories();
658        if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659            std::cout << "  [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660            throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661        }
662
663        // Values should now be renamed
664        std::optional<std::string> val = result[0];
rename_categories (pd_test_1_all.cpp:655)
645    void pd_test_categorical_array_rename_categories() {
646        std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648        std::vector<std::string> cats = {"a", "b"};
649        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
650        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652        // Rename categories
653        std::vector<std::string> new_names = {"alpha", "beta"};
654        pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656        // Check categories are renamed
657        const std::vector<std::string>& result_cats = result.categories();
658        if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659            std::cout << "  [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660            throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661        }
662
663        // Values should now be renamed
664        std::optional<std::string> val = result[0];
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
interpolate (pd_test_1_all.cpp:24365)
24355        std::cout << "====================================== [OK] pd_test_idxmax_idxmin test suite ========================== " << std::endl;
24356        return 0;
24357    }
24358
24359} // namespace dataframe_tests
24360// ------------------- pd_test_idxmax_idxmin.cpp (end) -----------------------------
24361
24362// ------------------- pd_test_interpolate.cpp (start) -----------------------------
24363// dataframe_tests/pd_test_interpolate.cpp
24364// Test file for DataFrame.interpolate() method
24365
24366#include <iostream>
24367#include <stdexcept>
24368#include <cmath>
24369#include <limits>
24370#include <map>
24371#include "../pandas/pd_dataframe.h"
24372
24373// CRITICAL: No using namespace directives
isna (pd_test_1_all.cpp:524)
514            throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515        }
516
517        // Test count (non-NA)
518        if (arr.count() != 2) {
519            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520            throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
676    // Index 0: 1.0 -> not null
677    if (isnull_result.getElementAt({0})) {
678        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679        throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680    }
681    // Index 1: NaN -> null
notna (pd_test_1_all.cpp:6595)
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593                }
6594
6595                auto notna_mask = df_na.notna();
6596                if (notna_mask.getElementAt({1, 0})) {
6597                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598                    throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599                }
6600            }
6601
6602            // Test fillna
6603            {
6604                std::map<std::string, std::vector<numpy::float64>> float_data;
6605                float_data["X"] = {1.0, std::nan(""), 3.0};
notnull (pd_test_3_all.cpp:665)
655    }
656
657    std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
describe (pd_test_2_all.cpp:19793)
19783        ++g_fail;
19784    }
19785}
19786
19787static bool approx_eq(double a, double b, double tol = 1e-9) {
19788    if (std::isnan(a) && std::isnan(b)) return true;
19789    return std::abs(a - b) < tol;
19790}
19791
19792// =====================================================================
19793// Test: describe() default mode — numeric columns only
19794// =====================================================================
19795
19796void pd_test_describe_numeric_only() {
19797    std::cout << "  -- pd_test_describe_numeric_only --" << std::endl;
19798
19799    pandas::DataFrame df;
19800    df.add_column("A", std::vector<double>{1.0, 2.0, 3.0, 4.0, 5.0});
19801    df.add_column("B", std::vector<double>{10.0, 20.0, 30.0, 40.0, 50.0});
19802    df.add_column("Name", std::vector<std::string>{"a", "b", "c", "d", "e"});
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
value_counts (pd_test_1_all.cpp:865)
855        std::vector<std::optional<std::string>> values = {
856            std::optional<std::string>("a"),
857            std::optional<std::string>("b"),
858            std::optional<std::string>("a"),
859            std::optional<std::string>("a"),
860            std::optional<std::string>("b"),
861            std::nullopt  // NA not counted
862        };
863        pandas::CategoricalArray arr(values);
864
865        auto [cats, counts] = arr.value_counts();
866
867        // Should have 2 categories
868        if (cats.size() != 2 || counts.size() != 2) {
869            std::cout << "  [FAIL] : in pd_test_categorical_array_value_counts() : wrong size" << std::endl;
870            throw std::runtime_error("pd_test_categorical_array_value_counts failed: wrong size");
871        }
872
873        // Find 'a' count
874        int64_t a_count = 0, b_count = 0;
875        for (size_t i = 0; i < cats.size(); ++i) {
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833    std::cout << "========= map =========================================";
5834
5835    pandas::CategoricalArray arr({"yes", "no", "yes"});
5836    pandas::CategoricalIndex idx(arr);
5837
5838    std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839    pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841    bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842                   !mapped.has_category("yes") && !mapped.has_category("no"));
5843    if (!passed) {
5844        std::cout << "  [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845        throw std::runtime_error("pd_test_categorical_index_map failed");
5846    }
5847
5848    std::cout << " -> tests passed" << std::endl;
5849}
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833    std::cout << "========= map =========================================";
5834
5835    pandas::CategoricalArray arr({"yes", "no", "yes"});
5836    pandas::CategoricalIndex idx(arr);
5837
5838    std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839    pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841    bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842                   !mapped.has_category("yes") && !mapped.has_category("no"));
5843    if (!passed) {
5844        std::cout << "  [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845        throw std::runtime_error("pd_test_categorical_index_map failed");
5846    }
5847
5848    std::cout << " -> tests passed" << std::endl;
5849}
add_categories (pd_test_1_all.cpp:555)
545    }
546
547    void pd_test_categorical_array_add_categories() {
548        std::cout << "========= CategoricalArray: add_categories ======================= ";
549
550        std::vector<std::string> cats = {"a", "b"};
551        std::vector<numpy::int32> codes = {0, 1, 0};
552        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
553
554        // Add new categories
555        pandas::CategoricalArray result = arr.add_categories({"c", "d"});
556        if (result.categories().size() != 4) {
557            std::cout << "  [FAIL] : in pd_test_categorical_array_add_categories() : new categories size != 4" << std::endl;
558            throw std::runtime_error("pd_test_categorical_array_add_categories failed: new categories size != 4");
559        }
560
561        // Original values should be preserved
562        std::optional<std::string> val = result[0];
563        if (!val.has_value() || *val != "a") {
564            std::cout << "  [FAIL] : in pd_test_categorical_array_add_categories() : value not preserved" << std::endl;
565            throw std::runtime_error("pd_test_categorical_array_add_categories failed: value not preserved");
equals (pd_test_1_all.cpp:5866)
5856    std::cout << "========= equals ======================================";
5857
5858    pandas::CategoricalArray arr1({"a", "b", "a"});
5859    pandas::CategoricalArray arr2({"a", "b", "a"});
5860    pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862    pandas::CategoricalIndex idx1(arr1);
5863    pandas::CategoricalIndex idx2(arr2);
5864    pandas::CategoricalIndex idx3(arr3);
5865
5866    bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867    if (!passed) {
5868        std::cout << "  [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869        throw std::runtime_error("pd_test_categorical_index_equals failed");
5870    }
5871
5872    std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876    std::cout << "========= identical ===================================";
len (pd_test_3_all.cpp:20867)
20857    auto title_result = s.str().title();
20858    if (title_result[0] != "Hello World" || title_result[1] != "Hello World" || title_result[2] != "Hello World") {
20859        std::cout << "  [FAIL] : title() failed" << std::endl;
20860        throw std::runtime_error("pd_test_str_capitalize_title: title() failed");
20861    }
20862
20863    std::cout << " -> tests passed" << std::endl;
20864}
20865
20866// ============================================================================
20867// Test str().len()
20868// ============================================================================
20869
20870void pd_test_str_len() {
20871    std::cout << "========= Series.str().len() ============================";
20872
20873    pandas::Series<std::string> s({"a", "bb", "ccc", ""});
20874
20875    auto lens = s.str().len();
20876    if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877        std::cout << "  [FAIL] : len() failed" << std::endl;
argsort (pd_test_1_all.cpp:1304)
1294        std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296        pandas::DatetimeArray arr(std::vector<std::string>{
1297            "2023-06-15",
1298            "NaT",
1299            "2023-01-01",
1300            "2023-12-31"
1301        });
1302
1303        // argsort ascending
1304        auto indices = arr.argsort(true, "last");
1305        // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
searchsorted (pd_test_1_all.cpp:18958)
18948    // =========================================================================
18949    // Search Tests
18950    // =========================================================================
18951
18952    void pd_test_range_index_searchsorted() {
18953        std::cout << "========= searchsorted ================================ ";
18954
18955        pandas::RangeIndex ri(0, 10, 2);  // [0, 2, 4, 6, 8]
18956
18957        bool passed = (ri.searchsorted(4, "left") == 2 &&
18958                      ri.searchsorted(4, "right") == 3 &&
18959                      ri.searchsorted(3, "left") == 2 &&   // 3 would go between 2 and 4
18960                      ri.searchsorted(-1, "left") == 0 &&  // Before all
18961                      ri.searchsorted(10, "left") == 5);   // After all
18962
18963        if (!passed) {
18964            std::cout << "  [FAIL] : searchsorted" << std::endl;
18965            throw std::runtime_error("pd_test_range_index_searchsorted failed");
18966        }
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
T (pd_test_1_all.cpp:128)
118            throw std::runtime_error("pd_test_boolean_array_kleene_and failed: NA & F");
119        }
120
121        std::cout << " -> tests passed" << std::endl;
122    }
123
124    void pd_test_boolean_array_kleene_or() {
125        std::cout << "========= BooleanArray: Kleene OR ======================= ";
126
127        // Kleene OR truth table:
128        // T | T = T, T | F = T, T | NA = T (True dominates)
129        // F | T = T, F | F = F, F | NA = NA
130        // NA | T = T, NA | F = NA, NA | NA = NA
131
132        pandas::BooleanArray t({std::optional<bool>(true)});
133        pandas::BooleanArray f({std::optional<bool>(false)});
134        pandas::BooleanArray na({std::nullopt});
135
136        // T | NA = T (True dominates)
137        auto tna = (t | na);
138        if (!tna[0].has_value() || !tna[0].value()) {
swapaxes (pd_test_3_all.cpp:2276)
2266    auto sorted_desc = arr.sort_values(false, "last");
2267    if (*sorted_desc[0] != "c" || *sorted_desc[1] != "b" ||
2268        *sorted_desc[2] != "a" || sorted_desc[3].has_value()) {
2269        throw std::runtime_error("sort_values descending failed");
2270    }
2271
2272    std::cout << " -> tests passed" << std::endl;
2273}
2274
2275void pd_test_3_all_categorical_swapaxes() {
2276    std::cout << "========= CategoricalArray.swapaxes() =================";
2277
2278    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2279    pandas::CategoricalArray arr(values);
2280
2281    auto result = arr.swapaxes(0, 0);
2282    if (result.size() != 3) {
2283        throw std::runtime_error("swapaxes failed");
2284    }
2285
2286    bool threw = false;
transpose (pd_test_1_all.cpp:16648)
16638                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640            }
16641
16642            passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643            if (!passed) {
16644                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646            }
16647
16648            // Test transpose() alias
16649            auto transposed2 = s.transpose();
16650            passed = transposed2.size() == s.size();
16651            if (!passed) {
16652                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653                throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654            }
16655
16656            std::cout << " -> tests passed" << std::endl;
16657        }
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710    std::cout << "========= concat factory ==============================";
17711
17712    std::vector<int64_t> ordinals1 = {0, 1};
17713    std::vector<int64_t> ordinals2 = {2, 3};
17714    pandas::PeriodIndex idx1(ordinals1, "D");
17715    pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717    pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719    bool passed = (concatenated.size() == 4);
17720    if (!passed) {
17721        std::cout << "  [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722        throw std::runtime_error("pd_test_period_index_concat failed");
17723    }
17724
17725    std::cout << " -> tests passed" << std::endl;
17726}
concat_merge (pd_test_3_all.cpp:26636)
26626    const auto& mi = result.multiindex();
26627    if (mi.get_level_values_str(0)[0] != "x") throw std::runtime_error("Key 'x' not found at level 0");
26628    if (mi.get_level_values_str(0)[2] != "y") throw std::runtime_error("Key 'y' not found at level 0");
26629    std::cout << "    -> test passed" << std::endl;
26630}
26631
26632void test_categorical_array_concat_merge() {
26633    std::cout << "  test_categorical_array_concat_merge" << std::endl;
26634    auto cat1 = CategoricalArray::from_codes({0, 1}, {"a", "b"});
26635    auto cat2 = CategoricalArray::from_codes({0, 1}, {"b", "c"});
26636    auto result = CategoricalArray::concat_merge({cat1, cat2});
26637    if (result.categories().size() != 3) throw std::runtime_error("Expected 3 merged categories");
26638    if (result.categories()[0] != "a" || result.categories()[1] != "b" || result.categories()[2] != "c")
26639        throw std::runtime_error("Merged categories wrong");
26640    if (result.size() != 4) throw std::runtime_error("Expected 4 elements");
26641    std::cout << "    -> test passed" << std::endl;
26642}
26643
26644int pd_test_concat_ext_main() {
26645    try {
26646        std::cout << "========= concat extension tests =========" << std::endl;
shift (pd_test_1_all.cpp:5188)
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
5182            if (!passed) {
5183                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5184                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5185            }
5186
5187            // shift: [NaN, 1, 3, 6]
5188            auto s = df.shift();
5189            val = s["A"].get_value_str(1);
5190            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5191            if (!passed) {
5192                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5193                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5194            }
5195
5196            std::cout << " -> tests passed" << std::endl;
5197        }
to_list (pd_test_1_all.cpp:10247)
10238    std::cout << " -> tests passed" << std::endl;
10239}
10240
10241void pd_test_extension_index_to_list() {
10242    std::cout << "========= to_list =========================";
10243
10244    pandas::CategoricalArray arr({"x", "y", "z"});
10245    pandas::CategoricalIndex idx(arr);
10246
10247    auto list = idx.to_list();
10248
10249    bool passed = (list.size() == 3 &&
10250                   list[0].has_value() && *list[0] == "x" &&
10251                   list[1].has_value() && *list[1] == "y" &&
10252                   list[2].has_value() && *list[2] == "z");
10253    if (!passed) {
10254        std::cout << "  [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10255        throw std::runtime_error("pd_test_extension_index_to_list failed");
10256    }
to_numpy (pd_test_1_all.cpp:16764)
16755        // =====================================================================
16756        // to_numpy Tests
16757        // =====================================================================
16758
16759        void pd_test_ndframe_to_numpy() {
16760            std::cout << "========= to_numpy =============================================" << std::endl;
16761
16762            pandas::Series<int> s({10, 20, 30});
16763
16764            auto arr = s.to_numpy();
16765
16766            bool passed = arr.getSize() == 3;
16767            if (!passed) {
16768                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16769                throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16770            }
16771
16772            passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16773            if (!passed) {
16774                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
tolist (pd_test_3_all.cpp:2300)
2290        threw = true;
2291    }
2292    if (!threw) {
2293        throw std::runtime_error("swapaxes should throw for invalid axes");
2294    }
2295
2296    std::cout << " -> tests passed" << std::endl;
2297}
2298
2299void pd_test_3_all_categorical_to_list() {
2300    std::cout << "========= CategoricalArray.to_list()/tolist() =========";
2301
2302    std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "c"};
2303    pandas::CategoricalArray arr(values);
2304
2305    auto list = arr.to_list();
2306    if (list.size() != 4 || *list[0] != "a" || *list[1] != "b" ||
2307        list[2].has_value() || *list[3] != "c") {
2308        throw std::runtime_error("to_list failed");
2309    }
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
astype_codes (pd_test_3_all.cpp:1822)
1812    std::cout << "========= CategoricalArray.astype() ==================";
1813
1814    std::vector<std::optional<std::string>> values = {"a", "b", "c", "a", std::nullopt};
1815    pandas::CategoricalArray arr(values);
1816
1817    auto str_result = arr.astype<std::string>("str");
1818    if (str_result.size() != 5 || !str_result[0].has_value() || *str_result[0] != "a" || str_result[4].has_value()) {
1819        throw std::runtime_error("astype failed");
1820    }
1821
1822    auto codes = arr.astype_codes();
1823    if (codes.getSize() != 5) {
1824        throw std::runtime_error("astype_codes failed");
1825    }
1826
1827    std::cout << " -> tests passed" << std::endl;
1828}
1829
1830void pd_test_3_all_categorical_check_ordered() {
1831    std::cout << "========= CategoricalArray.check_for_ordered() ========";
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
view (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
duplicated (pd_test_1_all.cpp:10583)
10574    std::cout << " -> tests passed" << std::endl;
10575}
10576
10577void pd_test_extension_index_duplicated() {
10578    std::cout << "========= duplicated =========================";
10579
10580    pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10581    pandas::CategoricalIndex idx(arr);
10582
10583    auto dup_mask = idx.duplicated("first");
10584
10585    bool passed = (dup_mask.getElementAt({0}) == false &&
10586                   dup_mask.getElementAt({1}) == false &&
10587                   dup_mask.getElementAt({2}) == true &&
10588                   dup_mask.getElementAt({3}) == false &&
10589                   dup_mask.getElementAt({4}) == true);
10590    if (!passed) {
10591        std::cout << "  [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10592        throw std::runtime_error("pd_test_extension_index_duplicated failed");
10593    }
isin (pd_test_1_all.cpp:5938)
5928    std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932    std::cout << "========= inherited isin ==============================";
5933
5934    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935    pandas::CategoricalIndex idx(arr);
5936
5937    std::vector<std::string> values = {"a", "c"};
5938    numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940    bool passed = (mask.getSize() == 4 &&
5941                   mask.getElementAt({0}) == true &&   // a
5942                   mask.getElementAt({1}) == false &&  // b
5943                   mask.getElementAt({2}) == true &&   // c
5944                   mask.getElementAt({3}) == false);   // d
5945    if (!passed) {
5946        std::cout << "  [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947        throw std::runtime_error("pd_test_categorical_index_isin failed");
5948    }
unique (pd_test_1_all.cpp:1345)
1335        pandas::DatetimeArray arr(std::vector<std::string>{
1336            "2023-01-01",
1337            "2023-06-15",
1338            "2023-01-01",
1339            "NaT",
1340            "2023-06-15",
1341            "NaT"
1342        });
1343
1344        // unique
1345        auto uniq = arr.unique();
1346        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347        if (uniq.size() != 3) {
1348            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350        }
1351
1352        // factorize
1353        auto [codes, uniques] = arr.factorize();
1354        // Codes for NaT should be -1
1355        if (codes.getElementAt({3}) != -1) {
is_na (pd_test_1_all.cpp:51)
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
55
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
argmax (pd_test_1_all.cpp:1323)
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327        }
1328
1329        std::cout << " -> tests passed" << std::endl;
1330    }
1331
1332    void pd_test_datetime_array_unique() {
1333        std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
as_ordered (pd_test_1_all.cpp:791)
781            unordered.min();
782        } catch (const std::exception&) {
783            threw = true;
784        }
785        if (!threw) {
786            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788        }
789
790        // Test as_ordered / as_unordered
791        pandas::CategoricalArray reordered = unordered.as_ordered();
792        if (!reordered.ordered()) {
793            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : as_ordered failed" << std::endl;
794            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: as_ordered failed");
795        }
796
797        std::cout << " -> tests passed" << std::endl;
798    }
799
800    void pd_test_categorical_array_comparisons() {
801        std::cout << "========= CategoricalArray: comparisons ======================= ";
as_unordered (pd_test_1_all.cpp:778)
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
782        } catch (const std::exception&) {
783            threw = true;
784        }
785        if (!threw) {
786            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788        }
categories (pd_test_1_all.cpp:389)
379        std::vector<std::optional<std::string>> vals = {
380            std::optional<std::string>("low"),
381            std::optional<std::string>("high"),
382            std::optional<std::string>("medium")
383        };
384        pandas::CategoricalArray arr3(vals, cats, true);  // ordered
385        if (!arr3.ordered()) {
386            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : should be ordered" << std::endl;
387            throw std::runtime_error("pd_test_categorical_array_constructors failed: should be ordered");
388        }
389        if (arr3.categories().size() != 3) {
390            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : categories size != 3" << std::endl;
391            throw std::runtime_error("pd_test_categorical_array_constructors failed: categories size != 3");
392        }
393
394        std::cout << " -> tests passed" << std::endl;
395    }
396
397    void pd_test_categorical_array_from_codes() {
398        std::cout << "========= CategoricalArray: from_codes ======================= ";
check_for_ordered (pd_test_3_all.cpp:1831)
1821    auto codes = arr.astype_codes();
1822    if (codes.getSize() != 5) {
1823        throw std::runtime_error("astype_codes failed");
1824    }
1825
1826    std::cout << " -> tests passed" << std::endl;
1827}
1828
1829void pd_test_3_all_categorical_check_ordered() {
1830    std::cout << "========= CategoricalArray.check_for_ordered() ========";
1831
1832    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
1833    pandas::CategoricalArray unordered_arr(values, false);
1834    pandas::CategoricalArray ordered_arr(values, {"a", "b", "c"}, true);
1835
1836    bool threw = false;
1837    try {
1838        unordered_arr.check_for_ordered("test_op");
1839    } catch (const std::exception&) {
1840        threw = true;
check_for_ordered (pd_test_3_all.cpp:1831)
1821    auto codes = arr.astype_codes();
1822    if (codes.getSize() != 5) {
1823        throw std::runtime_error("astype_codes failed");
1824    }
1825
1826    std::cout << " -> tests passed" << std::endl;
1827}
1828
1829void pd_test_3_all_categorical_check_ordered() {
1830    std::cout << "========= CategoricalArray.check_for_ordered() ========";
1831
1832    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
1833    pandas::CategoricalArray unordered_arr(values, false);
1834    pandas::CategoricalArray ordered_arr(values, {"a", "b", "c"}, true);
1835
1836    bool threw = false;
1837    try {
1838        unordered_arr.check_for_ordered("test_op");
1839    } catch (const std::exception&) {
1840        threw = true;
check_for_ordered (pd_test_3_all.cpp:1831)
1821    auto codes = arr.astype_codes();
1822    if (codes.getSize() != 5) {
1823        throw std::runtime_error("astype_codes failed");
1824    }
1825
1826    std::cout << " -> tests passed" << std::endl;
1827}
1828
1829void pd_test_3_all_categorical_check_ordered() {
1830    std::cout << "========= CategoricalArray.check_for_ordered() ========";
1831
1832    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
1833    pandas::CategoricalArray unordered_arr(values, false);
1834    pandas::CategoricalArray ordered_arr(values, {"a", "b", "c"}, true);
1835
1836    bool threw = false;
1837    try {
1838        unordered_arr.check_for_ordered("test_op");
1839    } catch (const std::exception&) {
1840        threw = true;
codes (pd_test_1_all.cpp:473)
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
468
469        std::vector<std::string> cats = {"x", "y", "z"};
470        std::vector<numpy::int32> codes = {0, 1, 2, 1, 0};
471        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
472
473        numpy::NDArray<numpy::int32> arr_codes = arr.codes();
474        if (arr_codes.getSize() != 5) {
475            std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : codes size != 5" << std::endl;
476            throw std::runtime_error("pd_test_categorical_array_codes_property failed: codes size != 5");
477        }
478
479        // Check codes match
480        for (size_t i = 0; i < codes.size(); ++i) {
481            if (arr_codes.getElementAt({i}) != codes[i]) {
482                std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : code mismatch at " << i << std::endl;
483                throw std::runtime_error("pd_test_categorical_array_codes_property failed: code mismatch");
delete_ (pd_test_1_all.cpp:10501)
10491    std::cout << " -> tests passed" << std::endl;
10492}
10493
10494void pd_test_extension_index_delete() {
10495    std::cout << "========= delete_ =========================";
10496
10497    pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498    pandas::CategoricalIndex idx(arr);
10499
10500    auto deleted = idx.delete_(1);
10501    auto v0 = deleted[0];
10502    auto v1 = deleted[1];
10503    auto v2 = deleted[2];
10504
10505    bool passed = (deleted.size() == 3 &&
10506                   v0.has_value() && *v0 == "a" &&
10507                   v1.has_value() && *v1 == "c" &&
10508                   v2.has_value() && *v2 == "d");
10509    if (!passed) {
10510        std::cout << "  [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
delete_ (pd_test_1_all.cpp:10501)
10491    std::cout << " -> tests passed" << std::endl;
10492}
10493
10494void pd_test_extension_index_delete() {
10495    std::cout << "========= delete_ =========================";
10496
10497    pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498    pandas::CategoricalIndex idx(arr);
10499
10500    auto deleted = idx.delete_(1);
10501    auto v0 = deleted[0];
10502    auto v1 = deleted[1];
10503    auto v2 = deleted[2];
10504
10505    bool passed = (deleted.size() == 3 &&
10506                   v0.has_value() && *v0 == "a" &&
10507                   v1.has_value() && *v1 == "c" &&
10508                   v2.has_value() && *v2 == "d");
10509    if (!passed) {
10510        std::cout << "  [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
dtype (pd_test_1_all.cpp:295)
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
293
294        pandas::BooleanArray arr;
295        if (arr.dtype().name() != "boolean") {
296            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298        }
299
300        if (arr.dtype().kind() != "b") {
301            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303        }
304
305        std::cout << " -> tests passed" << std::endl;
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937    void pd_test_config_version() {
938        std::cout << "========= df_config: version info ======================= ";
939        const char* version = pandas::DataFrameInfo::version();
940        if (version == nullptr || std::string(version).empty()) {
941            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943        }
944        std::cout << "-> tests passed" << std::endl;
945    }
946
947    void pd_test_config_na_repr() {
948        std::cout << "========= df_config: NA representation ======================= ";
949        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950        if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1343        // unique
1344        auto uniq = arr.unique();
1345        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1346        if (uniq.size() != 3) {
1347            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1348            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1349        }
1350
1351        // factorize
1352        auto [codes, uniques] = arr.factorize();
1353        // Codes for NaT should be -1
1354        if (codes.getElementAt({3}) != -1) {
1355            std::cout << "  [FAIL] : factorize: NaT code should be -1" << std::endl;
1356            throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1357        }
1358        // Same values should have same codes
1359        if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1360            std::cout << "  [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1361            throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1362        }
has_na (pd_test_1_all.cpp:61)
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
55
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
memory_usage (pd_test_1_all.cpp:27063)
27053        }
27054
27055        std::cout << "====================================== [OK] pd_test_value_counts test suite ========================== " << std::endl;
27056        return 0;
27057    }
27058
27059} // namespace dataframe_tests
27060// ------------------- pd_test_value_counts.cpp (end) -----------------------------
27061
27062// ------------------- pd_test_memory_usage.cpp (start) -----------------------------
27063// Tests for DataFrame.memory_usage() - pandas-compatible memory usage reporting
27064
27065namespace dataframe_tests {
27066    namespace dataframe_tests_memory_usage {
27067
27068        void pd_test_memory_usage_basic() {
27069            std::cout << "========= basic memory_usage =======================";
27070
27071            // Create a simple DataFrame with multiple columns
27072            std::map<std::string, std::vector<double>> data;
27073            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
name (pd_test_1_all.cpp:295)
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
293
294        pandas::BooleanArray arr;
295        if (arr.dtype().name() != "boolean") {
296            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298        }
299
300        if (arr.dtype().kind() != "b") {
301            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303        }
304
305        std::cout << " -> tests passed" << std::endl;
nbytes (pd_test_1_all.cpp:6214)
6204            }
6205
6206            // Test empty DataFrame
6207            pandas::DataFrame empty_df;
6208            if (!empty_df.empty()) {
6209                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210                throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211            }
6212
6213            // Test nbytes > 0 for non-empty
6214            if (df.nbytes() == 0) {
6215                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
ndim (pd_test_1_all.cpp:6195)
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
6199
6200            // Test empty
6201            if (df.empty()) {
6202                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203                throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204            }
ordered (pd_test_1_all.cpp:359)
349    void pd_test_categorical_array_constructors() {
350        std::cout << "========= CategoricalArray: constructors ======================= ";
351
352        // Default constructor
353        pandas::CategoricalArray arr1;
354        if (arr1.size() != 0) {
355            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
356            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
357        }
358        if (arr1.ordered()) {
359            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
360            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
361        }
362
363        // Constructor from values (infer categories)
364        std::vector<std::optional<std::string>> values = {
365            std::optional<std::string>("a"),
366            std::optional<std::string>("b"),
367            std::optional<std::string>("a"),
368            std::optional<std::string>("c")
ravel (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
remove_categories (pd_test_1_all.cpp:591)
581    }
582
583    void pd_test_categorical_array_remove_categories() {
584        std::cout << "========= CategoricalArray: remove_categories ======================= ";
585
586        std::vector<std::string> cats = {"a", "b", "c"};
587        std::vector<numpy::int32> codes = {0, 1, 2, 1};  // a, b, c, b
588        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
589
590        // Remove 'c' - values with 'c' become NA
591        pandas::CategoricalArray result = arr.remove_categories({"c"});
592
593        if (result.categories().size() != 2) {
594            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_categories() : categories size != 2" << std::endl;
595            throw std::runtime_error("pd_test_categorical_array_remove_categories failed: categories size != 2");
596        }
597
598        // Element at index 2 should now be NA (was 'c')
599        if (!result.is_na(2)) {
600            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_categories() : removed category should be NA" << std::endl;
601            throw std::runtime_error("pd_test_categorical_array_remove_categories failed: removed category should be NA");
remove_unused_categories (pd_test_1_all.cpp:737)
727        std::cout << " -> tests passed" << std::endl;
728    }
729
730    void pd_test_categorical_array_remove_unused_categories() {
731        std::cout << "========= CategoricalArray: remove_unused_categories ======================= ";
732
733        std::vector<std::string> cats = {"a", "b", "c", "d"};
734        std::vector<numpy::int32> codes = {0, 0, 2};  // a, a, c (b and d unused)
735        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
736
737        pandas::CategoricalArray result = arr.remove_unused_categories();
738
739        // Only 'a' and 'c' should remain
740        if (result.categories().size() != 2) {
741            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_unused_categories() : categories size != 2" << std::endl;
742            throw std::runtime_error("pd_test_categorical_array_remove_unused_categories failed: categories size != 2");
743        }
744
745        // Values should be preserved
746        std::optional<std::string> val0 = result[0];
747        std::optional<std::string> val2 = result[2];
reorder_categories (pd_test_1_all.cpp:695)
685    void pd_test_categorical_array_reorder_categories() {
686        std::cout << "========= CategoricalArray: reorder_categories ======================= ";
687
688        std::vector<std::string> cats = {"a", "b", "c"};
689        std::vector<numpy::int32> codes = {0, 1, 2};  // a, b, c
690        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
691
692        // Reorder categories
693        std::vector<std::string> new_order = {"c", "b", "a"};
694        pandas::CategoricalArray result = arr.reorder_categories(new_order);
695
696        // Check categories are reordered
697        const std::vector<std::string>& result_cats = result.categories();
698        if (result_cats[0] != "c" || result_cats[1] != "b" || result_cats[2] != "a") {
699            std::cout << "  [FAIL] : in pd_test_categorical_array_reorder_categories() : categories not reordered" << std::endl;
700            throw std::runtime_error("pd_test_categorical_array_reorder_categories failed: categories not reordered");
701        }
702
703        // Values should be preserved
704        std::optional<std::string> val0 = result[0];
repeat (pd_test_3_all.cpp:2166)
2156    auto viewed = arr.view();
2157    if (viewed.size() != 3 || !viewed.equals(arr)) {
2158        throw std::runtime_error("view failed");
2159    }
2160
2161    std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165    std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167    std::vector<std::optional<std::string>> values = {"a", "b"};
2168    pandas::CategoricalArray arr(values);
2169
2170    auto result = arr.repeat(3);
2171    if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172        *result[3] != "b" || *result[5] != "b") {
2173        throw std::runtime_error("repeat scalar failed");
2174    }
repeat (pd_test_3_all.cpp:2166)
2156    auto viewed = arr.view();
2157    if (viewed.size() != 3 || !viewed.equals(arr)) {
2158        throw std::runtime_error("view failed");
2159    }
2160
2161    std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165    std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167    std::vector<std::optional<std::string>> values = {"a", "b"};
2168    pandas::CategoricalArray arr(values);
2169
2170    auto result = arr.repeat(3);
2171    if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172        *result[3] != "b" || *result[5] != "b") {
2173        throw std::runtime_error("repeat scalar failed");
2174    }
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
reshape (pd_test_3_all.cpp:2186)
2176    auto result2 = arr.repeat({1, 2});
2177    if (result2.size() != 3 || *result2[0] != "a" || *result2[1] != "b" || *result2[2] != "b") {
2178        throw std::runtime_error("repeat array failed");
2179    }
2180
2181    std::cout << " -> tests passed" << std::endl;
2182}
2183
2184void pd_test_3_all_categorical_reshape() {
2185    std::cout << "========= CategoricalArray.reshape() ==================";
2186
2187    std::vector<std::optional<std::string>> values = {"a", "b", "c", "d"};
2188    pandas::CategoricalArray arr(values);
2189
2190    auto result = arr.reshape({4});
2191    if (result.size() != 4) {
2192        throw std::runtime_error("reshape failed");
2193    }
2194
2195    bool threw = false;
set_categories (pd_test_1_all.cpp:623)
613    void pd_test_categorical_array_set_categories() {
614        std::cout << "========= CategoricalArray: set_categories ======================= ";
615
616        std::vector<std::string> cats = {"a", "b"};
617        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
618        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
619
620        // Set new categories (values not in new categories become NA)
621        std::vector<std::string> new_cats = {"a", "c"};  // 'b' removed, 'c' added
622        pandas::CategoricalArray result = arr.set_categories(new_cats);
623
624        if (result.categories().size() != 2) {
625            std::cout << "  [FAIL] : in pd_test_categorical_array_set_categories() : categories size != 2" << std::endl;
626            throw std::runtime_error("pd_test_categorical_array_set_categories failed: categories size != 2");
627        }
628
629        // Element at index 1 should be NA (was 'b', now not in categories)
630        if (!result.is_na(1)) {
631            std::cout << "  [FAIL] : in pd_test_categorical_array_set_categories() : 'b' value should be NA" << std::endl;
632            throw std::runtime_error("pd_test_categorical_array_set_categories failed: 'b' value should be NA");
set_name (pd_test_1_all.cpp:11798)
11788                throw std::runtime_error("pd_test_index_vector_constructor failed");
11789            }
11790
11791            std::cout << " -> tests passed" << std::endl;
11792        }
11793
11794        void pd_test_index_copy_constructor() {
11795            std::cout << "========= copy constructor ============================";
11796
11797            pandas::Index<numpy::int64> idx1{1, 2, 3};
11798            idx1.set_name("original");
11799
11800            pandas::Index<numpy::int64> idx2(idx1);
11801
11802            bool passed = (idx2.size() == 3);
11803            passed = passed && (idx2.name().value() == "original");
11804            passed = passed && idx2.equals(idx1);
11805
11806            if (!passed) {
11807                std::cout << "  [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808                throw std::runtime_error("pd_test_index_copy_constructor failed");
set_ordered (pd_test_3_all.cpp:2210)
2200        threw = true;
2201    }
2202    if (!threw) {
2203        throw std::runtime_error("reshape should throw for incompatible shape");
2204    }
2205
2206    std::cout << " -> tests passed" << std::endl;
2207}
2208
2209void pd_test_3_all_categorical_set_ordered() {
2210    std::cout << "========= CategoricalArray.set_ordered() ==============";
2211
2212    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2213    pandas::CategoricalArray arr(values, false);
2214
2215    if (arr.ordered()) {
2216        throw std::runtime_error("initial should be unordered");
2217    }
2218
2219    auto ordered = arr.set_ordered(true);
2220    if (!ordered.ordered()) {
shape (pd_test_1_all.cpp:6188)
6178            std::cout << "========= properties =======================";
6179
6180            std::map<std::string, std::vector<numpy::float64>> data;
6181            data["A"] = {1.0, 2.0, 3.0, 4.0};
6182            data["B"] = {5.0, 6.0, 7.0, 8.0};
6183            data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
slice (pd_test_1_all.cpp:17546)
17536// ============================================================================
17537// Slicing / Indexing Tests
17538// ============================================================================
17539
17540void pd_test_period_index_slice() {
17541    std::cout << "========= slice method ================================";
17542
17543    std::vector<int64_t> ordinals = {0, 1, 2, 3, 4};
17544    pandas::PeriodIndex idx(ordinals, "D");
17545
17546    pandas::PeriodIndex sliced = idx.slice(1, 4);
17547
17548    bool passed = (sliced.size() == 3 &&
17549                   sliced[0].has_value() && *sliced[0] == 1);
17550    if (!passed) {
17551        std::cout << "  [FAIL] : in pd_test_period_index_slice()" << std::endl;
17552        throw std::runtime_error("pd_test_period_index_slice failed");
17553    }
17554
17555    std::cout << " -> tests passed" << std::endl;
17556}