CategoricalIndex#

class pandas::CategoricalIndex#

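Index class for categorical axis labels in pandas data structures. It can be constructed from a CategoricalArray or from a list of optional string values, and exposes the underlying categories(), integer codes(), and ordered() flag.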

Example#

#include <pandas/pandas.h>
using namespace pandas;

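// Create a CategoricalIndex from string labels (categories are taken from the values)
std::vector<std::optional<std::string>> values = {"a", "b", "a"};
CategoricalIndex idx(values, "my_index");
size_t len = idx.size();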

Constructors#

Signature

Location

Example

explicit CategoricalIndex(const CategoricalArray& array, const std::optional<std::string>& name = std::nullopt)

pd_categorical_index.h:62

View

explicit CategoricalIndex(CategoricalArray&& array, const std::optional<std::string>& name = std::nullopt)

pd_categorical_index.h:72

View

explicit CategoricalIndex(const std::vector<std::optional<std::string>>& values, const std::optional<std::string>& name = std::nullopt, bool ordered = false)

pd_categorical_index.h:83

View

CategoricalIndex(const std::vector<std::optional<std::string>>& values, const std::vector<std::string>& categories, bool ordered = false, const std::optional<std::string>& name = std::nullopt)

pd_categorical_index.h:96

View

CategoricalIndex(const CategoricalIndex& other)

pd_categorical_index.h:106

View

CategoricalIndex(CategoricalIndex&& other) noexcept

pd_categorical_index.h:113

View

Construction#

Signature

Return Type

Location

Example

static CategoricalIndex from_codes( const std::vector<numpy::int32>& codes, const std::vector<std::string>& categories, bool ordered = false, const std::optional<std::string>& name = std::nullopt)

static CategoricalIndex

pd_categorical_index.h:153

View

Indexing / Selection#

Signature

Return Type

Location

Example

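get_indexer_non_unique(const CategoricalIndex& target) const

(return type not captured by the documentation generator; see the declaration at the location below)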

pd_categorical_index.h:945

View

CategoricalIndex get_level_values(int level) const

CategoricalIndex

pd_categorical_index.h:884

View

CategoricalIndex get_level_values(const std::string& level_name) const

CategoricalIndex

pd_categorical_index.h:900

View

size_t get_slice_bound(const std::string& label, const std::string& side = "left") const

size_t

pd_categorical_index.h:1003

View

std::string get_value_str(size_t index) const override

std::string

pd_categorical_index.h:587

View

CategoricalIndex where(const numpy::NDArray<numpy::bool_>& cond, const std::string& other) const

CategoricalIndex

pd_categorical_index.h:1761

View

Data Manipulation#

Signature

Return Type

Location

Example

CategoricalIndex droplevel(int level = 0) const

CategoricalIndex

pd_categorical_index.h:868

View

std::pair<CategoricalIndex, numpy::NDArray<numpy::int64>> reindex( const CategoricalIndex& target, const std::string& method = "") const

std::pair<CategoricalIndex, numpy::NDArray<numpy::int64>>

pd_categorical_index.h:1431

View

CategoricalIndex rename(const std::optional<std::string>& new_name) const

CategoricalIndex

pd_categorical_index.h:482

View

CategoricalIndex rename_categories(const std::vector<std::string>& new_names) const

CategoricalIndex

pd_categorical_index.h:262

View

CategoricalIndex rename_categories( const std::unordered_map<std::string, std::string>& mapping) const

CategoricalIndex

pd_categorical_index.h:276

View

CategoricalIndex set_names(const std::optional<std::string>& new_name) const

CategoricalIndex

pd_categorical_index.h:1557

View

Statistics#

Signature

Return Type

Location

Example

std::optional<std::string> max() const

std::optional<std::string>

pd_categorical_index.h:414

View

std::optional<std::string> min() const

std::optional<std::string>

pd_categorical_index.h:399

View

Aggregation#

Signature

Return Type

Location

Example

std::unordered_map<GroupT, std::vector<size_t>> groupby( const std::vector<GroupT>& values) const

std::unordered_map<GroupT, std::vector<size_t>>

pd_categorical_index.h:1040

View

CategoricalIndex map(const std::unordered_map<std::string, std::string>& mapping) const

CategoricalIndex

pd_categorical_index.h:460

View

Arithmetic#

Signature

Return Type

Location

Example

CategoricalIndex add_categories(const std::vector<std::string>& new_cats) const

CategoricalIndex

pd_categorical_index.h:308

View

Comparison#

Signature

Return Type

Location

Example

bool equals(const CategoricalIndex& other) const

bool

pd_categorical_index.h:611

View

Sorting#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::int64> argsort(bool ascending = true) const

numpy::NDArray<numpy::int64>

pd_categorical_index.h:1793

View

size_t searchsorted(const std::string& value, const std::string& side = "left") const

size_t

pd_categorical_index.h:1518

View

CategoricalIndex sort_values(bool ascending = true) const

CategoricalIndex

pd_categorical_index.h:1854

View

Reshaping#

Signature

Return Type

Location

Example

CategoricalIndex transpose() const

CategoricalIndex

pd_categorical_index.h:1741

View

Combining#

Signature

Return Type

Location

Example

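join(const CategoricalIndex& other, const std::string& how = "left", bool sort = false) const

(return type not captured by the documentation generator; see the declaration at the location below)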

pd_categorical_index.h:1182

View

Time Series#

Signature

Return Type

Location

Example

std::optional<std::string> asof(const std::string& where) const

std::optional<std::string>

pd_categorical_index.h:683

View

numpy::NDArray<numpy::int64> asof_locs(const std::vector<std::string>& where) const

numpy::NDArray<numpy::int64>

pd_categorical_index.h:745

View

numpy::NDArray<numpy::int64> diff(int64_t periods = 1) const

numpy::NDArray<numpy::int64>

pd_categorical_index.h:824

View

CategoricalIndex shift(int64_t periods) const

CategoricalIndex

pd_categorical_index.h:1569

View

I/O#

Signature

Return Type

Location

Example

CategoricalIndex to_flat_index() const

CategoricalIndex

pd_categorical_index.h:1669

View

std::vector<std::optional<std::string>> to_list() const

std::vector<std::optional<std::string>>

pd_categorical_index.h:1704

View

std::vector<std::string> to_numpy(bool copy = true, const std::string& na_value = "") const

std::vector<std::string>

pd_categorical_index.h:1678

View

std::string to_string() const override

std::string

pd_categorical_index.h:517

View

std::vector<std::optional<std::string>> tolist() const

std::vector<std::optional<std::string>>

pd_categorical_index.h:1700

View

Conversion#

Signature

Return Type

Location

Example

std::vector<std::string> astype(const std::string& dtype = "object") const

std::vector<std::string>

pd_categorical_index.h:781

View

CategoricalIndex copy() const

CategoricalIndex

pd_categorical_index.h:473

View

CategoricalIndex infer_objects(bool copy_data = true) const

CategoricalIndex

pd_categorical_index.h:1139

View

std::vector<std::optional<std::string>> view() const

std::vector<std::optional<std::string>>

pd_categorical_index.h:1750

View

Type Checking#

Signature

Return Type

Location

Example

bool is_(const CategoricalIndex& other) const

bool

pd_categorical_index.h:1149

View

bool is_boolean() const

bool

pd_categorical_index.h:1071

View

bool is_categorical() const

bool

pd_categorical_index.h:1083

View

bool is_floating() const

bool

pd_categorical_index.h:1091

View

bool is_integer() const

bool

pd_categorical_index.h:1099

View

bool is_interval() const

bool

pd_categorical_index.h:1107

View

bool is_numeric() const

bool

pd_categorical_index.h:1115

View

bool is_object() const

bool

pd_categorical_index.h:1123

View

Other Methods#

Signature

Return Type

Location

Example

bool all() const

bool

pd_categorical_index.h:645

View

bool any() const

bool

pd_categorical_index.h:661

View

std::optional<size_t> argmax() const

std::optional<size_t>

pd_categorical_index.h:440

View

std::optional<size_t> argmin() const

std::optional<size_t>

pd_categorical_index.h:427

View

CategoricalIndex as_ordered() const

CategoricalIndex

pd_categorical_index.h:369

View

CategoricalIndex as_unordered() const

CategoricalIndex

pd_categorical_index.h:382

View

const std::vector<std::string>& categories() const

const std::vector<std::string>&

pd_categorical_index.h:202

View

std::unique_ptr<IndexBase> clone() const override

std::unique_ptr<IndexBase>

pd_categorical_index.h:502

View

const numpy::NDArray<numpy::int32>& codes() const

const numpy::NDArray<numpy::int32>&

pd_categorical_index.h:214

View

std::vector<std::string> format(const std::string& formatter = "") const

std::vector<std::string>

pd_categorical_index.h:920

View

bool has_category(const std::string& category) const

bool

pd_categorical_index.h:243

View

bool holds_integer() const

bool

pd_categorical_index.h:1063

View

bool identical(const CategoricalIndex& other) const

bool

pd_categorical_index.h:630

View

std::string inferred_type() const override

std::string

pd_categorical_index.h:494

View

std::string item() const

std::string

pd_categorical_index.h:1158

View

size_t memory_usage(bool deep = false) const

size_t

pd_categorical_index.h:1354

View

size_t num_categories() const

size_t

pd_categorical_index.h:234

View

bool ordered() const

bool

pd_categorical_index.h:226

View

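CategoricalIndex joined_index(joined_values, merged_cats, ordered(), this->name())

(Note: this entry does not appear to be a method. It looks like a local variable declaration inside a method body that the documentation generator picked up by mistake; it is not part of the public API.)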

pd_categorical_index.h:1320

View

CategoricalIndex putmask(const numpy::NDArray<numpy::bool_>& mask, const std::string& value) const

CategoricalIndex

pd_categorical_index.h:1386

View

std::vector<std::optional<std::string>> ravel() const

std::vector<std::optional<std::string>>

pd_categorical_index.h:1414

View

CategoricalIndex remove_categories(const std::vector<std::string>& removals) const

CategoricalIndex

pd_categorical_index.h:322

View

CategoricalIndex remove_unused_categories() const

CategoricalIndex

pd_categorical_index.h:334

View

CategoricalIndex reorder_categories(const std::vector<std::string>& new_order) const

CategoricalIndex

pd_categorical_index.h:293

View

CategoricalIndex repeat(size_t repeats) const

CategoricalIndex

pd_categorical_index.h:1480

View

std::string repr() const override

std::string

pd_categorical_index.h:578

View

CategoricalIndex round(int decimals = 0) const

CategoricalIndex

pd_categorical_index.h:1504

View

CategoricalIndex set_categories(const std::vector<std::string>& new_cats, bool rename = false) const

CategoricalIndex

pd_categorical_index.h:350

View

std::pair<size_t, size_t> slice_indexer( const std::optional<std::string>& start, const std::optional<std::string>& stop) const

std::pair<size_t, size_t>

pd_categorical_index.h:1604

View

std::pair<size_t, size_t> slice_locs( const std::optional<std::string>& start = std::nullopt, const std::optional<std::string>& stop = std::nullopt) const

std::pair<size_t, size_t>

pd_categorical_index.h:1627

View

CategoricalIndex sort(bool ascending = true) const

CategoricalIndex

pd_categorical_index.h:1873

View

std::pair<CategoricalIndex, numpy::NDArray<numpy::int64>> sortlevel( int level = 0, bool ascending = true) const

std::pair<CategoricalIndex, numpy::NDArray<numpy::int64>>

pd_categorical_index.h:1640

View

IndexTypeId type_id() const override

IndexTypeId

pd_categorical_index.h:506

View

Internal Methods#

2 internal methods (prefixed with underscore)

Code Examples#

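The following examples are extracted from the test suite. They appear to be matched by method name only, so some excerpts exercise a same-named method on a different class (for example CategoricalArray, IntervalIndex64, MultiIndex, or DataFrame) rather than on CategoricalIndex itself, and the same excerpt may be repeated for each overload.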

CategoricalIndex (pd_test_2_all.cpp:20850)
20840    auto sgb = data.groupby(by);
20841    sgb.set_categorical_categories({"A", "B", "C"});
20842    sgb.set_index_name("cat_key");
20843
20844    pandas::Series<numpy::float64> result(values);
20845    std::vector<std::string> idx_labels = {"A", "B"};
20846    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20847
20848    sgb.apply_result_index(result);
20849
20850    // Should have CategoricalIndex (dtype_name() returns "category")
20851    check(result.index().dtype_name() == "category", "is_categorical_index");
20852}
20853
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
CategoricalIndex (pd_test_2_all.cpp:20850)
20840    auto sgb = data.groupby(by);
20841    sgb.set_categorical_categories({"A", "B", "C"});
20842    sgb.set_index_name("cat_key");
20843
20844    pandas::Series<numpy::float64> result(values);
20845    std::vector<std::string> idx_labels = {"A", "B"};
20846    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20847
20848    sgb.apply_result_index(result);
20849
20850    // Should have CategoricalIndex (dtype_name() returns "category")
20851    check(result.index().dtype_name() == "category", "is_categorical_index");
20852}
20853
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
CategoricalIndex (pd_test_2_all.cpp:20850)
20840    auto sgb = data.groupby(by);
20841    sgb.set_categorical_categories({"A", "B", "C"});
20842    sgb.set_index_name("cat_key");
20843
20844    pandas::Series<numpy::float64> result(values);
20845    std::vector<std::string> idx_labels = {"A", "B"};
20846    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20847
20848    sgb.apply_result_index(result);
20849
20850    // Should have CategoricalIndex (dtype_name() returns "category")
20851    check(result.index().dtype_name() == "category", "is_categorical_index");
20852}
20853
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
CategoricalIndex (pd_test_2_all.cpp:20850)
20840    auto sgb = data.groupby(by);
20841    sgb.set_categorical_categories({"A", "B", "C"});
20842    sgb.set_index_name("cat_key");
20843
20844    pandas::Series<numpy::float64> result(values);
20845    std::vector<std::string> idx_labels = {"A", "B"};
20846    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20847
20848    sgb.apply_result_index(result);
20849
20850    // Should have CategoricalIndex (dtype_name() returns "category")
20851    check(result.index().dtype_name() == "category", "is_categorical_index");
20852}
20853
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
CategoricalIndex (pd_test_2_all.cpp:20850)
20840    auto sgb = data.groupby(by);
20841    sgb.set_categorical_categories({"A", "B", "C"});
20842    sgb.set_index_name("cat_key");
20843
20844    pandas::Series<numpy::float64> result(values);
20845    std::vector<std::string> idx_labels = {"A", "B"};
20846    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20847
20848    sgb.apply_result_index(result);
20849
20850    // Should have CategoricalIndex (dtype_name() returns "category")
20851    check(result.index().dtype_name() == "category", "is_categorical_index");
20852}
20853
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
CategoricalIndex (pd_test_2_all.cpp:20850)
20840    auto sgb = data.groupby(by);
20841    sgb.set_categorical_categories({"A", "B", "C"});
20842    sgb.set_index_name("cat_key");
20843
20844    pandas::Series<numpy::float64> result(values);
20845    std::vector<std::string> idx_labels = {"A", "B"};
20846    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20847
20848    sgb.apply_result_index(result);
20849
20850    // Should have CategoricalIndex (dtype_name() returns "category")
20851    check(result.index().dtype_name() == "category", "is_categorical_index");
20852}
20853
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
from_codes (pd_test_1_all.cpp:403)
393        std::cout << " -> tests passed" << std::endl;
394    }
395
396    void pd_test_categorical_array_from_codes() {
397        std::cout << "========= CategoricalArray: from_codes ======================= ";
398
399        std::vector<std::string> cats = {"a", "b", "c"};
400        std::vector<numpy::int32> codes = {0, 1, 2, 0, 1, -1};  // -1 is NA
401
402        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, false);
403
404        if (arr.size() != 6) {
405            std::cout << "  [FAIL] : in pd_test_categorical_array_from_codes() : size != 6" << std::endl;
406            throw std::runtime_error("pd_test_categorical_array_from_codes failed: size != 6");
407        }
408
409        // Check that code=-1 creates NA
410        if (!arr.is_na(5)) {
411            std::cout << "  [FAIL] : in pd_test_categorical_array_from_codes() : code -1 should be NA" << std::endl;
412            throw std::runtime_error("pd_test_categorical_array_from_codes failed: code -1 should be NA");
get_indexer_non_unique (pd_test_3_all.cpp:739)
729    if (indexer.getElementAt({1}) != 3) {
730        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : 'd' should be at index 3" << std::endl;
731        throw std::runtime_error("pd_test_3_all_index_indexers failed: 'd' index");
732    }
733    // "f" doesn't exist -> -1
734    if (indexer.getElementAt({2}) != -1) {
735        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : 'f' should be -1" << std::endl;
736        throw std::runtime_error("pd_test_3_all_index_indexers failed: 'f' index");
737    }
738
739    // Test get_indexer_non_unique()
740    std::vector<std::string> target2 = {"a", "c", "z"};  // "z" doesn't exist
741    pandas::Index<std::string> target_idx(target2);
742    auto [indexer2, missing] = idx.get_indexer_non_unique(target_idx);
743
744    if (indexer2.getSize() < 2) {
745        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_non_unique size too small" << std::endl;
746        throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_non_unique size");
747    }
748
749    // Test slice_indexer()
get_level_values (pd_test_3_all.cpp:4524)
4514    }
4515
4516    std::cout << " -> tests passed" << std::endl;
4517}
4518
4519void pd_test_3_all_interval_index_get_level_values_droplevel() {
4520    std::cout << "========= IntervalIndex.get_level_values/droplevel() ";
4521
4522    pandas::IntervalIndex64 idx = pandas::IntervalIndex64::from_breaks({0, 10, 20, 30});
4523
4524    // get_level_values(0) should work
4525    pandas::IntervalIndex64 level_vals = idx.get_level_values(0);
4526    if (level_vals.size() != idx.size()) {
4527        throw std::runtime_error("get_level_values(0) size mismatch");
4528    }
4529
4530    // get_level_values(1) should throw
4531    bool threw = false;
4532    try {
4533        idx.get_level_values(1);
4534    } catch (const std::out_of_range&) {
get_level_values (pd_test_3_all.cpp:4524)
4514    }
4515
4516    std::cout << " -> tests passed" << std::endl;
4517}
4518
4519void pd_test_3_all_interval_index_get_level_values_droplevel() {
4520    std::cout << "========= IntervalIndex.get_level_values/droplevel() ";
4521
4522    pandas::IntervalIndex64 idx = pandas::IntervalIndex64::from_breaks({0, 10, 20, 30});
4523
4524    // get_level_values(0) should work
4525    pandas::IntervalIndex64 level_vals = idx.get_level_values(0);
4526    if (level_vals.size() != idx.size()) {
4527        throw std::runtime_error("get_level_values(0) size mismatch");
4528    }
4529
4530    // get_level_values(1) should throw
4531    bool threw = false;
4532    try {
4533        idx.get_level_values(1);
4534    } catch (const std::out_of_range&) {
get_slice_bound (pd_test_3_all.cpp:3644)
3634    formatted = idx.format(custom_formatter);
3635
3636    if (formatted[0] != "val:1") {
3637        throw std::runtime_error("custom formatter failed");
3638    }
3639
3640    std::cout << " -> tests passed" << std::endl;
3641}
3642
3643void pd_test_3_all_index_get_slice_bound() {
3644    std::cout << "========= Index.get_slice_bound() ==================";
3645
3646    pandas::Index<numpy::int64> idx({10, 20, 30, 40, 50});
3647
3648    // Exact match, left side
3649    size_t bound = idx.get_slice_bound(30, "left");
3650    if (bound != 2) {
3651        throw std::runtime_error("left bound for 30 should be 2");
3652    }
3653
3654    // Exact match, right side
get_value_str (pd_test_1_all.cpp:4665)
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
4666            passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667            if (!passed) {
4668                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670            }
4671
4672            // A-B correlation should be 1.0 (perfect correlation)
4673            std::string ab = corr_df["B"].get_value_str(0);
4674            passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675            if (!passed) {
where (pd_test_1_all.cpp:22018)
22008            data["B"] = {5.0, 6.0, 7.0, 8.0};
22009            pandas::DataFrame df(data);
22010
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
droplevel (pd_test_1_all.cpp:14428)
14418        void pd_test_multiindex_droplevel() {
14419            std::cout << "========= droplevel =================================== ";
14420
14421            std::vector<std::vector<std::string>> arrays = {
14422                {"a", "a", "b"},
14423                {"x", "y", "z"},
14424                {"1", "2", "3"}
14425            };
14426
14427            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428            pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430            bool passed = true;
14431
14432            if (dropped.nlevels() != 2) {
14433                std::cout << "  [FAIL] : nlevels should be 2 after drop" << std::endl;
14434                passed = false;
14435            }
14436
14437            // Check remaining levels
14438            auto tup = dropped[0];
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
rename (pd_test_1_all.cpp:5816)
5806    std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810    std::cout << "========= rename ======================================";
5811
5812    pandas::CategoricalArray arr({"x", "y"});
5813    pandas::CategoricalIndex idx(arr, "old_name");
5814
5815    pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817    bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818                   renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819    if (!passed) {
5820        std::cout << "  [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821        throw std::runtime_error("pd_test_categorical_index_rename failed");
5822    }
5823
5824    std::cout << " -> tests passed" << std::endl;
5825}
rename_categories (pd_test_1_all.cpp:655)
645    void pd_test_categorical_array_rename_categories() {
646        std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648        std::vector<std::string> cats = {"a", "b"};
649        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
650        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652        // Rename categories
653        std::vector<std::string> new_names = {"alpha", "beta"};
654        pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656        // Check categories are renamed
657        const std::vector<std::string>& result_cats = result.categories();
658        if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659            std::cout << "  [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660            throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661        }
662
663        // Values should now be renamed
664        std::optional<std::string> val = result[0];
rename_categories (pd_test_1_all.cpp:655)
645    void pd_test_categorical_array_rename_categories() {
646        std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648        std::vector<std::string> cats = {"a", "b"};
649        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
650        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652        // Rename categories
653        std::vector<std::string> new_names = {"alpha", "beta"};
654        pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656        // Check categories are renamed
657        const std::vector<std::string>& result_cats = result.categories();
658        if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659            std::cout << "  [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660            throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661        }
662
663        // Values should now be renamed
664        std::optional<std::string> val = result[0];
set_names (pd_test_1_all.cpp:14519)
14509            std::cout << "-> tests passed" << std::endl;
14510        }
14511
14512        void pd_test_multiindex_set_names() {
14513            std::cout << "========= set_names =================================== ";
14514
14515            std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
14516            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14517
14518            std::vector<std::optional<std::string>> new_names = {"level_a", "level_b"};
14519            pandas::MultiIndex named = mi.set_names(new_names);
14520
14521            bool passed = (named.names()[0] == "level_a" && named.names()[1] == "level_b");
14522
14523            if (!passed) {
14524                std::cout << "  [FAIL] : names not set correctly" << std::endl;
14525                throw std::runtime_error("pd_test_multiindex_set_names failed");
14526            }
14527
14528            std::cout << "-> tests passed" << std::endl;
14529        }
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833    std::cout << "========= map =========================================";
5834
5835    pandas::CategoricalArray arr({"yes", "no", "yes"});
5836    pandas::CategoricalIndex idx(arr);
5837
5838    std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839    pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841    bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842                   !mapped.has_category("yes") && !mapped.has_category("no"));
5843    if (!passed) {
5844        std::cout << "  [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845        throw std::runtime_error("pd_test_categorical_index_map failed");
5846    }
5847
5848    std::cout << " -> tests passed" << std::endl;
5849}
add_categories (pd_test_1_all.cpp:555)
545    }
546
547    void pd_test_categorical_array_add_categories() {
548        std::cout << "========= CategoricalArray: add_categories ======================= ";
549
550        std::vector<std::string> cats = {"a", "b"};
551        std::vector<numpy::int32> codes = {0, 1, 0};
552        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
553
554        // Add new categories
555        pandas::CategoricalArray result = arr.add_categories({"c", "d"});
556        if (result.categories().size() != 4) {
557            std::cout << "  [FAIL] : in pd_test_categorical_array_add_categories() : new categories size != 4" << std::endl;
558            throw std::runtime_error("pd_test_categorical_array_add_categories failed: new categories size != 4");
559        }
560
561        // Original values should be preserved
562        std::optional<std::string> val = result[0];
563        if (!val.has_value() || *val != "a") {
564            std::cout << "  [FAIL] : in pd_test_categorical_array_add_categories() : value not preserved" << std::endl;
565            throw std::runtime_error("pd_test_categorical_array_add_categories failed: value not preserved");
equals (pd_test_1_all.cpp:5866)
5856    std::cout << "========= equals ======================================";
5857
5858    pandas::CategoricalArray arr1({"a", "b", "a"});
5859    pandas::CategoricalArray arr2({"a", "b", "a"});
5860    pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862    pandas::CategoricalIndex idx1(arr1);
5863    pandas::CategoricalIndex idx2(arr2);
5864    pandas::CategoricalIndex idx3(arr3);
5865
5866    bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867    if (!passed) {
5868        std::cout << "  [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869        throw std::runtime_error("pd_test_categorical_index_equals failed");
5870    }
5871
5872    std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876    std::cout << "========= identical ===================================";
argsort (pd_test_1_all.cpp:1304)
1294        std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296        pandas::DatetimeArray arr(std::vector<std::string>{
1297            "2023-06-15",
1298            "NaT",
1299            "2023-01-01",
1300            "2023-12-31"
1301        });
1302
1303        // argsort ascending
1304        auto indices = arr.argsort(true, "last");
1305        // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
searchsorted (pd_test_1_all.cpp:18958)
18949    // =========================================================================
18950    // Search Tests
18951    // =========================================================================
18952
18953    void pd_test_range_index_searchsorted() {
18954        std::cout << "========= searchsorted ================================ ";
18955
18956        pandas::RangeIndex ri(0, 10, 2);  // [0, 2, 4, 6, 8]
18957
18958        bool passed = (ri.searchsorted(4, "left") == 2 &&
18959                      ri.searchsorted(4, "right") == 3 &&
18960                      ri.searchsorted(3, "left") == 2 &&   // 3 would go between 2 and 4
18961                      ri.searchsorted(-1, "left") == 0 &&  // Before all
18962                      ri.searchsorted(10, "left") == 5);   // After all
18963
18964        if (!passed) {
18965            std::cout << "  [FAIL] : searchsorted" << std::endl;
18966            throw std::runtime_error("pd_test_range_index_searchsorted failed");
18967        }
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
transpose (pd_test_1_all.cpp:16648)
16638                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640            }
16641
16642            passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643            if (!passed) {
16644                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646            }
16647
16648            // Test transpose() alias
16649            auto transposed2 = s.transpose();
16650            passed = transposed2.size() == s.size();
16651            if (!passed) {
16652                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653                throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654            }
16655
16656            std::cout << " -> tests passed" << std::endl;
16657        }
join (pd_test_1_all.cpp:12353)
12344            std::cout << " -> tests passed" << std::endl;
12345        }
12346
12347        void pd_test_index_join() {
12348            std::cout << "========= join ========================================";
12349
12350            pandas::Index<numpy::int64> idx1{1, 2, 3};
12351            pandas::Index<numpy::int64> idx2{2, 3, 4};
12352
12353            auto [inner_joined, left_idx, right_idx] = idx1.join(idx2, "inner");
12354            bool passed = (inner_joined.size() == 2);  // {2, 3}
12355
12356            auto [outer_joined, ol_idx, or_idx] = idx1.join(idx2, "outer");
12357            passed = passed && (outer_joined.size() == 4);  // {1, 2, 3, 4}
12358
12359            if (!passed) {
12360                std::cout << "  [FAIL] : in pd_test_index_join() : join failed" << std::endl;
12361                throw std::runtime_error("pd_test_index_join failed");
12362            }
asof (pd_test_2_all.cpp:366)
357        std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
358        return 0;
359    }
360
361} // namespace dataframe_tests
362// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
363
364// ------------------- pd_test_asof.cpp (start) -----------------------------
365// dataframe_tests/pd_test_asof.cpp
366// Test for DataFrame.asof() method
367
368#include <iostream>
369#include <cmath>
370#include <stdexcept>
371#include <limits>
372#include "../pandas/pd_dataframe.h"
373
374// CRITICAL: No using namespace directives
375
376namespace dataframe_tests {
asof_locs (pd_test_3_all.cpp:3557)
3547        throw std::runtime_error("all() should be true for empty index");
3548    }
3549    if (empty_idx.any()) {
3550        throw std::runtime_error("any() should be false for empty index");
3551    }
3552
3553    std::cout << " -> tests passed" << std::endl;
3554}
3555
3556void pd_test_3_all_index_asof() {
3557    std::cout << "========= Index.asof()/asof_locs() =================";
3558
3559    // Test with monotonically increasing index
3560    pandas::Index<numpy::int64> idx({10, 20, 30, 40, 50});
3561
3562    // Exact match
3563    auto result = idx.asof(30);
3564    if (!result.has_value() || result.value() != 30) {
3565        throw std::runtime_error("asof() exact match should return 30");
3566    }
diff (pd_test_1_all.cpp:5171)
5161        }
5162
5163        void pd_test_arithmetic_dataframe_diff_shift() {
5164            std::cout << "========= DataFrame diff/shift ==================";
5165
5166            std::map<std::string, std::vector<double>> data;
5167            data["A"] = {1.0, 3.0, 6.0, 10.0};
5168            pandas::DataFrame df(data);
5169
5170            // diff: [NaN, 2, 3, 4]
5171            auto d = df.diff();
5172            std::string val = d["A"].get_value_str(1);
5173            bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174            if (!passed) {
5175                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177            }
5178
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
shift (pd_test_1_all.cpp:5188)
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
5182            if (!passed) {
5183                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5184                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5185            }
5186
5187            // shift: [NaN, 1, 3, 6]
5188            auto s = df.shift();
5189            val = s["A"].get_value_str(1);
5190            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5191            if (!passed) {
5192                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5193                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5194            }
5195
5196            std::cout << " -> tests passed" << std::endl;
5197        }
to_flat_index (pd_test_1_all.cpp:14733)
14724        void pd_test_multiindex_to_flat_index() {
14725            std::cout << "========= to_flat_index =============================== ";
14726
14727            std::vector<std::vector<std::string>> arrays = {
14728                {"a", "b"},
14729                {"x", "y"}
14730            };
14731
14732            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14733            auto flat = mi.to_flat_index();
14734
14735            bool passed = (flat.size() == 2 &&
14736                          flat[0][0] == "a" && flat[0][1] == "x" &&
14737                          flat[1][0] == "b" && flat[1][1] == "y");
14738
14739            if (!passed) {
14740                std::cout << "  [FAIL] : to_flat_index incorrect" << std::endl;
14741                throw std::runtime_error("pd_test_multiindex_to_flat_index failed");
14742            }
to_list (pd_test_1_all.cpp:10247)
10238    std::cout << " -> tests passed" << std::endl;
10239}
10240
10241void pd_test_extension_index_to_list() {
10242    std::cout << "========= to_list =========================";
10243
10244    pandas::CategoricalArray arr({"x", "y", "z"});
10245    pandas::CategoricalIndex idx(arr);
10246
10247    auto list = idx.to_list();
10248
10249    bool passed = (list.size() == 3 &&
10250                   list[0].has_value() && *list[0] == "x" &&
10251                   list[1].has_value() && *list[1] == "y" &&
10252                   list[2].has_value() && *list[2] == "z");
10253    if (!passed) {
10254        std::cout << "  [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10255        throw std::runtime_error("pd_test_extension_index_to_list failed");
10256    }
to_numpy (pd_test_1_all.cpp:16764)
16755        // =====================================================================
16756        // to_numpy Tests
16757        // =====================================================================
16758
16759        void pd_test_ndframe_to_numpy() {
16760            std::cout << "========= to_numpy =============================================" << std::endl;
16761
16762            pandas::Series<int> s({10, 20, 30});
16763
16764            auto arr = s.to_numpy();
16765
16766            bool passed = arr.getSize() == 3;
16767            if (!passed) {
16768                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16769                throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16770            }
16771
16772            passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16773            if (!passed) {
16774                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
tolist (pd_test_3_all.cpp:2300)
2290        threw = true;
2291    }
2292    if (!threw) {
2293        throw std::runtime_error("swapaxes should throw for invalid axes");
2294    }
2295
2296    std::cout << " -> tests passed" << std::endl;
2297}
2298
2299void pd_test_3_all_categorical_to_list() {
2300    std::cout << "========= CategoricalArray.to_list()/tolist() =========";
2301
2302    std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "c"};
2303    pandas::CategoricalArray arr(values);
2304
2305    auto list = arr.to_list();
2306    if (list.size() != 4 || *list[0] != "a" || *list[1] != "b" ||
2307        list[2].has_value() || *list[3] != "c") {
2308        throw std::runtime_error("to_list failed");
2309    }
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
infer_objects (pd_test_1_all.cpp:27595)
27585            // Create DataFrame with string column containing integers
27586            std::map<std::string, std::vector<std::string>> data;
27587            data["A"] = {"1", "2", "3", "4", "5"};
27588
27589            pandas::DataFrame df(data);
27590
27591            // Before inference, dtype should be string/object
27592            std::string before_dtype = df["A"].dtype_name();
27593
27594            // Apply infer_objects
27595            pandas::DataFrame result = df.infer_objects();
27596
27597            // After inference, dtype should be int64
27598            std::string after_dtype = result["A"].dtype_name();
27599
27600            bool passed = (after_dtype == "int64");
27601            if (!passed) {
27602                std::cout << "  [FAIL] : in pd_test_infer_objects_integer_column() : expected int64, got " << after_dtype << std::endl;
27603                throw std::runtime_error("pd_test_infer_objects_integer_column failed");
27604            }
view (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
is_ (pd_test_3_all.cpp:3972)
3963    // For typed Index, this is a no-op
3964    if (result.size() != 5) {
3965        throw std::runtime_error("infer_objects size should be 5");
3966    }
3967
3968    std::cout << " -> tests passed" << std::endl;
3969}
3970
3971void pd_test_3_all_index_is_() {
3972    std::cout << "========= Index.is_() ==============================";
3973
3974    pandas::Index<numpy::int64> idx1({1, 2, 3, 4, 5});
3975    pandas::Index<numpy::int64> idx2({1, 2, 3, 4, 5});  // Different object
3976
3977    // Different objects should not be the same
3978    if (idx1.is_(idx2)) {
3979        throw std::runtime_error("different objects should not be is_() equal");
3980    }
3981
3982    // Same object should be the same
is_boolean (pd_test_3_all.cpp:3290)
3281    std::cout << " -> tests passed" << std::endl;
3282}
3283
3284void pd_test_3_all_datetime_index_type_checks() {
3285    std::cout << "========= DatetimeIndex type checks ======================";
3286
3287    pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
3288
3289    // Type check methods
3290    if (idx.is_boolean()) {
3291        throw std::runtime_error("is_boolean() should be false");
3292    }
3293    if (idx.is_categorical()) {
3294        throw std::runtime_error("is_categorical() should be false");
3295    }
3296    if (idx.is_floating()) {
3297        throw std::runtime_error("is_floating() should be false");
3298    }
3299    if (idx.is_integer()) {
3300        throw std::runtime_error("is_integer() should be false");
is_categorical (pd_test_3_all.cpp:3293)
3284void pd_test_3_all_datetime_index_type_checks() {
3285    std::cout << "========= DatetimeIndex type checks ======================";
3286
3287    pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
3288
3289    // Type check methods
3290    if (idx.is_boolean()) {
3291        throw std::runtime_error("is_boolean() should be false");
3292    }
3293    if (idx.is_categorical()) {
3294        throw std::runtime_error("is_categorical() should be false");
3295    }
3296    if (idx.is_floating()) {
3297        throw std::runtime_error("is_floating() should be false");
3298    }
3299    if (idx.is_integer()) {
3300        throw std::runtime_error("is_integer() should be false");
3301    }
3302    if (idx.is_interval()) {
3303        throw std::runtime_error("is_interval() should be false");
is_floating (pd_test_3_all.cpp:622)
612    // Test with integer index
613    pandas::IndexDtype<numpy::int64> int_dtype;
614    if (!int_dtype.is_numeric()) {
615        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625    }
626    if (int_dtype.is_object()) {
627        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
629    }
630
631    // Test with float index
632    pandas::IndexDtype<double> float_dtype;
is_integer (pd_test_3_all.cpp:618)
609void pd_test_3_all_index_dtype_checks() {
610    std::cout << "========= IndexDtype.is_numeric/integer/floating/object() ";
611
612    // Test with integer index
613    pandas::IndexDtype<numpy::int64> int_dtype;
614    if (!int_dtype.is_numeric()) {
615        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625    }
626    if (int_dtype.is_object()) {
627        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
is_interval (pd_test_3_all.cpp:3302)
3292    }
3293    if (idx.is_categorical()) {
3294        throw std::runtime_error("is_categorical() should be false");
3295    }
3296    if (idx.is_floating()) {
3297        throw std::runtime_error("is_floating() should be false");
3298    }
3299    if (idx.is_integer()) {
3300        throw std::runtime_error("is_integer() should be false");
3301    }
3302    if (idx.is_interval()) {
3303        throw std::runtime_error("is_interval() should be false");
3304    }
3305    if (idx.is_numeric()) {
3306        throw std::runtime_error("is_numeric() should be false");
3307    }
3308    if (idx.is_object()) {
3309        throw std::runtime_error("is_object() should be false");
3310    }
3311    if (idx.holds_integer()) {
3312        throw std::runtime_error("holds_integer() should be false");
is_numeric (pd_test_3_all.cpp:614)
605// ============================================================================
606// Category 4: Index Type Checking (IndexDtype)
607// ============================================================================
608
609void pd_test_3_all_index_dtype_checks() {
610    std::cout << "========= IndexDtype.is_numeric/integer/floating/object() ";
611
612    // Test with integer index
613    pandas::IndexDtype<numpy::int64> int_dtype;
614    if (!int_dtype.is_numeric()) {
615        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
is_object (pd_test_3_all.cpp:626)
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625    }
626    if (int_dtype.is_object()) {
627        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
629    }
630
631    // Test with float index
632    pandas::IndexDtype<double> float_dtype;
633    if (!float_dtype.is_numeric()) {
634        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : float should be numeric" << std::endl;
635        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: float is_numeric");
636    }
all (pd_test_1_all.cpp:247)
237        pandas::BooleanArray has_true({
238            std::optional<bool>(false),
239            std::optional<bool>(true)
240        });
241        any_result = has_true.any();
242        if (!any_result.has_value() || !any_result.value()) {
243            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245        }
246
247        // Test all()
248        pandas::BooleanArray all_true({
249            std::optional<bool>(true),
250            std::optional<bool>(true)
251        });
252        auto all_result = all_true.all();
253        if (!all_result.has_value() || !all_result.value()) {
254            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255            throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256        }
any (pd_test_1_all.cpp:226)
216            std::cout << "  [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217            throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218        }
219
220        std::cout << " -> tests passed" << std::endl;
221    }
222
223    void pd_test_boolean_array_reductions() {
224        std::cout << "========= BooleanArray: reductions ======================= ";
225
226        // Test any()
227        pandas::BooleanArray all_false({
228            std::optional<bool>(false),
229            std::optional<bool>(false)
230        });
231        auto any_result = all_false.any();
232        if (!any_result.has_value() || any_result.value()) {
233            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235        }
argmax (pd_test_1_all.cpp:1323)
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327        }
1328
1329        std::cout << " -> tests passed" << std::endl;
1330    }
1331
1332    void pd_test_datetime_array_unique() {
1333        std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
as_ordered (pd_test_1_all.cpp:791)
781            unordered.min();
782        } catch (const std::exception&) {
783            threw = true;
784        }
785        if (!threw) {
786            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788        }
789
790        // Test as_ordered / as_unordered
791        pandas::CategoricalArray reordered = unordered.as_ordered();
792        if (!reordered.ordered()) {
793            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : as_ordered failed" << std::endl;
794            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: as_ordered failed");
795        }
796
797        std::cout << " -> tests passed" << std::endl;
798    }
799
800    void pd_test_categorical_array_comparisons() {
801        std::cout << "========= CategoricalArray: comparisons ======================= ";
as_unordered (pd_test_1_all.cpp:778)
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
782        } catch (const std::exception&) {
783            threw = true;
784        }
785        if (!threw) {
786            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788        }
categories (pd_test_1_all.cpp:389)
379        std::vector<std::optional<std::string>> vals = {
380            std::optional<std::string>("low"),
381            std::optional<std::string>("high"),
382            std::optional<std::string>("medium")
383        };
384        pandas::CategoricalArray arr3(vals, cats, true);  // ordered
385        if (!arr3.ordered()) {
386            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : should be ordered" << std::endl;
387            throw std::runtime_error("pd_test_categorical_array_constructors failed: should be ordered");
388        }
389        if (arr3.categories().size() != 3) {
390            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : categories size != 3" << std::endl;
391            throw std::runtime_error("pd_test_categorical_array_constructors failed: categories size != 3");
392        }
393
394        std::cout << " -> tests passed" << std::endl;
395    }
396
397    void pd_test_categorical_array_from_codes() {
398        std::cout << "========= CategoricalArray: from_codes ======================= ";
clone (pd_test_1_all.cpp:5776)
5767    std::cout << " -> tests passed" << std::endl;
5768}
5769
5770void pd_test_categorical_index_clone() {
5771    std::cout << "========= clone =======================================";
5772
5773    pandas::CategoricalArray arr({"p", "q", "r"});
5774    pandas::CategoricalIndex idx(arr, "original");
5775
5776    std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5777
5778    bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5779                   cloned->name() == idx.name());
5780    if (!passed) {
5781        std::cout << "  [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5782        throw std::runtime_error("pd_test_categorical_index_clone failed");
5783    }
5784
5785    std::cout << " -> tests passed" << std::endl;
5786}
codes (pd_test_1_all.cpp:473)
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
468
469        std::vector<std::string> cats = {"x", "y", "z"};
470        std::vector<numpy::int32> codes = {0, 1, 2, 1, 0};
471        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
472
473        numpy::NDArray<numpy::int32> arr_codes = arr.codes();
474        if (arr_codes.getSize() != 5) {
475            std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : codes size != 5" << std::endl;
476            throw std::runtime_error("pd_test_categorical_array_codes_property failed: codes size != 5");
477        }
478
479        // Check codes match
480        for (size_t i = 0; i < codes.size(); ++i) {
481            if (arr_codes.getElementAt({i}) != codes[i]) {
482                std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : code mismatch at " << i << std::endl;
483                throw std::runtime_error("pd_test_categorical_array_codes_property failed: code mismatch");
format (main.cpp:20)
10int main() {
11  // Automatically log all output to temp/pd_test_output.log
12  numpy::TestLogger logger("temp/pd_test_output.log");
13
14  int res = 0;
15  int res1 = 0;
16  std::string resS = "";
17
18  // call all the tests
19  res1 = dataframe_tests::pd_test_main();
20  resS += std::format("             pd_test_main: {}  errors\n", res1);
21  res += res1;
22
23  std::cout << "\n------------------------- main --------------------------------------------\n";
24  std::cout << std::endl << "All tests completed. Nb errors = " << res << std::endl;
25  std::cout << "Details: \n" << resS;
26  std::cout << "\n---------------------------------------------------------------------------\n";
27  return res;
28}
has_category (pd_test_1_all.cpp:5303)
5293}
5294
5295void pd_test_categorical_index_values_with_categories_constructor() {
5296    std::cout << "========= values with categories constructor ==========";
5297
5298    std::vector<std::optional<std::string>> values = {"a", "b", "a"};
5299    std::vector<std::string> categories = {"a", "b", "c", "d"};
5300    pandas::CategoricalIndex idx(values, categories, true, "explicit_cats");
5301
5302    bool passed = (idx.size() == 3 && idx.num_categories() == 4 &&
5303                   idx.ordered() && idx.has_category("c") && idx.has_category("d"));
5304    if (!passed) {
5305        std::cout << "  [FAIL] : in pd_test_categorical_index_values_with_categories_constructor()" << std::endl;
5306        throw std::runtime_error("pd_test_categorical_index_values_with_categories_constructor failed");
5307    }
5308
5309    std::cout << " -> tests passed" << std::endl;
5310}
5311
5312void pd_test_categorical_index_copy_constructor() {
5313    std::cout << "========= copy constructor ============================";
holds_integer (pd_test_3_all.cpp:3311)
3301    }
3302    if (idx.is_interval()) {
3303        throw std::runtime_error("is_interval() should be false");
3304    }
3305    if (idx.is_numeric()) {
3306        throw std::runtime_error("is_numeric() should be false");
3307    }
3308    if (idx.is_object()) {
3309        throw std::runtime_error("is_object() should be false");
3310    }
3311    if (idx.holds_integer()) {
3312        throw std::runtime_error("holds_integer() should be false");
3313    }
3314
3315    std::cout << " -> tests passed" << std::endl;
3316}
3317
3318void pd_test_3_all_datetime_index_sort() {
3319    std::cout << "========= DatetimeIndex.sort_values() ====================";
3320
3321    pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
identical (pd_test_1_all.cpp:5883)
5873}
5874
5875void pd_test_categorical_index_identical() {
5876    std::cout << "========= identical ===================================";
5877
5878    pandas::CategoricalArray arr({"a", "b"});
5879    pandas::CategoricalIndex idx1(arr, "same_name");
5880    pandas::CategoricalIndex idx2(arr, "same_name");
5881    pandas::CategoricalIndex idx3(arr, "diff_name");
5882
5883    bool passed = (idx1.identical(idx2) && !idx1.identical(idx3));
5884    if (!passed) {
5885        std::cout << "  [FAIL] : in pd_test_categorical_index_identical()" << std::endl;
5886        throw std::runtime_error("pd_test_categorical_index_identical failed");
5887    }
5888
5889    std::cout << " -> tests passed" << std::endl;
5890}
5891
5892// ============================================================================
5893// Inherited Operations Tests
inferred_type (pd_test_1_all.cpp:5270)
5260}
5261
5262void pd_test_categorical_index_array_constructor() {
5263    std::cout << "========= array constructor ===========================";
5264
5265    pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
5266    pandas::CategoricalIndex idx(arr, "fruits");
5267
5268    bool passed = (idx.size() == 4 && !idx.empty() &&
5269                   idx.name().has_value() && *idx.name() == "fruits" &&
5270                   idx.inferred_type() == "categorical");
5271    if (!passed) {
5272        std::cout << "  [FAIL] : in pd_test_categorical_index_array_constructor()" << std::endl;
5273        throw std::runtime_error("pd_test_categorical_index_array_constructor failed");
5274    }
5275
5276    std::cout << " -> tests passed" << std::endl;
5277}
5278
5279void pd_test_categorical_index_values_constructor() {
5280    std::cout << "========= values constructor ==========================";
item (pd_test_3_all.cpp:3712)
3703    // Test is_interval (always false for base Index)
3704    if (int_idx.is_interval()) {
3705        throw std::runtime_error("base Index should not be interval");
3706    }
3707
3708    std::cout << " -> tests passed" << std::endl;
3709}
3710
3711void pd_test_3_all_index_item() {
3712    std::cout << "========= Index.item() =============================";
3713
3714    pandas::Index<numpy::int64> idx1({42});
3715    numpy::int64 val = idx1.item();
3716
3717    if (val != 42) {
3718        throw std::runtime_error("item() should return 42");
3719    }
3720
3721    // Test error for size != 1
3722    pandas::Index<numpy::int64> idx2({1, 2, 3});
memory_usage (pd_test_1_all.cpp:27063)
27053        }
27054
27055        std::cout << "====================================== [OK] pd_test_value_counts test suite ========================== " << std::endl;
27056        return 0;
27057    }
27058
27059} // namespace dataframe_tests
27060// ------------------- pd_test_value_counts.cpp (end) -----------------------------
27061
27062// ------------------- pd_test_memory_usage.cpp (start) -----------------------------
27063// Tests for DataFrame.memory_usage() - pandas-compatible memory usage reporting
27064
27065namespace dataframe_tests {
27066    namespace dataframe_tests_memory_usage {
27067
27068        void pd_test_memory_usage_basic() {
27069            std::cout << "========= basic memory_usage =======================";
27070
27071            // Create a simple DataFrame with multiple columns
27072            std::map<std::string, std::vector<double>> data;
27073            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
num_categories (pd_test_1_all.cpp:5285)
5276    std::cout << " -> tests passed" << std::endl;
5277}
5278
5279void pd_test_categorical_index_values_constructor() {
5280    std::cout << "========= values constructor ==========================";
5281
5282    std::vector<std::optional<std::string>> values = {"a", "b", "a", std::nullopt, "c"};
5283    pandas::CategoricalIndex idx(values, std::optional<std::string>("letters"), false);
5284
5285    bool passed = (idx.size() == 5 && idx.num_categories() == 3 &&
5286                   !idx.ordered() && idx.name().has_value() && *idx.name() == "letters");
5287    if (!passed) {
5288        std::cout << "  [FAIL] : in pd_test_categorical_index_values_constructor()" << std::endl;
5289        throw std::runtime_error("pd_test_categorical_index_values_constructor failed");
5290    }
5291
5292    std::cout << " -> tests passed" << std::endl;
5293}
5294
5295void pd_test_categorical_index_values_with_categories_constructor() {
ordered (pd_test_1_all.cpp:359)
350    void pd_test_categorical_array_constructors() {
351        std::cout << "========= CategoricalArray: constructors ======================= ";
352
353        // Default constructor
354        pandas::CategoricalArray arr1;
355        if (arr1.size() != 0) {
356            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358        }
359        if (arr1.ordered()) {
360            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362        }
363
364        // Constructor from values (infer categories)
365        std::vector<std::optional<std::string>> values = {
366            std::optional<std::string>("a"),
367            std::optional<std::string>("b"),
368            std::optional<std::string>("a"),
369            std::optional<std::string>("c")
ordered (pd_test_1_all.cpp:359)
350    void pd_test_categorical_array_constructors() {
351        std::cout << "========= CategoricalArray: constructors ======================= ";
352
353        // Default constructor
354        pandas::CategoricalArray arr1;
355        if (arr1.size() != 0) {
356            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358        }
359        if (arr1.ordered()) {
360            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362        }
363
364        // Constructor from values (infer categories)
365        std::vector<std::optional<std::string>> values = {
366            std::optional<std::string>("a"),
367            std::optional<std::string>("b"),
368            std::optional<std::string>("a"),
369            std::optional<std::string>("c")
putmask (pd_test_3_all.cpp:3752)
3743    // Should be at least sizeof index + 5 * sizeof(int64)
3744    if (usage < 5 * sizeof(numpy::int64)) {
3745        throw std::runtime_error("memory_usage too small");
3746    }
3747
3748    std::cout << " -> tests passed" << std::endl;
3749}
3750
3751void pd_test_3_all_index_putmask() {
3752    std::cout << "========= Index.putmask() ==========================";
3753
3754    pandas::Index<numpy::int64> idx({1, 2, 3, 4, 5});
3755    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{5});
3756    mask.setElementAt({0}, numpy::bool_(true));
3757    mask.setElementAt({1}, numpy::bool_(false));
3758    mask.setElementAt({2}, numpy::bool_(true));
3759    mask.setElementAt({3}, numpy::bool_(false));
3760    mask.setElementAt({4}, numpy::bool_(true));
3761
3762    auto result = idx.putmask(mask, numpy::int64(99));
ravel (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
remove_categories (pd_test_1_all.cpp:591)
581    }
582
583    void pd_test_categorical_array_remove_categories() {
584        std::cout << "========= CategoricalArray: remove_categories ======================= ";
585
586        std::vector<std::string> cats = {"a", "b", "c"};
587        std::vector<numpy::int32> codes = {0, 1, 2, 1};  // a, b, c, b
588        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
589
590        // Remove 'c' - values with 'c' become NA
591        pandas::CategoricalArray result = arr.remove_categories({"c"});
592
593        if (result.categories().size() != 2) {
594            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_categories() : categories size != 2" << std::endl;
595            throw std::runtime_error("pd_test_categorical_array_remove_categories failed: categories size != 2");
596        }
597
598        // Element at index 2 should now be NA (was 'c')
599        if (!result.is_na(2)) {
600            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_categories() : removed category should be NA" << std::endl;
601            throw std::runtime_error("pd_test_categorical_array_remove_categories failed: removed category should be NA");
remove_unused_categories (pd_test_1_all.cpp:737)
727        std::cout << " -> tests passed" << std::endl;
728    }
729
730    void pd_test_categorical_array_remove_unused_categories() {
731        std::cout << "========= CategoricalArray: remove_unused_categories ======================= ";
732
733        std::vector<std::string> cats = {"a", "b", "c", "d"};
734        std::vector<numpy::int32> codes = {0, 0, 2};  // a, a, c (b and d unused)
735        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
736
737        pandas::CategoricalArray result = arr.remove_unused_categories();
738
739        // Only 'a' and 'c' should remain
740        if (result.categories().size() != 2) {
741            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_unused_categories() : categories size != 2" << std::endl;
742            throw std::runtime_error("pd_test_categorical_array_remove_unused_categories failed: categories size != 2");
743        }
744
745        // Values should be preserved
746        std::optional<std::string> val0 = result[0];
747        std::optional<std::string> val2 = result[2];
reorder_categories (pd_test_1_all.cpp:695)
686    void pd_test_categorical_array_reorder_categories() {
687        std::cout << "========= CategoricalArray: reorder_categories ======================= ";
688
689        std::vector<std::string> cats = {"a", "b", "c"};
690        std::vector<numpy::int32> codes = {0, 1, 2};  // a, b, c
691        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
692
693        // Reorder categories
694        std::vector<std::string> new_order = {"c", "b", "a"};
695        pandas::CategoricalArray result = arr.reorder_categories(new_order);
696
697        // Check categories are reordered
698        const std::vector<std::string>& result_cats = result.categories();
699        if (result_cats[0] != "c" || result_cats[1] != "b" || result_cats[2] != "a") {
700            std::cout << "  [FAIL] : in pd_test_categorical_array_reorder_categories() : categories not reordered" << std::endl;
701            throw std::runtime_error("pd_test_categorical_array_reorder_categories failed: categories not reordered");
702        }
703
704        // Values should be preserved
705        std::optional<std::string> val0 = result[0];
repeat (pd_test_3_all.cpp:2166)
2157    auto viewed = arr.view();
2158    if (viewed.size() != 3 || !viewed.equals(arr)) {
2159        throw std::runtime_error("view failed");
2160    }
2161
2162    std::cout << " -> tests passed" << std::endl;
2163}
2164
2165void pd_test_3_all_categorical_repeat() {
2166    std::cout << "========= CategoricalArray.repeat() ===================";
2167
2168    std::vector<std::optional<std::string>> values = {"a", "b"};
2169    pandas::CategoricalArray arr(values);
2170
2171    auto result = arr.repeat(3);
2172    if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2173        *result[3] != "b" || *result[5] != "b") {
2174        throw std::runtime_error("repeat scalar failed");
2175    }
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
round (pd_test_1_all.cpp:1688)
1679    void pd_test_floating_array_rounding() {
1680        std::cout << "========= FloatingArray: rounding ======================= ";
1681
1682        pandas::FloatingArray<double> arr({
1683            std::optional<double>(1.234),
1684            std::optional<double>(2.567),
1685            std::nullopt
1686        });
1687
1688        auto rounded = arr.round(2);
1689        if (std::abs(rounded[0].value() - 1.23) > 0.001 ||
1690            std::abs(rounded[1].value() - 2.57) > 0.001) {
1691            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round(2)" << std::endl;
1692            throw std::runtime_error("pd_test_floating_array_rounding failed: round(2)");
1693        }
1694
1695        if (!rounded.is_na(2)) {
1696            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round should preserve NA" << std::endl;
1697            throw std::runtime_error("pd_test_floating_array_rounding failed: NA preservation");
1698        }
set_categories (pd_test_1_all.cpp:623)
614    void pd_test_categorical_array_set_categories() {
615        std::cout << "========= CategoricalArray: set_categories ======================= ";
616
617        std::vector<std::string> cats = {"a", "b"};
618        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
619        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
620
621        // Set new categories (values not in new categories become NA)
622        std::vector<std::string> new_cats = {"a", "c"};  // 'b' removed, 'c' added
623        pandas::CategoricalArray result = arr.set_categories(new_cats);
624
625        if (result.categories().size() != 2) {
626            std::cout << "  [FAIL] : in pd_test_categorical_array_set_categories() : categories size != 2" << std::endl;
627            throw std::runtime_error("pd_test_categorical_array_set_categories failed: categories size != 2");
628        }
629
630        // Element at index 1 should be NA (was 'b', now not in categories)
631        if (!result.is_na(1)) {
632            std::cout << "  [FAIL] : in pd_test_categorical_array_set_categories() : 'b' value should be NA" << std::endl;
633            throw std::runtime_error("pd_test_categorical_array_set_categories failed: 'b' value should be NA");
slice_indexer (pd_test_3_all.cpp:711)
701    }
702
703    std::cout << " -> tests passed" << std::endl;
704}
705
706// ============================================================================
707// Category 6: Index Indexer Methods
708// ============================================================================
709
710void pd_test_3_all_index_indexers() {
711    std::cout << "========= Index.get_indexer_for/non_unique/slice_indexer() ";
712
713    std::vector<std::string> vals = {"a", "b", "c", "d", "e"};
714    pandas::Index<std::string> idx(vals);
715
716    // Test get_indexer_for()
717    std::vector<std::string> target = {"b", "d", "f"};  // "f" doesn't exist
718    numpy::NDArray<numpy::int64> indexer = idx.get_indexer_for(target);
719    if (indexer.getSize() != 3) {
720        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_for size mismatch" << std::endl;
721        throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_for size");
slice_locs (pd_test_1_all.cpp:18275)
18265        }
18266
18267        std::cout << "-> tests passed" << std::endl;
18268    }
18269
18270    void pd_test_range_index_slice_locs() {
18271        std::cout << "========= slice_locs ================================== ";
18272
18273        pandas::RangeIndex ri(0, 10);  // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
18274
18275        auto [start_idx, stop_idx] = ri.slice_locs(3, 7);
18276
18277        bool passed = (start_idx == 3 && stop_idx == 8);
18278
18279        if (!passed) {
18280            std::cout << "  [FAIL] : slice_locs" << std::endl;
18281            throw std::runtime_error("pd_test_range_index_slice_locs failed");
18282        }
18283
18284        std::cout << "-> tests passed" << std::endl;
18285    }
sort (pd_test_3_all.cpp:3869)
3859        throw std::runtime_error("last 2 positions should be NaN");
3860    }
3861    if (std::abs(result[0] - 3.0) > 0.001) {
3862        throw std::runtime_error("shift(-2) [0] should be 3.0");
3863    }
3864
3865    std::cout << " -> tests passed" << std::endl;
3866}
3867
3868void pd_test_3_all_index_sort() {
3869    std::cout << "========= Index.sort() =============================";
3870
3871    pandas::Index<numpy::int64> idx({3, 1, 4, 1, 5, 9, 2, 6});
3872    auto result = idx.sort();
3873
3874    if (result[0] != 1 || result[1] != 1 || result[7] != 9) {
3875        throw std::runtime_error("sort() not working correctly");
3876    }
3877
3878    // Test descending
3879    result = idx.sort(false);
sortlevel (pd_test_1_all.cpp:14676)
14667        void pd_test_multiindex_sortlevel() {
14668            std::cout << "========= sortlevel =================================== ";
14669
14670            std::vector<std::vector<std::string>> arrays = {
14671                {"b", "a", "c"},
14672                {"2", "1", "3"}
14673            };
14674
14675            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14676            auto [sorted, indices] = mi.sortlevel(0);
14677
14678            bool passed = true;
14679
14680            // After sorting by level 0: a, b, c
14681            if (sorted[0][0] != "a" || sorted[1][0] != "b" || sorted[2][0] != "c") {
14682                std::cout << "  [FAIL] : not sorted correctly by level 0" << std::endl;
14683                passed = false;
14684            }
14685
14686            if (!passed) {
type_id (pd_test_3_all.cpp:25592)
25582// ------------------- pd_test_value_classify (end) ------------------
25583
25584// ------------------- pd_test_index_type_id (start) ------------------
25585namespace dataframe_tests_index_type_id {
25586
25587void pd_test_index_type_id_dispatch() {
25588    std::cout << "========= IndexTypeId dispatch =======================";
25589
25590    // RangeIndex
25591    ::pandas::RangeIndex ri(0, 5);
25592    if (ri.type_id() != ::pandas::IndexTypeId::RangeIndex)
25593        throw std::runtime_error("RangeIndex type_id failed");
25594
25595    // Index<string>
25596    ::pandas::Index<std::string> si(std::vector<std::string>{"a", "b", "c"});
25597    if (si.type_id() != ::pandas::IndexTypeId::IndexString)
25598        throw std::runtime_error("Index<string> type_id failed");
25599
25600    // Index<int64>
25601    ::pandas::Index<numpy::int64> ii(std::vector<numpy::int64>{1, 2, 3});
25602    if (ii.type_id() != ::pandas::IndexTypeId::IndexInt64)