MultiIndex#

class pandas::MultiIndex#

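Multi-level (hierarchical) index class for axis labels in pandas data structures, built from per-level `levels`, integer `codes`, and optional level `names`.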

Example#

#include <pandas/pandas.h>
using namespace pandas;

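// Create a MultiIndex from the Cartesian product of two label sets
std::vector<std::vector<std::string>> iterables = {{"a", "b"}, {"1", "2", "3"}};
MultiIndex idx = MultiIndex::from_product<std::string>(iterables);
size_t len = idx.size();  // 2 * 3 = 6 entries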

Constructors#

Signature

Location

Example

MultiIndex(std::vector<std::unique_ptr<IndexBase>>&& levels, std::vector<numpy::NDArray<numpy::int64>>&& codes, std::vector<std::optional<std::string>> names = {}, bool verify_integrity = true)

pd_multiindex.h:88

View

MultiIndex(const MultiIndex& other)

pd_multiindex.h:125

View

MultiIndex(MultiIndex&& other) noexcept = default

pd_multiindex.h:149

View

Construction#

Signature

Return Type

Location

Example

static MultiIndex from_arrays(const std::vector<std::vector<T>>& arrays, const std::vector<std::optional<std::string>>& names = {}, const std::optional<int>& sortorder = std::nullopt)

MultiIndex

pd_multiindex.h:186

View

static MultiIndex from_arrays(std::initializer_list<std::vector<T>> arrays, const std::vector<std::optional<std::string>>& names = {}, const std::optional<int>& sortorder = std::nullopt)

MultiIndex

pd_multiindex.h:240

View

static MultiIndex from_arrays(const std::vector<T1>& arr1, const std::vector<T2>& arr2, std::initializer_list<const char*> names)

MultiIndex

pd_multiindex.h:256

View

static MultiIndex from_frame(const std::vector<std::vector<ColType>>& df, const std::vector<std::optional<std::string>>& names = {}, const std::optional<int>& sortorder = std::nullopt)

MultiIndex

pd_multiindex.h:1805

View

static MultiIndex from_mixed_arrays(const std::vector<std::vector<numpy::int64>>& int_arrays, const std::vector<std::vector<std::string>>& str_arrays, const std::string& level_types, const std::vector<std::optional<std::string>>& names = {})

MultiIndex

pd_multiindex.h:278

View

static MultiIndex from_product(const std::vector<std::vector<T>>& iterables, const std::vector<std::optional<std::string>>& names = {}, const std::optional<int>& sortorder = std::nullopt)

MultiIndex

pd_multiindex.h:420

View

static MultiIndex from_product(std::initializer_list<std::vector<T>> iterables, const std::vector<std::optional<std::string>>& names = {}, const std::optional<int>& sortorder = std::nullopt)

MultiIndex

pd_multiindex.h:470

View

static MultiIndex from_tuples(const std::vector<std::vector<T>>& tuples, const std::vector<std::optional<std::string>>& names = {}, const std::optional<int>& sortorder = std::nullopt)

MultiIndex

pd_multiindex.h:373

View

static MultiIndex from_tuples(std::initializer_list<std::vector<T>> tuples, const std::vector<std::optional<std::string>>& names = {}, const std::optional<int>& sortorder = std::nullopt)

MultiIndex

pd_multiindex.h:406

View

Indexing / Selection#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::int64> get_indexer(const MultiIndex& target, const std::string& method = "", std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt) const

numpy::NDArray<numpy::int64>

pd_multiindex.h:922

View

numpy::NDArray<numpy::int64> get_indexer_for(const MultiIndex& target) const

numpy::NDArray<numpy::int64>

pd_multiindex.h:1817

View

get_indexer_non_unique(const MultiIndex& target) const

pd_multiindex.h:1827

View

const IndexBase& get_level(size_t level) const

const IndexBase&

pd_multiindex.h:688

View

const std::vector<std::string>& get_level_categories(size_t level) const

const std::vector<std::string>&

pd_multiindex.h:559

std::unique_ptr<IndexBase> get_level_values(int level) const

std::unique_ptr<IndexBase>

pd_multiindex.h:614

View

std::unique_ptr<IndexBase> get_level_values(const std::string& level_name) const

std::unique_ptr<IndexBase>

pd_multiindex.h:643

View

std::vector<std::string> get_level_values_str(size_t level) const

std::vector<std::string>

pd_multiindex.h:666

View

Index<T> get_level_values_typed(size_t level) const

Index<T>

pd_multiindex.h:1857

std::variant<size_t, std::vector<size_t>> get_loc(const std::vector<std::string>& key) const

std::variant<size_t, std::vector<size_t>>

pd_multiindex.h:882

View

std::pair<std::vector<size_t>, MultiIndex> get_loc_level(const std::string& key, size_t level = 0, bool drop_level = true) const

std::pair<std::vector<size_t>, MultiIndex>

pd_multiindex.h:1886

View

std::optional<size_t> get_loc_string(const std::string& key) const

std::optional<size_t>

pd_multiindex.h:3263

View

numpy::NDArray<numpy::int64> get_locs(const std::vector<std::vector<std::string>>& seq) const

numpy::NDArray<numpy::int64>

pd_multiindex.h:1916

View

std::optional<std::string> get_name(size_t level) const

std::optional<std::string>

pd_multiindex.h:698

View

size_t get_slice_bound(const std::vector<std::string>& label, const std::string& side) const

size_t

pd_multiindex.h:1944

View

std::string get_string(size_t i) const

std::string

pd_multiindex.h:1083

View

std::vector<std::string> get_tuple_str(size_t index) const

std::vector<std::string>

pd_multiindex.h:955

View

std::pair<MultiIndex, numpy::NDArray<numpy::int64>> take(const std::vector<size_t>& indices, int axis = 0, bool allow_fill = false, std::nullptr_t fill_value = nullptr) const

std::pair<MultiIndex, numpy::NDArray<numpy::int64>>

pd_multiindex.h:1193

View

MultiIndex where(const numpy::NDArray<numpy::bool_>& cond, const std::vector<std::string>& other) const

MultiIndex

pd_multiindex.h:3075

View

Data Manipulation#

Signature

Return Type

Location

Example

MultiIndex drop(const std::vector<std::vector<std::string>>& labels, std::optional<std::vector<numpy::int64>> codes = std::nullopt, std::optional<int> level = std::nullopt, const std::string& errors = "raise") const

MultiIndex

pd_multiindex.h:1509

View

MultiIndex drop_duplicates(const std::string& keep = "first") const

MultiIndex

pd_multiindex.h:1545

View

MultiIndex droplevel(size_t level) const

MultiIndex

pd_multiindex.h:743

View

MultiIndex dropna(const std::string& how = "any") const

MultiIndex

pd_multiindex.h:1597

View

MultiIndex insert(size_t loc, const std::vector<std::string>& item) const

MultiIndex

pd_multiindex.h:2020

View

reindex(const MultiIndex& target, const std::string& method = "", std::optional<int> level = std::nullopt, std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt) const

pd_multiindex.h:2521

View

MultiIndex rename(const std::vector<std::optional<std::string>>& new_names, std::optional<size_t> level = std::nullopt, bool inplace = false, const std::optional<std::vector<std::string>>& names = std::nullopt) const

MultiIndex

pd_multiindex.h:2543

View

MultiIndex reorder_levels(const std::vector<size_t>& order) const

MultiIndex

pd_multiindex.h:801

View

MultiIndex set_names(const std::vector<std::optional<std::string>>& names, std::optional<int> level = std::nullopt, bool inplace = false) const

MultiIndex

pd_multiindex.h:716

View

MultiIndex swaplevel(size_t i, size_t j) const

MultiIndex

pd_multiindex.h:771

View

Missing Data#

Signature

Return Type

Location

Example

MultiIndex fillna(const std::vector<std::string>& value, const std::string& downcast = "") const

MultiIndex

pd_multiindex.h:1734

View

numpy::NDArray<numpy::bool_> isna() const

numpy::NDArray<numpy::bool_>

pd_multiindex.h:2218

View

numpy::NDArray<numpy::bool_> isnull() const

numpy::NDArray<numpy::bool_>

pd_multiindex.h:2229

View

numpy::NDArray<numpy::bool_> notna() const

numpy::NDArray<numpy::bool_>

pd_multiindex.h:2445

View

numpy::NDArray<numpy::bool_> notnull() const

numpy::NDArray<numpy::bool_>

pd_multiindex.h:2456

View

Statistics#

Signature

Return Type

Location

Example

std::vector<std::string> max() const

std::vector<std::string>

pd_multiindex.h:2390

View

std::vector<std::string> min() const

std::vector<std::string>

pd_multiindex.h:2434

View

size_t nunique() const

size_t

pd_multiindex.h:2463

View

Aggregation#

Signature

Return Type

Location

Example

groupby(const std::vector<std::string>& values) const

pd_multiindex.h:1970

View

MultiIndex map(Func mapper, const std::string& na_action = "") const

MultiIndex

pd_multiindex.h:2372

View

Comparison#

Signature

Return Type

Location

Example

int compare_tuples(size_t idx_a, size_t idx_b) const

int

pd_multiindex.h:3151

bool equal_levels(const MultiIndex& other) const

bool

pd_multiindex.h:1650

View

bool equals(const MultiIndex& other) const

bool

pd_multiindex.h:1674

View

const std::vector<std::unique_ptr<IndexBase>>& levels() const

const std::vector<std::unique_ptr<IndexBase>>&

pd_multiindex.h:520

View

std::vector<size_t> levshape() const

std::vector<size_t>

pd_multiindex.h:491

View

Sorting#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::int64> argsort() const

numpy::NDArray<numpy::int64>

pd_multiindex.h:1153

View

size_t searchsorted(const std::vector<std::string>& value, const std::string& side = "left", const std::optional<numpy::NDArray<numpy::int64>>& sorter = std::nullopt) const

size_t

pd_multiindex.h:2605

View

MultiIndex sort_values(bool ascending = true, const std::string& na_position = "last", bool return_indexer = false, std::nullptr_t key = nullptr) const

MultiIndex

pd_multiindex.h:2758

View

Reshaping#

Signature

Return Type

Location

Example

std::map<std::string, std::vector<std::string>> to_frame(bool index = true, bool allow_duplicates = false, const std::optional<std::vector<std::string>>& name = std::nullopt) const

std::map<std::string, std::vector<std::string>>

pd_multiindex.h:2838

View

MultiIndex transpose(bool copy_data = false) const

MultiIndex

pd_multiindex.h:2902

View

Combining#

Signature

Return Type

Location

Example

MultiIndex append(const MultiIndex& other) const

MultiIndex

pd_multiindex.h:1234

View

join(const MultiIndex& other, const std::string& how = "left", std::optional<int> level = std::nullopt, bool return_indexers = true, bool sort = false) const

pd_multiindex.h:2254

View

Time Series#

Signature

Return Type

Location

Example

std::optional<std::vector<std::string>> asof(const std::vector<std::string>& key, const std::optional<std::vector<std::string>>& label = std::nullopt) const

std::optional<std::vector<std::string>>

pd_multiindex.h:1329

View

numpy::NDArray<numpy::int64> asof_locs(const std::vector<std::vector<std::string>>& where, const numpy::NDArray<numpy::bool_>& mask) const

numpy::NDArray<numpy::int64>

pd_multiindex.h:1359

View

std::vector<std::optional<std::string>> diff(int periods = 1) const

std::vector<std::optional<std::string>>

pd_multiindex.h:1449

View

MultiIndex difference(const MultiIndex& other, bool sort = true) const

MultiIndex

pd_multiindex.h:1473

View

MultiIndex shift(int periods = 1, const std::optional<std::string>& freq = std::nullopt) const

MultiIndex

pd_multiindex.h:2692

View

I/O#

Signature

Return Type

Location

Example

std::vector<std::vector<std::string>> to_flat_index() const

std::vector<std::vector<std::string>>

pd_multiindex.h:985

View

std::vector<std::vector<std::string>> to_list() const

std::vector<std::vector<std::string>>

pd_multiindex.h:2858

View

std::vector<std::vector<std::string>> to_numpy() const

std::vector<std::vector<std::string>>

pd_multiindex.h:2866

View

std::string to_string() const

std::string

pd_multiindex.h:999

View

std::vector<std::vector<std::string>> tolist() const

std::vector<std::vector<std::string>>

pd_multiindex.h:2893

View

Conversion#

Signature

Return Type

Location

Example

MultiIndex astype(const std::string& dtype, bool copy = true) const

MultiIndex

pd_multiindex.h:1393

View

MultiIndex copy() const

MultiIndex

pd_multiindex.h:1264

View

MultiIndex infer_objects() const

MultiIndex

pd_multiindex.h:2010

View

MultiIndex view() const

MultiIndex

pd_multiindex.h:3065

View

Set Operations#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::bool_> duplicated(const std::string& keep = "first") const

numpy::NDArray<numpy::bool_>

pd_multiindex.h:1607

View

MultiIndex intersection(const MultiIndex& other, bool sort = false) const

MultiIndex

pd_multiindex.h:2060

View

numpy::NDArray<numpy::bool_> isin(const std::vector<std::vector<std::string>>& values, std::optional<int> level = std::nullopt) const

numpy::NDArray<numpy::bool_>

pd_multiindex.h:2171

View

MultiIndex symmetric_difference(const MultiIndex& other, const std::optional<std::string>& result_name = std::nullopt, bool sort = true) const

MultiIndex

pd_multiindex.h:2789

View

MultiIndex union_(const MultiIndex& other, bool sort = true) const

MultiIndex

pd_multiindex.h:2939

View

MultiIndex unique(std::optional<int> level = std::nullopt) const

MultiIndex

pd_multiindex.h:2983

View

Type Checking#

Signature

Return Type

Location

Example

bool is_(const MultiIndex& other) const

bool

pd_multiindex.h:2096

View

bool is_boolean() const

bool

pd_multiindex.h:2103

View

bool is_categorical() const

bool

pd_multiindex.h:2110

View

bool is_floating() const

bool

pd_multiindex.h:2117

View

bool is_integer() const

bool

pd_multiindex.h:2129

View

bool is_interval() const

bool

pd_multiindex.h:2142

View

bool is_level_ordered(size_t level) const

bool

pd_multiindex.h:568

bool is_monotonic_decreasing() const

bool

pd_multiindex.h:3122

View

bool is_monotonic_increasing() const

bool

pd_multiindex.h:3108

View

bool is_numeric() const

bool

pd_multiindex.h:2149

View

bool is_object() const

bool

pd_multiindex.h:2156

View

bool is_unique() const

bool

pd_multiindex.h:575

View

Other Methods#

Signature

Return Type

Location

Example

bool all() const

bool

pd_multiindex.h:1276

View

bool any() const

bool

pd_multiindex.h:1285

View

size_t argmax() const

size_t

pd_multiindex.h:1292

View

size_t argmin() const

size_t

pd_multiindex.h:1309

View

std::vector<std::vector<T>> arrays(nlevels)

std::vector<std::vector<T>>

pd_multiindex.h:385

View

std::vector<std::vector<T>> arrays(nlevels)

std::vector<std::vector<T>>

pd_multiindex.h:441

View

std::vector<std::vector<std::string>> arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:1243

View

std::vector<std::vector<std::string>> arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2028

View

void build_hash_table() const

void

pd_multiindex.h:3189

const std::vector<numpy::NDArray<numpy::int64>>& codes() const

const std::vector<numpy::NDArray<numpy::int64>>&

pd_multiindex.h:527

View

bool contains(const std::vector<std::string>& key) const

bool

pd_multiindex.h:904

View

MultiIndex delete_(size_t loc) const

MultiIndex

pd_multiindex.h:1409

View

MultiIndex delete_(const std::vector<size_t>& locs) const

MultiIndex

pd_multiindex.h:1430

View

std::vector<std::string> dtypes() const

std::vector<std::string>

pd_multiindex.h:3137

View

bool empty() const

bool

pd_multiindex.h:513

View

void ensure_hash_table() const

void

pd_multiindex.h:3203

std::pair<numpy::NDArray<numpy::int64>, MultiIndex> factorize(bool sort = false, bool use_na_sentinel = true) const

std::pair<numpy::NDArray<numpy::int64>, MultiIndex>

pd_multiindex.h:1702

View

std::vector<std::string> format(bool name = true, size_t max_seq_items = 100, bool adjoin = true, std::nullptr_t formatter = nullptr, const std::string& na_rep = "NaN", std::nullptr_t names = nullptr, int space = 1, bool sparsify = true) const

std::vector<std::string>

pd_multiindex.h:1753

View

bool has_duplicates() const

bool

pd_multiindex.h:593

View

bool has_level_categories(size_t level) const

bool

pd_multiindex.h:552

bool holds_integer() const

bool

pd_multiindex.h:1988

View

bool identical(const MultiIndex& other) const

bool

pd_multiindex.h:2002

View

void invalidate_caches() const

void

pd_multiindex.h:3237

std::vector<std::string> item() const

std::vector<std::string>

pd_multiindex.h:2237

View

static std::string make_tuple_key(const std::vector<std::string>& values)

std::string

pd_multiindex.h:3212

std::string make_tuple_key_at(size_t index) const

std::string

pd_multiindex.h:3224

size_t memory_usage(bool deep = false) const

size_t

pd_multiindex.h:2402

View

const std::vector<std::optional<std::string>>& names() const

const std::vector<std::optional<std::string>>&

pd_multiindex.h:534

View

size_t nlevels() const

size_t

pd_multiindex.h:484

View

std::vector<std::pair<std::string, size_t>> pairs(counts.begin(), counts.end())

std::vector<std::pair<std::string, size_t>>

pd_multiindex.h:3040

View

MultiIndex putmask(const numpy::NDArray<numpy::bool_>& mask, const std::vector<std::string>& value) const

MultiIndex

pd_multiindex.h:2474

View

std::vector<std::vector<std::string>> ravel() const

std::vector<std::vector<std::string>>

pd_multiindex.h:2507

View

MultiIndex remove_unused_levels() const

MultiIndex

pd_multiindex.h:835

View

MultiIndex repeat(size_t repeats, const std::optional<int>& axis = std::nullopt) const

MultiIndex

pd_multiindex.h:2568

View

std::string repr() const

std::string

pd_multiindex.h:1067

View

MultiIndex result(*this)

MultiIndex

pd_multiindex.h:721

View

std::vector<std::optional<std::string>> result(size())

std::vector<std::optional<std::string>>

pd_multiindex.h:1450

View

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:1476

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:1522

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2063

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2263

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2374

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2483

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2571

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2794

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:2942

std::vector<std::vector<std::string>> result_arrays(nlevels())

std::vector<std::vector<std::string>>

pd_multiindex.h:3084

MultiIndex round(int decimals = 0) const

MultiIndex

pd_multiindex.h:2593

View

MultiIndex set_codes(const std::vector<numpy::NDArray<numpy::int64>>& codes, std::optional<size_t> level = std::nullopt, bool verify_integrity = true) const

MultiIndex

pd_multiindex.h:2619

View

void set_level_categories(size_t level, const std::vector<std::string>& categories, bool ordered = false)

void

pd_multiindex.h:541

MultiIndex set_levels(const std::vector<std::vector<std::string>>& levels, std::optional<size_t> level = std::nullopt, bool verify_integrity = true) const

MultiIndex

pd_multiindex.h:2655

View

size_t size() const

size_t

pd_multiindex.h:503

View

std::pair<size_t, size_t> slice_indexer(const std::optional<std::vector<std::string>>& start = std::nullopt, const std::optional<std::vector<std::string>>& end = std::nullopt, size_t step = 1) const

std::pair<size_t, size_t>

pd_multiindex.h:2706

View

std::pair<size_t, size_t> slice_locs(const std::optional<std::vector<std::string>>& start = std::nullopt, const std::optional<std::vector<std::string>>& end = std::nullopt, size_t step = 1) const

std::pair<size_t, size_t>

pd_multiindex.h:2732

View

MultiIndex sort(bool ascending = true) const

MultiIndex

pd_multiindex.h:2746

View

std::pair<MultiIndex, numpy::NDArray<numpy::int64>> sortlevel(size_t level = 0, bool ascending = true, const std::string& na_position = "last", bool sort_remaining = true) const

std::pair<MultiIndex, numpy::NDArray<numpy::int64>>

pd_multiindex.h:1118

View

StringMethods<MultiIndex> str() const

StringMethods<MultiIndex>

pd_multiindex.h:1102

View

MultiIndex temp(other)

MultiIndex

pd_multiindex.h:156

MultiIndex truncate(const std::optional<std::vector<std::string>>& before = std::nullopt, const std::optional<std::vector<std::string>>& after = std::nullopt) const

MultiIndex

pd_multiindex.h:2915

View

void verify_integrity_impl() const

void

pd_multiindex.h:3172

Code Examples#

The following examples are extracted from the test suite.

MultiIndex (pd_test_3_all.cpp:26015)
26005    // Level 0 (rows): unique rows {0,1,2} = 3 labels
26006    if (mi.get_level(0).size() != 3) throw std::runtime_error("Expected 3 row labels, got " + std::to_string(mi.get_level(0).size()));
26007    // Level 1 (cols): unique cols {0,1,2} = 3 labels
26008    if (mi.get_level(1).size() != 3) throw std::runtime_error("Expected 3 col labels, got " + std::to_string(mi.get_level(1).size()));
26009    std::cout << "  PASSED" << std::endl;
26010}
26011
26012int pd_test_sparse_coo_main() {
26013    try {
26014        std::cout << "========= Sparse COO MultiIndex (N1) ==================" << std::endl;
26015        pd_test_sparse_coo_non_dense();
26016        pd_test_sparse_coo_dense();
26017        pd_test_sparse_coo_empty();
26018        pd_test_sparse_coo_sorting();
26019        pd_test_sparse_coo_multiindex_levels();
26020        std::cout << "All pd_test_sparse_coo tests passed!" << std::endl;
26021        return 0;
26022    } catch (const std::exception& e) {
26023        std::cout << "FAILED: " << e.what() << std::endl;
26024        return 1;
MultiIndex (pd_test_3_all.cpp:26015)
26005    // Level 0 (rows): unique rows {0,1,2} = 3 labels
26006    if (mi.get_level(0).size() != 3) throw std::runtime_error("Expected 3 row labels, got " + std::to_string(mi.get_level(0).size()));
26007    // Level 1 (cols): unique cols {0,1,2} = 3 labels
26008    if (mi.get_level(1).size() != 3) throw std::runtime_error("Expected 3 col labels, got " + std::to_string(mi.get_level(1).size()));
26009    std::cout << "  PASSED" << std::endl;
26010}
26011
26012int pd_test_sparse_coo_main() {
26013    try {
26014        std::cout << "========= Sparse COO MultiIndex (N1) ==================" << std::endl;
26015        pd_test_sparse_coo_non_dense();
26016        pd_test_sparse_coo_dense();
26017        pd_test_sparse_coo_empty();
26018        pd_test_sparse_coo_sorting();
26019        pd_test_sparse_coo_multiindex_levels();
26020        std::cout << "All pd_test_sparse_coo tests passed!" << std::endl;
26021        return 0;
26022    } catch (const std::exception& e) {
26023        std::cout << "FAILED: " << e.what() << std::endl;
26024        return 1;
MultiIndex (pd_test_3_all.cpp:26015)
26005    // Level 0 (rows): unique rows {0,1,2} = 3 labels
26006    if (mi.get_level(0).size() != 3) throw std::runtime_error("Expected 3 row labels, got " + std::to_string(mi.get_level(0).size()));
26007    // Level 1 (cols): unique cols {0,1,2} = 3 labels
26008    if (mi.get_level(1).size() != 3) throw std::runtime_error("Expected 3 col labels, got " + std::to_string(mi.get_level(1).size()));
26009    std::cout << "  PASSED" << std::endl;
26010}
26011
26012int pd_test_sparse_coo_main() {
26013    try {
26014        std::cout << "========= Sparse COO MultiIndex (N1) ==================" << std::endl;
26015        pd_test_sparse_coo_non_dense();
26016        pd_test_sparse_coo_dense();
26017        pd_test_sparse_coo_empty();
26018        pd_test_sparse_coo_sorting();
26019        pd_test_sparse_coo_multiindex_levels();
26020        std::cout << "All pd_test_sparse_coo tests passed!" << std::endl;
26021        return 0;
26022    } catch (const std::exception& e) {
26023        std::cout << "FAILED: " << e.what() << std::endl;
26024        return 1;
from_arrays (pd_test_1_all.cpp:1994)
1984// ============================================================================
1985// Test: from_arrays factory method
1986// ============================================================================
1987void test_from_arrays() {
1988    std::cout << "========= IntervalArray: from_arrays ======================= ";
1989
1990    std::vector<numpy::int64> left_vec = {0, 10, 20};
1991    std::vector<numpy::int64> right_vec = {5, 15, 25};
1992
1993    auto arr = pandas::IntervalArrayInt64::from_arrays(left_vec, right_vec);
1994
1995    if (arr.size() != 3) {
1996        std::cout << "[FAIL] : in test_from_arrays() : size" << std::endl;
1997        return;
1998    }
1999
2000    auto interval1 = arr[1];
2001    if (!interval1.has_value() || interval1->first != 10 || interval1->second != 15) {
2002        std::cout << "[FAIL] : in test_from_arrays() : interval values" << std::endl;
2003        return;
from_arrays (pd_test_1_all.cpp:1994)
1984// ============================================================================
1985// Test: from_arrays factory method
1986// ============================================================================
1987void test_from_arrays() {
1988    std::cout << "========= IntervalArray: from_arrays ======================= ";
1989
1990    std::vector<numpy::int64> left_vec = {0, 10, 20};
1991    std::vector<numpy::int64> right_vec = {5, 15, 25};
1992
1993    auto arr = pandas::IntervalArrayInt64::from_arrays(left_vec, right_vec);
1994
1995    if (arr.size() != 3) {
1996        std::cout << "[FAIL] : in test_from_arrays() : size" << std::endl;
1997        return;
1998    }
1999
2000    auto interval1 = arr[1];
2001    if (!interval1.has_value() || interval1->first != 10 || interval1->second != 15) {
2002        std::cout << "[FAIL] : in test_from_arrays() : interval values" << std::endl;
2003        return;
from_arrays (pd_test_1_all.cpp:1994)
1984// ============================================================================
1985// Test: from_arrays factory method
1986// ============================================================================
1987void test_from_arrays() {
1988    std::cout << "========= IntervalArray: from_arrays ======================= ";
1989
1990    std::vector<numpy::int64> left_vec = {0, 10, 20};
1991    std::vector<numpy::int64> right_vec = {5, 15, 25};
1992
1993    auto arr = pandas::IntervalArrayInt64::from_arrays(left_vec, right_vec);
1994
1995    if (arr.size() != 3) {
1996        std::cout << "[FAIL] : in test_from_arrays() : size" << std::endl;
1997        return;
1998    }
1999
2000    auto interval1 = arr[1];
2001    if (!interval1.has_value() || interval1->first != 10 || interval1->second != 15) {
2002        std::cout << "[FAIL] : in test_from_arrays() : interval values" << std::endl;
2003        return;
from_frame (pd_test_3_all.cpp:9009)
8999    bool neq = mi1.equal_levels(mi3);
9000    if (neq) {
9001        std::cout << "  [FAIL] : in pd_test_3_all_multiindex_equal_levels() : different levels should not be equal" << std::endl;
9002        throw std::runtime_error("pd_test_3_all_multiindex_equal_levels failed");
9003    }
9004
9005    std::cout << " -> tests passed" << std::endl;
9006}
9007
9008void pd_test_3_all_multiindex_from_frame() {
9009    std::cout << "========= MultiIndex.from_frame() =================";
9010
9011    std::vector<std::vector<std::string>> columns = {
9012        {"a", "a", "b", "b"},
9013        {"x", "y", "x", "y"}
9014    };
9015    std::vector<std::optional<std::string>> names = {"level0", "level1"};
9016
9017    pandas::MultiIndex mi = pandas::MultiIndex::from_frame<std::string>(columns, names);
9018
9019    if (mi.size() != 4) {
from_mixed_arrays (pd_test_1_all.cpp:14830)
14820        void pd_test_multiindex_mixed_types() {
14821            std::cout << "========= mixed types (int64 + string) ================ ";
14822
14823            std::vector<std::vector<numpy::int64>> int_arrays = {
14824                {2020, 2020, 2021, 2021}
14825            };
14826            std::vector<std::vector<std::string>> str_arrays = {
14827                {"Q1", "Q2", "Q1", "Q2"}
14828            };
14829
14830            pandas::MultiIndex mi = pandas::MultiIndex::from_mixed_arrays(
14831                int_arrays, str_arrays, "is",
14832                {std::optional<std::string>("year"), std::optional<std::string>("quarter")}
14833            );
14834
14835            bool passed = true;
14836
14837            if (mi.nlevels() != 2) {
14838                std::cout << "  [FAIL] : nlevels should be 2" << std::endl;
14839                passed = false;
14840            }
from_product (pd_test_1_all.cpp:14246)
14236        }
14237
14238        void pd_test_multiindex_from_product() {
14239            std::cout << "========= from_product ================================ ";
14240
14241            std::vector<std::vector<std::string>> iterables = {
14242                {"a", "b"},
14243                {"1", "2", "3"}
14244            };
14245
14246            pandas::MultiIndex mi = pandas::MultiIndex::from_product<std::string>(iterables);
14247
14248            bool passed = true;
14249
14250            // Should have 2*3=6 entries
14251            if (mi.size() != 6) {
14252                std::cout << "  [FAIL] : size should be 6, got " << mi.size() << std::endl;
14253                passed = false;
14254            }
14255
14256            // Check order: (a,1), (a,2), (a,3), (b,1), (b,2), (b,3)
from_product (pd_test_1_all.cpp:14246)
14236        }
14237
14238        void pd_test_multiindex_from_product() {
14239            std::cout << "========= from_product ================================ ";
14240
14241            std::vector<std::vector<std::string>> iterables = {
14242                {"a", "b"},
14243                {"1", "2", "3"}
14244            };
14245
14246            pandas::MultiIndex mi = pandas::MultiIndex::from_product<std::string>(iterables);
14247
14248            bool passed = true;
14249
14250            // Should have 2*3=6 entries
14251            if (mi.size() != 6) {
14252                std::cout << "  [FAIL] : size should be 6, got " << mi.size() << std::endl;
14253                passed = false;
14254            }
14255
14256            // Check order: (a,1), (a,2), (a,3), (b,1), (b,2), (b,3)
from_tuples (pd_test_1_all.cpp:2022)
2012// ============================================================================
2013void test_from_tuples() {
2014    std::cout << "========= IntervalArray: from_tuples ======================= ";
2015
2016    std::vector<std::pair<numpy::float64, numpy::float64>> tuples = {
2017        {0.0, 1.5},
2018        {1.5, 3.0},
2019        {3.0, 4.5}
2020    };
2021
2022    auto arr = pandas::IntervalArrayFloat64::from_tuples(tuples);
2023
2024    if (arr.size() != 3) {
2025        std::cout << "[FAIL] : in test_from_tuples() : size" << std::endl;
2026        return;
2027    }
2028
2029    auto interval2 = arr[2];
2030    if (!interval2.has_value() || interval2->first != 3.0 || interval2->second != 4.5) {
2031        std::cout << "[FAIL] : in test_from_tuples() : interval values" << std::endl;
2032        return;
from_tuples (pd_test_1_all.cpp:2022)
2012// ============================================================================
2013void test_from_tuples() {
2014    std::cout << "========= IntervalArray: from_tuples ======================= ";
2015
2016    std::vector<std::pair<numpy::float64, numpy::float64>> tuples = {
2017        {0.0, 1.5},
2018        {1.5, 3.0},
2019        {3.0, 4.5}
2020    };
2021
2022    auto arr = pandas::IntervalArrayFloat64::from_tuples(tuples);
2023
2024    if (arr.size() != 3) {
2025        std::cout << "[FAIL] : in test_from_tuples() : size" << std::endl;
2026        return;
2027    }
2028
2029    auto interval2 = arr[2];
2030    if (!interval2.has_value() || interval2->first != 3.0 || interval2->second != 4.5) {
2031        std::cout << "[FAIL] : in test_from_tuples() : interval values" << std::endl;
2032        return;
get_indexer (pd_test_1_all.cpp:10332)
10322void pd_test_extension_index_get_indexer() {
10323    std::cout << "========= get_indexer =========================";
10324
10325    pandas::CategoricalArray arr1({"a", "b", "c", "d"});
10326    pandas::CategoricalIndex idx1(arr1);
10327
10328    pandas::CategoricalArray arr2({"b", "d", "x"});
10329    pandas::CategoricalIndex idx2(arr2);
10330
10331    auto indexer = idx1.get_indexer(idx2);
10332
10333    bool passed = (indexer.getSize() == 3 &&
10334                   indexer.getElementAt({0}) == 1 &&
10335                   indexer.getElementAt({1}) == 3 &&
10336                   indexer.getElementAt({2}) == -1);
10337    if (!passed) {
10338        std::cout << "  [FAIL] : in pd_test_extension_index_get_indexer() : get_indexer check failed" << std::endl;
10339        throw std::runtime_error("pd_test_extension_index_get_indexer failed");
10340    }
get_indexer_for (pd_test_3_all.cpp:716)
706// ============================================================================
707// Category 6: Index Indexer Methods
708// ============================================================================
709
710void pd_test_3_all_index_indexers() {
711    std::cout << "========= Index.get_indexer_for/non_unique/slice_indexer() ";
712
713    std::vector<std::string> vals = {"a", "b", "c", "d", "e"};
714    pandas::Index<std::string> idx(vals);
715
716    // Test get_indexer_for()
717    std::vector<std::string> target = {"b", "d", "f"};  // "f" doesn't exist
718    numpy::NDArray<numpy::int64> indexer = idx.get_indexer_for(target);
719    if (indexer.getSize() != 3) {
720        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_for size mismatch" << std::endl;
721        throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_for size");
722    }
723    // "b" is at index 1
724    if (indexer.getElementAt({0}) != 1) {
725        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : 'b' should be at index 1" << std::endl;
726        throw std::runtime_error("pd_test_3_all_index_indexers failed: 'b' index");
get_indexer_non_unique (pd_test_3_all.cpp:739)
729    if (indexer.getElementAt({1}) != 3) {
730        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : 'd' should be at index 3" << std::endl;
731        throw std::runtime_error("pd_test_3_all_index_indexers failed: 'd' index");
732    }
733    // "f" doesn't exist -> -1
734    if (indexer.getElementAt({2}) != -1) {
735        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : 'f' should be -1" << std::endl;
736        throw std::runtime_error("pd_test_3_all_index_indexers failed: 'f' index");
737    }
738
739    // Test get_indexer_non_unique()
740    std::vector<std::string> target2 = {"a", "c", "z"};  // "z" doesn't exist
741    pandas::Index<std::string> target_idx(target2);
742    auto [indexer2, missing] = idx.get_indexer_non_unique(target_idx);
743
744    if (indexer2.getSize() < 2) {
745        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_non_unique size too small" << std::endl;
746        throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_non_unique size");
747    }
748
749    // Test slice_indexer()
get_level (pd_test_3_all.cpp:26007)
25997    std::vector<int64_t> rows = {0, 1, 2};
25998    std::vector<int64_t> cols = {1, 2, 0};
25999
26000    auto s = ::pandas::series_from_coo(data, rows, cols, 3, 3, false);
26001    if (!s.has_multiindex()) throw std::runtime_error("Expected MultiIndex");
26002
26003    const auto& mi = s.multiindex();
26004    if (mi.nlevels() != 2) throw std::runtime_error("Expected 2 levels, got " + std::to_string(mi.nlevels()));
26005
26006    // Level 0 (rows): unique rows {0,1,2} = 3 labels
26007    if (mi.get_level(0).size() != 3) throw std::runtime_error("Expected 3 row labels, got " + std::to_string(mi.get_level(0).size()));
26008    // Level 1 (cols): unique cols {0,1,2} = 3 labels
26009    if (mi.get_level(1).size() != 3) throw std::runtime_error("Expected 3 col labels, got " + std::to_string(mi.get_level(1).size()));
26010    std::cout << "  PASSED" << std::endl;
26011}
26012
26013int pd_test_sparse_coo_main() {
26014    try {
26015        std::cout << "========= Sparse COO MultiIndex (N1) ==================" << std::endl;
26016        pd_test_sparse_coo_non_dense();
26017        pd_test_sparse_coo_dense();
get_level_values (pd_test_3_all.cpp:4524)
4514    }
4515
4516    std::cout << " -> tests passed" << std::endl;
4517}
4518
4519void pd_test_3_all_interval_index_get_level_values_droplevel() {
4520    std::cout << "========= IntervalIndex.get_level_values/droplevel() ";
4521
4522    pandas::IntervalIndex64 idx = pandas::IntervalIndex64::from_breaks({0, 10, 20, 30});
4523
4524    // get_level_values(0) should work
4525    pandas::IntervalIndex64 level_vals = idx.get_level_values(0);
4526    if (level_vals.size() != idx.size()) {
4527        throw std::runtime_error("get_level_values(0) size mismatch");
4528    }
4529
4530    // get_level_values(1) should throw
4531    bool threw = false;
4532    try {
4533        idx.get_level_values(1);
4534    } catch (const std::out_of_range&) {
get_level_values (pd_test_3_all.cpp:4524)
4514    }
4515
4516    std::cout << " -> tests passed" << std::endl;
4517}
4518
4519void pd_test_3_all_interval_index_get_level_values_droplevel() {
4520    std::cout << "========= IntervalIndex.get_level_values/droplevel() ";
4521
4522    pandas::IntervalIndex64 idx = pandas::IntervalIndex64::from_breaks({0, 10, 20, 30});
4523
4524    // get_level_values(0) should work
4525    pandas::IntervalIndex64 level_vals = idx.get_level_values(0);
4526    if (level_vals.size() != idx.size()) {
4527        throw std::runtime_error("get_level_values(0) size mismatch");
4528    }
4529
4530    // get_level_values(1) should throw
4531    bool threw = false;
4532    try {
4533        idx.get_level_values(1);
4534    } catch (const std::out_of_range&) {
get_level_values_str (pd_test_1_all.cpp:14394)
14384        void pd_test_multiindex_get_level_values() {
14385            std::cout << "========= get_level_values ============================ ";
14386
14387            std::vector<std::vector<std::string>> arrays = {
14388                {"a", "a", "b"},
14389                {"x", "y", "z"}
14390            };
14391
14392            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14393
14394            auto level0_vals = mi.get_level_values_str(0);
14395            auto level1_vals = mi.get_level_values_str(1);
14396
14397            bool passed = true;
14398
14399            if (level0_vals.size() != 3 || level0_vals[0] != "a" ||
14400                level0_vals[1] != "a" || level0_vals[2] != "b") {
14401                std::cout << "  [FAIL] : level 0 values mismatch" << std::endl;
14402                passed = false;
14403            }
get_loc (pd_test_1_all.cpp:10281)
10271    bool passed = (idx.contains("apple") && idx.contains("banana") && !idx.contains("grape"));
10272    if (!passed) {
10273        std::cout << "  [FAIL] : in pd_test_extension_index_contains() : contains check failed" << std::endl;
10274        throw std::runtime_error("pd_test_extension_index_contains failed");
10275    }
10276
10277    std::cout << " -> tests passed" << std::endl;
10278}
10279
10280void pd_test_extension_index_get_loc_unique() {
10281    std::cout << "========= get_loc (unique) =========================";
10282
10283    pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284    pandas::CategoricalIndex idx(arr);
10285
10286    auto loc_apple = idx.get_loc("apple");
10287    auto loc_banana = idx.get_loc("banana");
10288    auto loc_cherry = idx.get_loc("cherry");
10289
10290    bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291                   std::get<size_t>(loc_banana) == 1 &&
get_loc_level (pd_test_3_all.cpp:9033)
9023    if (mi.nlevels() != 2) {
9024        std::cout << "  [FAIL] : in pd_test_3_all_multiindex_from_frame() : nlevels mismatch" << std::endl;
9025        throw std::runtime_error("pd_test_3_all_multiindex_from_frame failed: nlevels");
9026    }
9027
9028    std::cout << " -> tests passed" << std::endl;
9029}
9030
9031void pd_test_3_all_multiindex_get_loc_level() {
9032    std::cout << "========= MultiIndex.get_loc_level() ==============";
9033
9034    std::vector<std::vector<std::string>> arrays = {
9035        {"a", "a", "b", "b"},
9036        {"1", "2", "1", "2"}
9037    };
9038    pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
9039
9040    auto [locs, result_mi] = mi.get_loc_level("a", 0, true);
9041
9042    if (locs.size() != 2) {
get_loc_string (pd_test_3_all.cpp:28108)
28098        vals.push_back(numpy::timedelta64(ns, numpy::DateTimeUnit::Nanosecond));
28099    }
28100    return pandas::TimedeltaArray(vals);
28101}
28102
28103void pd_test_getitem_timedelta_str_lookup() {
28104    std::cout << "  -- pd_test_getitem_timedelta_str_lookup --" << std::endl;
28105    int fail = 0;
28106    auto tda = ge_make_tda({1 * GE_NS_PER_DAY, 2 * GE_NS_PER_DAY, 3 * GE_NS_PER_DAY});
28107    pandas::TimedeltaIndex tdi(tda);
28108    auto pos = tdi.get_loc_string("2 days");
28109    if (!pos.has_value()) { std::cout << "    FAIL: '2 days' not found" << std::endl; fail++; }
28110    else if (*pos != 1) { std::cout << "    FAIL: expected pos=1, got " << *pos << std::endl; fail++; }
28111    if (fail == 0) std::cout << "    OK" << std::endl;
28112    if (fail) throw std::runtime_error("pd_test_getitem_timedelta_str_lookup failed");
28113}
28114
28115void pd_test_getitem_timedelta_str_not_found() {
28116    std::cout << "  -- pd_test_getitem_timedelta_str_not_found --" << std::endl;
28117    int fail = 0;
28118    auto tda = ge_make_tda({1 * GE_NS_PER_DAY});
get_locs (pd_test_3_all.cpp:9057)
9047    if (locs[0] != 0 || locs[1] != 1) {
9048        std::cout << "  [FAIL] : in pd_test_3_all_multiindex_get_loc_level() : wrong locations" << std::endl;
9049        throw std::runtime_error("pd_test_3_all_multiindex_get_loc_level failed: wrong locs");
9050    }
9051
9052    std::cout << " -> tests passed" << std::endl;
9053}
9054
9055void pd_test_3_all_multiindex_get_locs() {
9056    std::cout << "========= MultiIndex.get_locs() ===================";
9057
9058    std::vector<std::vector<std::string>> arrays = {
9059        {"a", "a", "b", "b"},
9060        {"1", "2", "1", "2"}
9061    };
9062    pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
9063
9064    std::vector<std::vector<std::string>> seq = {{"a", "1"}, {"b", "2"}};
9065    numpy::NDArray<numpy::int64> locs = mi.get_locs(seq);
get_name (pd_test_5_all.cpp:50005)
49995        std::cout << tag << "   [" << c << "]"
49996                  << " override=" << override_or_empty(df, c)
49997                  << " dtype=" << series_dtype_or_missing(df, c) << "\n";
49998    }
49999    std::cout << tag << " has_multiindex=" << df.has_multiindex() << "\n";
50000    if (df.has_multiindex()) {
50001        const auto& mi = df.multiindex();
50002        std::cout << tag << " mi.nlevels=" << mi.nlevels()
50003                  << " mi.size=" << mi.size() << "\n";
50004        for (size_t i = 0; i < mi.nlevels(); ++i) {
50005            auto nm = mi.get_name(i);
50006            std::cout << tag << "   level[" << i << "] name="
50007                      << (nm.has_value() ? *nm : std::string("<none>"))
50008                      << " level_size=" << mi.get_level(i).size()
50009                      << " level_dtype=" << mi.get_level(i).dtype_name()
50010                      << "\n";
50011        }
50012    }
50013    std::cout << tag << " to_string:\n" << df.to_string() << "\n";
50014}
get_slice_bound (pd_test_3_all.cpp:3644)
3634    formatted = idx.format(custom_formatter);
3635
3636    if (formatted[0] != "val:1") {
3637        throw std::runtime_error("custom formatter failed");
3638    }
3639
3640    std::cout << " -> tests passed" << std::endl;
3641}
3642
3643void pd_test_3_all_index_get_slice_bound() {
3644    std::cout << "========= Index.get_slice_bound() ==================";
3645
3646    pandas::Index<numpy::int64> idx({10, 20, 30, 40, 50});
3647
3648    // Exact match, left side
3649    size_t bound = idx.get_slice_bound(30, "left");
3650    if (bound != 2) {
3651        throw std::runtime_error("left bound for 30 should be 2");
3652    }
3653
3654    // Exact match, right side
get_string (pd_test_3_all.cpp:27746)
27736        }
27737    }
27738
27739    pandas::Series<numpy::int64> si({10, 20, 30}, "ints");
27740    auto result2 = si.astype("str");
27741    auto* str_s2 = dynamic_cast<pandas::Series<std::string>*>(result2.get());
27742    if (!str_s2) {
27743        std::cout << "    FAIL: expected Series<string> from int" << std::endl;
27744        fail++;
27745    } else {
27746        if (str_s2->get_string(0) != "10") {
27747            std::cout << "    FAIL: expected '10', got '" << str_s2->get_string(0) << "'" << std::endl;
27748            fail++;
27749        }
27750    }
27751
27752    if (fail == 0) std::cout << "    OK" << std::endl;
27753}
27754
27755void pd_test_astype_datetime_to_string() {
27756    std::cout << "  -- pd_test_astype_datetime_to_string --" << std::endl;
get_tuple_str (pd_test_3_all.cpp:1023)
1013        }
1014        for (size_t i = 0; i < bn.size(); ++i) {
1015            if (bn[i].value_or("") != nn[i].value_or("")) {
1016                std::cout << "  [FAIL] level name " << i << " differs: '"
1017                          << bn[i].value_or("") << "' vs '"
1018                          << nn[i].value_or("") << "'" << std::endl;
1019                throw std::runtime_error("from_arrays brace-init: names divergence");
1020            }
1021        }
1022        for (size_t i = 0; i < via_brace.size(); ++i) {
1023            std::vector<std::string> tup_brace = via_brace.get_tuple_str(i);
1024            std::vector<std::string> tup_named = via_named.get_tuple_str(i);
1025            if (tup_brace != tup_named) {
1026                std::cout << "  [FAIL] row " << i << " differs" << std::endl;
1027                throw std::runtime_error("from_arrays brace-init: content divergence");
1028            }
1029        }
1030    }
1031
1032    // Case C: integer element type - exercises template deduction beyond string
1033    {
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897    std::cout << "========= inherited take ==============================";
5898
5899    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900    pandas::CategoricalIndex idx(arr);
5901
5902    std::vector<size_t> indices = {0, 2, 3};
5903    pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905    bool passed = (taken.size() == 3);
5906    if (!passed) {
5907        std::cout << "  [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908        throw std::runtime_error("pd_test_categorical_index_take failed");
5909    }
5910
5911    std::cout << " -> tests passed" << std::endl;
5912}
where (pd_test_1_all.cpp:22018)
22008            data["B"] = {5.0, 6.0, 7.0, 8.0};
22009            pandas::DataFrame df(data);
22010
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
drop (pd_test_1_all.cpp:6558)
6548            if (df.ncols() != 2) {
6549                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550                throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551            }
6552            if (!popped) {
6553                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554                throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555            }
6556
6557            // Test drop columns
6558            auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559            if (dropped.ncols() != 1) {
6560                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561                throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562            }
6563
6564            // Test rename
6565            auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566            if (!renamed.has_column("X")) {
6567                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568                throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
drop_duplicates (pd_test_1_all.cpp:6639)
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
6634                std::map<std::string, std::vector<numpy::int64>> dup_data;
6635                dup_data["A"] = {1, 1, 2, 2};
6636                dup_data["B"] = {1, 1, 2, 3};
6637                pandas::DataFrame df_dup(dup_data);
6638
6639                auto deduped = df_dup.drop_duplicates();
6640                // Rows 0 and 1 are duplicates (A=1, B=1), so should have 3 rows
6641                if (deduped.nrows() != 3) {
6642                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : drop_duplicates nrows != 3, got " << deduped.nrows() << std::endl;
6643                    throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644                }
6645            }
6646
6647            // Test assign
6648            {
6649                std::map<std::string, std::vector<numpy::int64>> assign_data;
droplevel (pd_test_1_all.cpp:14428)
14418        void pd_test_multiindex_droplevel() {
14419            std::cout << "========= droplevel =================================== ";
14420
14421            std::vector<std::vector<std::string>> arrays = {
14422                {"a", "a", "b"},
14423                {"x", "y", "z"},
14424                {"1", "2", "3"}
14425            };
14426
14427            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428            pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430            bool passed = true;
14431
14432            if (dropped.nlevels() != 2) {
14433                std::cout << "  [FAIL] : nlevels should be 2 after drop" << std::endl;
14434                passed = false;
14435            }
14436
14437            // Check remaining levels
14438            auto tup = dropped[0];
dropna (pd_test_1_all.cpp:531)
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
rename (pd_test_1_all.cpp:5816)
5806    std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810    std::cout << "========= rename ======================================";
5811
5812    pandas::CategoricalArray arr({"x", "y"});
5813    pandas::CategoricalIndex idx(arr, "old_name");
5814
5815    pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817    bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818                   renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819    if (!passed) {
5820        std::cout << "  [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821        throw std::runtime_error("pd_test_categorical_index_rename failed");
5822    }
5823
5824    std::cout << " -> tests passed" << std::endl;
5825}
reorder_levels (pd_test_1_all.cpp:14495)
14485        void pd_test_multiindex_reorder_levels() {
14486            std::cout << "========= reorder_levels ============================== ";
14487
14488            std::vector<std::vector<std::string>> arrays = {
14489                {"a", "b"},
14490                {"x", "y"},
14491                {"1", "2"}
14492            };
14493
14494            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495            pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497            bool passed = true;
14498
14499            auto tup = reordered[0];
14500            if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501                std::cout << "  [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502                passed = false;
14503            }
14504
14505            if (!passed) {
set_names (pd_test_1_all.cpp:14519)
14509            std::cout << "-> tests passed" << std::endl;
14510        }
14511
14512        void pd_test_multiindex_set_names() {
14513            std::cout << "========= set_names =================================== ";
14514
14515            std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
14516            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14517
14518            std::vector<std::optional<std::string>> new_names = {"level_a", "level_b"};
14519            pandas::MultiIndex named = mi.set_names(new_names);
14520
14521            bool passed = (named.names()[0] == "level_a" && named.names()[1] == "level_b");
14522
14523            if (!passed) {
14524                std::cout << "  [FAIL] : names not set correctly" << std::endl;
14525                throw std::runtime_error("pd_test_multiindex_set_names failed");
14526            }
14527
14528            std::cout << "-> tests passed" << std::endl;
14529        }
swaplevel (pd_test_1_all.cpp:14461)
14451        void pd_test_multiindex_swaplevel() {
14452            std::cout << "========= swaplevel =================================== ";
14453
14454            std::vector<std::vector<std::string>> arrays = {
14455                {"a", "b"},
14456                {"x", "y"}
14457            };
14458            std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461            pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463            bool passed = true;
14464
14465            // Tuple should be reversed
14466            auto tup = swapped[0];
14467            if (tup[0] != "x" || tup[1] != "a") {
14468                std::cout << "  [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469                passed = false;
14470            }
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
isna (pd_test_1_all.cpp:524)
514            throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515        }
516
517        // Test count (non-NA)
518        if (arr.count() != 2) {
519            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520            throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
676    // Index 0: 1.0 -> not null
677    if (isnull_result.getElementAt({0})) {
678        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679        throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680    }
681    // Index 1: NaN -> null
notna (pd_test_1_all.cpp:6595)
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593                }
6594
6595                auto notna_mask = df_na.notna();
6596                if (notna_mask.getElementAt({1, 0})) {
6597                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598                    throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599                }
6600            }
6601
6602            // Test fillna
6603            {
6604                std::map<std::string, std::vector<numpy::float64>> float_data;
6605                float_data["X"] = {1.0, std::nan(""), 3.0};
notnull (pd_test_3_all.cpp:665)
655    }
656
657    std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
nunique (pd_test_1_all.cpp:10604)
10594    std::cout << " -> tests passed" << std::endl;
10595}
10596
10597void pd_test_extension_index_nunique() {
10598    std::cout << "========= nunique =========================";
10599
10600    pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10601    pandas::CategoricalIndex idx(arr);
10602
10603    bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10604    if (!passed) {
10605        std::cout << "  [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10606        throw std::runtime_error("pd_test_extension_index_nunique failed");
10607    }
10608
10609    std::cout << " -> tests passed" << std::endl;
10610}
10611
10612void pd_test_extension_index_factorize() {
10613    std::cout << "========= factorize =========================";
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833    std::cout << "========= map =========================================";
5834
5835    pandas::CategoricalArray arr({"yes", "no", "yes"});
5836    pandas::CategoricalIndex idx(arr);
5837
5838    std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839    pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841    bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842                   !mapped.has_category("yes") && !mapped.has_category("no"));
5843    if (!passed) {
5844        std::cout << "  [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845        throw std::runtime_error("pd_test_categorical_index_map failed");
5846    }
5847
5848    std::cout << " -> tests passed" << std::endl;
5849}
equal_levels (pd_test_3_all.cpp:8979)
8969    }
8970
8971    std::cout << " -> tests passed (placeholder)" << std::endl;
8972}
8973
8974// ============================================================================
8975// Category 34: Plan 07 - MultiIndex New Tests (equal_levels, from_frame, etc.)
8976// ============================================================================
8977
8978void pd_test_3_all_multiindex_equal_levels() {
8979    std::cout << "========= MultiIndex.equal_levels() ===============";
8980
8981    // Create two MultiIndex with same levels
8982    std::vector<std::vector<std::string>> arrays1 = {{"a", "a", "b", "b"}, {"1", "2", "1", "2"}};
8983    pandas::MultiIndex mi1 = pandas::MultiIndex::from_arrays<std::string>(arrays1);
8984
8985    std::vector<std::vector<std::string>> arrays2 = {{"a", "a", "b", "b"}, {"1", "2", "1", "2"}};
8986    pandas::MultiIndex mi2 = pandas::MultiIndex::from_arrays<std::string>(arrays2);
8987
8988    // Test equal levels
8989    bool eq = mi1.equal_levels(mi2);
equals (pd_test_1_all.cpp:5866)
5856    std::cout << "========= equals ======================================";
5857
5858    pandas::CategoricalArray arr1({"a", "b", "a"});
5859    pandas::CategoricalArray arr2({"a", "b", "a"});
5860    pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862    pandas::CategoricalIndex idx1(arr1);
5863    pandas::CategoricalIndex idx2(arr2);
5864    pandas::CategoricalIndex idx3(arr3);
5865
5866    bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867    if (!passed) {
5868        std::cout << "  [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869        throw std::runtime_error("pd_test_categorical_index_equals failed");
5870    }
5871
5872    std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876    std::cout << "========= identical ===================================";
levels (pd_test_2_all.cpp:9787)
9777            pandas::DataFrame df(data);
9778
9779            std::vector<std::string> hier_index = {
9780                "Final exam:History:January",
9781                "Final exam:Geography:February",
9782                "Coursework:History:March",
9783                "Coursework:Geography:April"
9784            };
9785            df.set_index(std::make_unique<pandas::Index<std::string>>(hier_index));
9786
9787            // Default: swap last two levels (i=-2, j=-1)
9788            pandas::DataFrame result = df.swaplevel();
9789
9790            std::string idx0 = result.index().get_value_str(0);
9791            std::string idx1 = result.index().get_value_str(1);
9792            std::string idx2 = result.index().get_value_str(2);
9793            std::string idx3 = result.index().get_value_str(3);
9794
9795            bool passed = (idx0 == "Final exam:January:History" &&
9796                           idx1 == "Final exam:February:Geography" &&
9797                           idx2 == "Coursework:March:History" &&
levshape (pd_test_1_all.cpp:14312)
14302        void pd_test_multiindex_levshape() {
14303            std::cout << "========= levshape property =========================== ";
14304
14305            std::vector<std::vector<std::string>> arrays = {
14306                {"a", "a", "b", "b", "c"},
14307                {"x", "y", "x", "y", "z"}
14308            };
14309
14310            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14311
14312            auto shape = mi.levshape();
14313
14314            bool passed = (shape.size() == 2 && shape[0] == 3 && shape[1] == 3);
14315
14316            if (!passed) {
14317                std::cout << "  [FAIL] : levshape should be [3, 3]" << std::endl;
14318                throw std::runtime_error("pd_test_multiindex_levshape failed");
14319            }
14320
14321            std::cout << "-> tests passed" << std::endl;
14322        }
argsort (pd_test_1_all.cpp:1304)
1294        std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296        pandas::DatetimeArray arr(std::vector<std::string>{
1297            "2023-06-15",
1298            "NaT",
1299            "2023-01-01",
1300            "2023-12-31"
1301        });
1302
1303        // argsort ascending
1304        auto indices = arr.argsort(true, "last");
1305        // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
searchsorted (pd_test_1_all.cpp:18958)
18948    // =========================================================================
18949    // Search Tests
18950    // =========================================================================
18951
18952    void pd_test_range_index_searchsorted() {
18953        std::cout << "========= searchsorted ================================ ";
18954
18955        pandas::RangeIndex ri(0, 10, 2);  // [0, 2, 4, 6, 8]
18956
18957        bool passed = (ri.searchsorted(4, "left") == 2 &&
18958                      ri.searchsorted(4, "right") == 3 &&
18959                      ri.searchsorted(3, "left") == 2 &&   // 3 would go between 2 and 4
18960                      ri.searchsorted(-1, "left") == 0 &&  // Before all
18961                      ri.searchsorted(10, "left") == 5);   // After all
18962
18963        if (!passed) {
18964            std::cout << "  [FAIL] : searchsorted" << std::endl;
18965            throw std::runtime_error("pd_test_range_index_searchsorted failed");
18966        }
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
to_frame (pd_test_3_all.cpp:4931)
4921    size_t usage = mi.memory_usage(true);
4922    if (usage == 0) {
4923        throw std::runtime_error("memory_usage() should return > 0");
4924    }
4925
4926    std::cout << " -> tests passed" << std::endl;
4927}
4928
4929void pd_test_3_all_multiindex_to_frame() {
4930    std::cout << "========= MultiIndex.to_frame() =======================";
4931
4932    std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
4933    std::vector<std::optional<std::string>> names = {"first", "second"};
4934    pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
4935
4936    auto frame = mi.to_frame();
4937    if (frame.find("first") == frame.end() || frame.find("second") == frame.end()) {
4938        throw std::runtime_error("to_frame() missing columns");
4939    }
transpose (pd_test_1_all.cpp:16648)
16638                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640            }
16641
16642            passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643            if (!passed) {
16644                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646            }
16647
16648            // Test transpose() alias
16649            auto transposed2 = s.transpose();
16650            passed = transposed2.size() == s.size();
16651            if (!passed) {
16652                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653                throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654            }
16655
16656            std::cout << " -> tests passed" << std::endl;
16657        }
append (pd_test_1_all.cpp:10650)
10640    std::cout << "========= append =========================";
10641
10642    // Use same categories for both arrays (required by CategoricalArray::concat)
10643    std::vector<std::string> cats = {"a", "b", "c", "d"};
10644    pandas::CategoricalArray arr1({"a", "b"}, cats);
10645    pandas::CategoricalIndex idx1(arr1);
10646
10647    pandas::CategoricalArray arr2({"c", "d"}, cats);
10648    pandas::CategoricalIndex idx2(arr2);
10649
10650    auto appended = idx1.append(idx2);
10651
10652    bool passed = (appended.size() == 4);
10653    if (!passed) {
10654        std::cout << "  [FAIL] : in pd_test_extension_index_append() : append check failed" << std::endl;
10655        throw std::runtime_error("pd_test_extension_index_append failed");
10656    }
10657
10658    std::cout << " -> tests passed" << std::endl;
10659}
join (pd_test_1_all.cpp:12353)
12343            std::cout << " -> tests passed" << std::endl;
12344        }
12345
12346        void pd_test_index_join() {
12347            std::cout << "========= join ========================================";
12348
12349            pandas::Index<numpy::int64> idx1{1, 2, 3};
12350            pandas::Index<numpy::int64> idx2{2, 3, 4};
12351
12352            auto [inner_joined, left_idx, right_idx] = idx1.join(idx2, "inner");
12353            bool passed = (inner_joined.size() == 2);  // {2, 3}
12354
12355            auto [outer_joined, ol_idx, or_idx] = idx1.join(idx2, "outer");
12356            passed = passed && (outer_joined.size() == 4);  // {1, 2, 3, 4}
12357
12358            if (!passed) {
12359                std::cout << "  [FAIL] : in pd_test_index_join() : join failed" << std::endl;
12360                throw std::runtime_error("pd_test_index_join failed");
12361            }
asof (pd_test_2_all.cpp:366)
356        std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357        return 0;
358    }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
asof_locs (pd_test_3_all.cpp:3557)
3547        throw std::runtime_error("all() should be true for empty index");
3548    }
3549    if (empty_idx.any()) {
3550        throw std::runtime_error("any() should be false for empty index");
3551    }
3552
3553    std::cout << " -> tests passed" << std::endl;
3554}
3555
3556void pd_test_3_all_index_asof() {
3557    std::cout << "========= Index.asof()/asof_locs() =================";
3558
3559    // Test with monotonically increasing index
3560    pandas::Index<numpy::int64> idx({10, 20, 30, 40, 50});
3561
3562    // Exact match
3563    auto result = idx.asof(30);
3564    if (!result.has_value() || result.value() != 30) {
3565        throw std::runtime_error("asof() exact match should return 30");
3566    }
diff (pd_test_1_all.cpp:5171)
5161        }
5162
5163        void pd_test_arithmetic_dataframe_diff_shift() {
5164            std::cout << "========= DataFrame diff/shift ==================";
5165
5166            std::map<std::string, std::vector<double>> data;
5167            data["A"] = {1.0, 3.0, 6.0, 10.0};
5168            pandas::DataFrame df(data);
5169
5170            // diff: [NaN, 2, 3, 4]
5171            auto d = df.diff();
5172            std::string val = d["A"].get_value_str(1);
5173            bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174            if (!passed) {
5175                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177            }
5178
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
difference (pd_test_1_all.cpp:10718)
10708    std::cout << "========= difference =========================";
10709
10710    // Use same categories for both arrays
10711    std::vector<std::string> cats = {"a", "b", "c", "d"};
10712    pandas::CategoricalArray arr1({"a", "b", "c", "d"}, cats);
10713    pandas::CategoricalIndex idx1(arr1);
10714
10715    pandas::CategoricalArray arr2({"b", "d"}, cats);
10716    pandas::CategoricalIndex idx2(arr2);
10717
10718    auto diff = idx1.difference(idx2);
10719
10720    bool passed = (diff.size() == 2 &&
10721                   diff.contains("a") && diff.contains("c") &&
10722                   !diff.contains("b") && !diff.contains("d"));
10723    if (!passed) {
10724        std::cout << "  [FAIL] : in pd_test_extension_index_difference() : difference check failed" << std::endl;
10725        throw std::runtime_error("pd_test_extension_index_difference failed");
10726    }
10727
10728    std::cout << " -> tests passed" << std::endl;
shift (pd_test_1_all.cpp:5188)
5178            // First element should be NaN
5179            val = d["A"].get_value_str(0);
5180            passed = std::isnan(std::stod(val));
5181            if (!passed) {
5182                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5183                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5184            }
5185
5186            // shift: [NaN, 1, 3, 6]
5187            auto s = df.shift();
5188            val = s["A"].get_value_str(1);
5189            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5190            if (!passed) {
5191                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5192                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5193            }
5194
5195            std::cout << " -> tests passed" << std::endl;
5196        }
to_flat_index (pd_test_1_all.cpp:14733)
14723        void pd_test_multiindex_to_flat_index() {
14724            std::cout << "========= to_flat_index =============================== ";
14725
14726            std::vector<std::vector<std::string>> arrays = {
14727                {"a", "b"},
14728                {"x", "y"}
14729            };
14730
14731            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14732            auto flat = mi.to_flat_index();
14733
14734            bool passed = (flat.size() == 2 &&
14735                          flat[0][0] == "a" && flat[0][1] == "x" &&
14736                          flat[1][0] == "b" && flat[1][1] == "y");
14737
14738            if (!passed) {
14739                std::cout << "  [FAIL] : to_flat_index incorrect" << std::endl;
14740                throw std::runtime_error("pd_test_multiindex_to_flat_index failed");
14741            }
to_list (pd_test_1_all.cpp:10247)
10237    std::cout << " -> tests passed" << std::endl;
10238}
10239
10240void pd_test_extension_index_to_list() {
10241    std::cout << "========= to_list =========================";
10242
10243    pandas::CategoricalArray arr({"x", "y", "z"});
10244    pandas::CategoricalIndex idx(arr);
10245
10246    auto list = idx.to_list();
10247
10248    bool passed = (list.size() == 3 &&
10249                   list[0].has_value() && *list[0] == "x" &&
10250                   list[1].has_value() && *list[1] == "y" &&
10251                   list[2].has_value() && *list[2] == "z");
10252    if (!passed) {
10253        std::cout << "  [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10254        throw std::runtime_error("pd_test_extension_index_to_list failed");
10255    }
to_numpy (pd_test_1_all.cpp:16764)
16754        // =====================================================================
16755        // to_numpy Tests
16756        // =====================================================================
16757
16758        void pd_test_ndframe_to_numpy() {
16759            std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761            pandas::Series<int> s({10, 20, 30});
16762
16763            auto arr = s.to_numpy();
16764
16765            bool passed = arr.getSize() == 3;
16766            if (!passed) {
16767                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768                throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769            }
16770
16771            passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772            if (!passed) {
16773                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
tolist (pd_test_3_all.cpp:2300)
2290        threw = true;
2291    }
2292    if (!threw) {
2293        throw std::runtime_error("swapaxes should throw for invalid axes");
2294    }
2295
2296    std::cout << " -> tests passed" << std::endl;
2297}
2298
2299void pd_test_3_all_categorical_to_list() {
2300    std::cout << "========= CategoricalArray.to_list()/tolist() =========";
2301
2302    std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "c"};
2303    pandas::CategoricalArray arr(values);
2304
2305    auto list = arr.to_list();
2306    if (list.size() != 4 || *list[0] != "a" || *list[1] != "b" ||
2307        list[2].has_value() || *list[3] != "c") {
2308        throw std::runtime_error("to_list failed");
2309    }
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
infer_objects (pd_test_1_all.cpp:27595)
27585            // Create DataFrame with string column containing integers
27586            std::map<std::string, std::vector<std::string>> data;
27587            data["A"] = {"1", "2", "3", "4", "5"};
27588
27589            pandas::DataFrame df(data);
27590
27591            // Before inference, dtype should be string/object
27592            std::string before_dtype = df["A"].dtype_name();
27593
27594            // Apply infer_objects
27595            pandas::DataFrame result = df.infer_objects();
27596
27597            // After inference, dtype should be int64
27598            std::string after_dtype = result["A"].dtype_name();
27599
27600            bool passed = (after_dtype == "int64");
27601            if (!passed) {
27602                std::cout << "  [FAIL] : in pd_test_infer_objects_integer_column() : expected int64, got " << after_dtype << std::endl;
27603                throw std::runtime_error("pd_test_infer_objects_integer_column failed");
27604            }
view (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
duplicated (pd_test_1_all.cpp:10583)
10573    std::cout << " -> tests passed" << std::endl;
10574}
10575
10576void pd_test_extension_index_duplicated() {
10577    std::cout << "========= duplicated =========================";
10578
10579    pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10580    pandas::CategoricalIndex idx(arr);
10581
10582    auto dup_mask = idx.duplicated("first");
10583
10584    bool passed = (dup_mask.getElementAt({0}) == false &&
10585                   dup_mask.getElementAt({1}) == false &&
10586                   dup_mask.getElementAt({2}) == true &&
10587                   dup_mask.getElementAt({3}) == false &&
10588                   dup_mask.getElementAt({4}) == true);
10589    if (!passed) {
10590        std::cout << "  [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10591        throw std::runtime_error("pd_test_extension_index_duplicated failed");
10592    }
intersection (pd_test_1_all.cpp:10672)
10662    std::cout << "========= intersection =========================";
10663
10664    // Use same categories for both arrays
10665    std::vector<std::string> cats = {"a", "b", "c", "d", "e", "f"};
10666    pandas::CategoricalArray arr1({"a", "b", "c", "d"}, cats);
10667    pandas::CategoricalIndex idx1(arr1);
10668
10669    pandas::CategoricalArray arr2({"b", "c", "e", "f"}, cats);
10670    pandas::CategoricalIndex idx2(arr2);
10671
10672    auto inter = idx1.intersection(idx2);
10673
10674    bool passed = (inter.size() == 2 && inter.contains("b") && inter.contains("c"));
10675    if (!passed) {
10676        std::cout << "  [FAIL] : in pd_test_extension_index_intersection() : intersection check failed" << std::endl;
10677        throw std::runtime_error("pd_test_extension_index_intersection failed");
10678    }
10679
10680    std::cout << " -> tests passed" << std::endl;
10681}
isin (pd_test_1_all.cpp:5938)
5928    std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932    std::cout << "========= inherited isin ==============================";
5933
5934    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935    pandas::CategoricalIndex idx(arr);
5936
5937    std::vector<std::string> values = {"a", "c"};
5938    numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940    bool passed = (mask.getSize() == 4 &&
5941                   mask.getElementAt({0}) == true &&   // a
5942                   mask.getElementAt({1}) == false &&  // b
5943                   mask.getElementAt({2}) == true &&   // c
5944                   mask.getElementAt({3}) == false);   // d
5945    if (!passed) {
5946        std::cout << "  [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947        throw std::runtime_error("pd_test_categorical_index_isin failed");
5948    }
symmetric_difference (pd_test_1_all.cpp:10742)
10732    std::cout << "========= symmetric_difference =========================";
10733
10734    // Use same categories for both arrays
10735    std::vector<std::string> cats = {"a", "b", "c", "d"};
10736    pandas::CategoricalArray arr1({"a", "b", "c"}, cats);
10737    pandas::CategoricalIndex idx1(arr1);
10738
10739    pandas::CategoricalArray arr2({"b", "c", "d"}, cats);
10740    pandas::CategoricalIndex idx2(arr2);
10741
10742    auto sym_diff = idx1.symmetric_difference(idx2);
10743
10744    bool passed = (sym_diff.size() == 2 &&
10745                   sym_diff.contains("a") && sym_diff.contains("d") &&
10746                   !sym_diff.contains("b") && !sym_diff.contains("c"));
10747    if (!passed) {
10748        std::cout << "  [FAIL] : in pd_test_extension_index_symmetric_difference() : symmetric_difference check failed" << std::endl;
10749        throw std::runtime_error("pd_test_extension_index_symmetric_difference failed");
10750    }
10751
10752    std::cout << " -> tests passed" << std::endl;
union_ (pd_test_1_all.cpp:10694)
10684    std::cout << "========= union =========================";
10685
10686    // Use same categories for both arrays
10687    std::vector<std::string> cats = {"a", "b", "c", "d", "e"};
10688    pandas::CategoricalArray arr1({"a", "b", "c"}, cats);
10689    pandas::CategoricalIndex idx1(arr1);
10690
10691    pandas::CategoricalArray arr2({"b", "c", "d", "e"}, cats);
10692    pandas::CategoricalIndex idx2(arr2);
10693
10694    auto uni = idx1.union_(idx2);
10695
10696    bool passed = (uni.size() == 5 &&
10697                   uni.contains("a") && uni.contains("b") && uni.contains("c") &&
10698                   uni.contains("d") && uni.contains("e"));
10699    if (!passed) {
10700        std::cout << "  [FAIL] : in pd_test_extension_index_union() : union check failed" << std::endl;
10701        throw std::runtime_error("pd_test_extension_index_union failed");
10702    }
10703
10704    std::cout << " -> tests passed" << std::endl;
unique (pd_test_1_all.cpp:1345)
1335        pandas::DatetimeArray arr(std::vector<std::string>{
1336            "2023-01-01",
1337            "2023-06-15",
1338            "2023-01-01",
1339            "NaT",
1340            "2023-06-15",
1341            "NaT"
1342        });
1343
1344        // unique
1345        auto uniq = arr.unique();
1346        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347        if (uniq.size() != 3) {
1348            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350        }
1351
1352        // factorize
1353        auto [codes, uniques] = arr.factorize();
1354        // Codes for NaT should be -1
1355        if (codes.getElementAt({3}) != -1) {
is_ (pd_test_3_all.cpp:3972)
3963    // For typed Index, this is a no-op
3964    if (result.size() != 5) {
3965        throw std::runtime_error("infer_objects size should be 5");
3966    }
3967
3968    std::cout << " -> tests passed" << std::endl;
3969}
3970
3971void pd_test_3_all_index_is_() {
3972    std::cout << "========= Index.is_() ==============================";
3973
3974    pandas::Index<numpy::int64> idx1({1, 2, 3, 4, 5});
3975    pandas::Index<numpy::int64> idx2({1, 2, 3, 4, 5});  // Different object
3976
3977    // Different objects should not be the same
3978    if (idx1.is_(idx2)) {
3979        throw std::runtime_error("different objects should not be is_() equal");
3980    }
3981
3982    // Same object should be the same
is_boolean (pd_test_3_all.cpp:3290)
3281    std::cout << " -> tests passed" << std::endl;
3282}
3283
3284void pd_test_3_all_datetime_index_type_checks() {
3285    std::cout << "========= DatetimeIndex type checks ======================";
3286
3287    pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
3288
3289    // Type check methods
3290    if (idx.is_boolean()) {
3291        throw std::runtime_error("is_boolean() should be false");
3292    }
3293    if (idx.is_categorical()) {
3294        throw std::runtime_error("is_categorical() should be false");
3295    }
3296    if (idx.is_floating()) {
3297        throw std::runtime_error("is_floating() should be false");
3298    }
3299    if (idx.is_integer()) {
3300        throw std::runtime_error("is_integer() should be false");
is_categorical (pd_test_3_all.cpp:3293)
3284void pd_test_3_all_datetime_index_type_checks() {
3285    std::cout << "========= DatetimeIndex type checks ======================";
3286
3287    pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
3288
3289    // Type check methods
3290    if (idx.is_boolean()) {
3291        throw std::runtime_error("is_boolean() should be false");
3292    }
3293    if (idx.is_categorical()) {
3294        throw std::runtime_error("is_categorical() should be false");
3295    }
3296    if (idx.is_floating()) {
3297        throw std::runtime_error("is_floating() should be false");
3298    }
3299    if (idx.is_integer()) {
3300        throw std::runtime_error("is_integer() should be false");
3301    }
3302    if (idx.is_interval()) {
3303        throw std::runtime_error("is_interval() should be false");
is_floating (pd_test_3_all.cpp:622)
612    // Test with integer index
613    pandas::IndexDtype<numpy::int64> int_dtype;
614    if (!int_dtype.is_numeric()) {
615        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625    }
626    if (int_dtype.is_object()) {
627        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
629    }
630
631    // Test with float index
632    pandas::IndexDtype<double> float_dtype;
is_integer (pd_test_3_all.cpp:618)
609void pd_test_3_all_index_dtype_checks() {
610    std::cout << "========= IndexDtype.is_numeric/integer/floating/object() ";
611
612    // Test with integer index
613    pandas::IndexDtype<numpy::int64> int_dtype;
614    if (!int_dtype.is_numeric()) {
615        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625    }
626    if (int_dtype.is_object()) {
627        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
is_interval (pd_test_3_all.cpp:3302)
3292    }
3293    if (idx.is_categorical()) {
3294        throw std::runtime_error("is_categorical() should be false");
3295    }
3296    if (idx.is_floating()) {
3297        throw std::runtime_error("is_floating() should be false");
3298    }
3299    if (idx.is_integer()) {
3300        throw std::runtime_error("is_integer() should be false");
3301    }
3302    if (idx.is_interval()) {
3303        throw std::runtime_error("is_interval() should be false");
3304    }
3305    if (idx.is_numeric()) {
3306        throw std::runtime_error("is_numeric() should be false");
3307    }
3308    if (idx.is_object()) {
3309        throw std::runtime_error("is_object() should be false");
3310    }
3311    if (idx.holds_integer()) {
3312        throw std::runtime_error("holds_integer() should be false");
is_monotonic_decreasing (pd_test_1_all.cpp:10203)
10193}
10194
10195void pd_test_extension_index_monotonicity() {
10196    std::cout << "========= monotonicity =========================";
10197
10198    pandas::CategoricalArray arr1({"a", "b", "c"});
10199    pandas::CategoricalIndex idx1(arr1);
10200
10201    // Just test that the methods work (result depends on internal ordering)
10202    bool inc = idx1.is_monotonic_increasing();
10203    bool dec = idx1.is_monotonic_decreasing();
10204
10205    bool passed = (inc || dec || (!inc && !dec));  // Any result is valid
10206    if (!passed) {
10207        std::cout << "  [FAIL] : in pd_test_extension_index_monotonicity() : monotonicity check failed" << std::endl;
10208        throw std::runtime_error("pd_test_extension_index_monotonicity failed");
10209    }
10210
10211    std::cout << " -> tests passed" << std::endl;
10212}
is_monotonic_increasing (pd_test_1_all.cpp:10202)
10192    std::cout << " -> tests passed" << std::endl;
10193}
10194
10195void pd_test_extension_index_monotonicity() {
10196    std::cout << "========= monotonicity =========================";
10197
10198    pandas::CategoricalArray arr1({"a", "b", "c"});
10199    pandas::CategoricalIndex idx1(arr1);
10200
10201    // Just test that the methods work (result depends on internal ordering)
10202    bool inc = idx1.is_monotonic_increasing();
10203    bool dec = idx1.is_monotonic_decreasing();
10204
10205    bool passed = (inc || dec || (!inc && !dec));  // Any result is valid
10206    if (!passed) {
10207        std::cout << "  [FAIL] : in pd_test_extension_index_monotonicity() : monotonicity check failed" << std::endl;
10208        throw std::runtime_error("pd_test_extension_index_monotonicity failed");
10209    }
10210
10211    std::cout << " -> tests passed" << std::endl;
10212}
is_numeric (pd_test_3_all.cpp:614)
605// ============================================================================
606// Category 4: Index Type Checking (IndexDtype)
607// ============================================================================
608
609void pd_test_3_all_index_dtype_checks() {
610    std::cout << "========= IndexDtype.is_numeric/integer/floating/object() ";
611
612    // Test with integer index
613    pandas::IndexDtype<numpy::int64> int_dtype;
614    if (!int_dtype.is_numeric()) {
615        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
is_object (pd_test_3_all.cpp:626)
616        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617    }
618    if (!int_dtype.is_integer()) {
619        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621    }
622    if (int_dtype.is_floating()) {
623        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625    }
626    if (int_dtype.is_object()) {
627        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
629    }
630
631    // Test with float index
632    pandas::IndexDtype<double> float_dtype;
633    if (!float_dtype.is_numeric()) {
634        std::cout << "  [FAIL] : in pd_test_3_all_index_dtype_checks() : float should be numeric" << std::endl;
635        throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: float is_numeric");
636    }
is_unique (pd_test_1_all.cpp:5962)
5953void pd_test_categorical_index_is_unique() {
5954    std::cout << "========= inherited is_unique =========================";
5955
5956    pandas::CategoricalArray arr_unique({"a", "b", "c"});
5957    pandas::CategoricalArray arr_dups({"a", "b", "a"});
5958
5959    pandas::CategoricalIndex idx_unique(arr_unique);
5960    pandas::CategoricalIndex idx_dups(arr_dups);
5961
5962    bool passed = (idx_unique.is_unique() && !idx_dups.is_unique());
5963    if (!passed) {
5964        std::cout << "  [FAIL] : in pd_test_categorical_index_is_unique()" << std::endl;
5965        throw std::runtime_error("pd_test_categorical_index_is_unique failed");
5966    }
5967
5968    std::cout << " -> tests passed" << std::endl;
5969}
5970
5971void pd_test_categorical_index_hasnans() {
5972    std::cout << "========= inherited hasnans ===========================";
all (pd_test_1_all.cpp:247)
237        pandas::BooleanArray has_true({
238            std::optional<bool>(false),
239            std::optional<bool>(true)
240        });
241        any_result = has_true.any();
242        if (!any_result.has_value() || !any_result.value()) {
243            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245        }
246
247        // Test all()
248        pandas::BooleanArray all_true({
249            std::optional<bool>(true),
250            std::optional<bool>(true)
251        });
252        auto all_result = all_true.all();
253        if (!all_result.has_value() || !all_result.value()) {
254            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255            throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256        }
any (pd_test_1_all.cpp:226)
216            std::cout << "  [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217            throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218        }
219
220        std::cout << " -> tests passed" << std::endl;
221    }
222
223    void pd_test_boolean_array_reductions() {
224        std::cout << "========= BooleanArray: reductions ======================= ";
225
226        // Test any()
227        pandas::BooleanArray all_false({
228            std::optional<bool>(false),
229            std::optional<bool>(false)
230        });
231        auto any_result = all_false.any();
232        if (!any_result.has_value() || any_result.value()) {
233            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235        }
argmax (pd_test_1_all.cpp:1323)
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327        }
1328
1329        std::cout << " -> tests passed" << std::endl;
1330    }
1331
1332    void pd_test_datetime_array_unique() {
1333        std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
arrays (pd_test_1_all.cpp:10642)
10632    std::cout << " -> tests passed" << std::endl;
10633}
10634
10635// ============================================================================
10636// Set Operations Tests
10637// ============================================================================
10638
10639void pd_test_extension_index_append() {
10640    std::cout << "========= append =========================";
10641
10642    // Use same categories for both arrays (required by CategoricalArray::concat)
10643    std::vector<std::string> cats = {"a", "b", "c", "d"};
10644    pandas::CategoricalArray arr1({"a", "b"}, cats);
10645    pandas::CategoricalIndex idx1(arr1);
10646
10647    pandas::CategoricalArray arr2({"c", "d"}, cats);
10648    pandas::CategoricalIndex idx2(arr2);
10649
10650    auto appended = idx1.append(idx2);
10651
10652    bool passed = (appended.size() == 4);
arrays (pd_test_1_all.cpp:10642)
10632    std::cout << " -> tests passed" << std::endl;
10633}
10634
10635// ============================================================================
10636// Set Operations Tests
10637// ============================================================================
10638
10639void pd_test_extension_index_append() {
10640    std::cout << "========= append =========================";
10641
10642    // Use same categories for both arrays (required by CategoricalArray::concat)
10643    std::vector<std::string> cats = {"a", "b", "c", "d"};
10644    pandas::CategoricalArray arr1({"a", "b"}, cats);
10645    pandas::CategoricalIndex idx1(arr1);
10646
10647    pandas::CategoricalArray arr2({"c", "d"}, cats);
10648    pandas::CategoricalIndex idx2(arr2);
10649
10650    auto appended = idx1.append(idx2);
10651
10652    bool passed = (appended.size() == 4);
arrays (pd_test_1_all.cpp:10642)
10632    std::cout << " -> tests passed" << std::endl;
10633}
10634
10635// ============================================================================
10636// Set Operations Tests
10637// ============================================================================
10638
10639void pd_test_extension_index_append() {
10640    std::cout << "========= append =========================";
10641
10642    // Use same categories for both arrays (required by CategoricalArray::concat)
10643    std::vector<std::string> cats = {"a", "b", "c", "d"};
10644    pandas::CategoricalArray arr1({"a", "b"}, cats);
10645    pandas::CategoricalIndex idx1(arr1);
10646
10647    pandas::CategoricalArray arr2({"c", "d"}, cats);
10648    pandas::CategoricalIndex idx2(arr2);
10649
10650    auto appended = idx1.append(idx2);
10651
10652    bool passed = (appended.size() == 4);
arrays (pd_test_1_all.cpp:10642)
10632    std::cout << " -> tests passed" << std::endl;
10633}
10634
10635// ============================================================================
10636// Set Operations Tests
10637// ============================================================================
10638
10639void pd_test_extension_index_append() {
10640    std::cout << "========= append =========================";
10641
10642    // Use same categories for both arrays (required by CategoricalArray::concat)
10643    std::vector<std::string> cats = {"a", "b", "c", "d"};
10644    pandas::CategoricalArray arr1({"a", "b"}, cats);
10645    pandas::CategoricalIndex idx1(arr1);
10646
10647    pandas::CategoricalArray arr2({"c", "d"}, cats);
10648    pandas::CategoricalIndex idx2(arr2);
10649
10650    auto appended = idx1.append(idx2);
10651
10652    bool passed = (appended.size() == 4);
codes (pd_test_1_all.cpp:473)
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
468
469        std::vector<std::string> cats = {"x", "y", "z"};
470        std::vector<numpy::int32> codes = {0, 1, 2, 1, 0};
471        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
472
473        numpy::NDArray<numpy::int32> arr_codes = arr.codes();
474        if (arr_codes.getSize() != 5) {
475            std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : codes size != 5" << std::endl;
476            throw std::runtime_error("pd_test_categorical_array_codes_property failed: codes size != 5");
477        }
478
479        // Check codes match
480        for (size_t i = 0; i < codes.size(); ++i) {
481            if (arr_codes.getElementAt({i}) != codes[i]) {
482                std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : code mismatch at " << i << std::endl;
483                throw std::runtime_error("pd_test_categorical_array_codes_property failed: code mismatch");
contains (pd_test_1_all.cpp:2200)
2190// Test: contains method
2191// ============================================================================
2192void test_contains() {
2193    std::cout << "========= IntervalArray: contains ======================= ";
2194
2195    std::vector<numpy::float64> breaks = {0.0, 1.0, 2.0, 3.0};
2196
2197    // Right-closed intervals: (0, 1], (1, 2], (2, 3]
2198    auto arr_right = pandas::IntervalArrayFloat64::from_breaks(breaks, pandas::IntervalClosed::Right);
2199
2200    // Test contains(1.0) - should be in interval 0 but not 1 (since 1 is exclusive on left of interval 1)
2201    auto contains_1 = arr_right.contains(1.0);
2202    // (0, 1] contains 1: yes, (1, 2] contains 1: no (open on left), (2, 3] contains 1: no
2203    if (contains_1[0].value_or(false) != true ||
2204        contains_1[1].value_or(true) != false ||
2205        contains_1[2].value_or(true) != false) {
2206        std::cout << "[FAIL] : in test_contains() : right-closed contains 1.0" << std::endl;
2207        return;
2208    }
2209
2210    // Left-closed intervals: [0, 1), [1, 2), [2, 3)
delete_ (pd_test_1_all.cpp:10501)
10492    std::cout << " -> tests passed" << std::endl;
10493}
10494
10495void pd_test_extension_index_delete() {
10496    std::cout << "========= delete_ =========================";
10497
10498    pandas::CategoricalArray arr({"a", "b", "c", "d"});
10499    pandas::CategoricalIndex idx(arr);
10500
10501    auto deleted = idx.delete_(1);
10502    auto v0 = deleted[0];
10503    auto v1 = deleted[1];
10504    auto v2 = deleted[2];
10505
10506    bool passed = (deleted.size() == 3 &&
10507                   v0.has_value() && *v0 == "a" &&
10508                   v1.has_value() && *v1 == "c" &&
10509                   v2.has_value() && *v2 == "d");
10510    if (!passed) {
10511        std::cout << "  [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
delete_ (pd_test_1_all.cpp:10501)
10492    std::cout << " -> tests passed" << std::endl;
10493}
10494
10495void pd_test_extension_index_delete() {
10496    std::cout << "========= delete_ =========================";
10497
10498    pandas::CategoricalArray arr({"a", "b", "c", "d"});
10499    pandas::CategoricalIndex idx(arr);
10500
10501    auto deleted = idx.delete_(1);
10502    auto v0 = deleted[0];
10503    auto v1 = deleted[1];
10504    auto v2 = deleted[2];
10505
10506    bool passed = (deleted.size() == 3 &&
10507                   v0.has_value() && *v0 == "a" &&
10508                   v1.has_value() && *v1 == "c" &&
10509                   v2.has_value() && *v2 == "d");
10510    if (!passed) {
10511        std::cout << "  [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
dtypes (pd_test_1_all.cpp:6226)
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
6224
6225            // Test dtypes
6226            auto dtypes = df.dtypes();
6227            if (dtypes.size() != 3) {
6228                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : dtypes size != 3" << std::endl;
6229                throw std::runtime_error("pd_test_dataframe_properties failed: dtypes size != 3");
6230            }
6231
6232            std::cout << " -> tests passed" << std::endl;
6233        }
6234
6235        // =====================================================================
6236        // Test: Column Access
empty (pd_test_1_all.cpp:941)
932#include "../pandas/pd_config.h"
933
934namespace dataframe_tests {
935
936namespace dataframe_tests_config {
937
938    void pd_test_config_version() {
939        std::cout << "========= df_config: version info ======================= ";
940        const char* version = pandas::DataFrameInfo::version();
941        if (version == nullptr || std::string(version).empty()) {
942            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
943            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
944        }
945        std::cout << "-> tests passed" << std::endl;
946    }
947
948    void pd_test_config_na_repr() {
949        std::cout << "========= df_config: NA representation ======================= ";
950        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
951        if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1344        // unique
1345        auto uniq = arr.unique();
1346        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347        if (uniq.size() != 3) {
1348            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350        }
1351
1352        // factorize
1353        auto [codes, uniques] = arr.factorize();
1354        // Codes for NaT should be -1
1355        if (codes.getElementAt({3}) != -1) {
1356            std::cout << "  [FAIL] : factorize: NaT code should be -1" << std::endl;
1357            throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1358        }
1359        // Same values should have same codes
1360        if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1361            std::cout << "  [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1362            throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1363        }
format (main.cpp:20)
10int main() {
11  // Automatically log all output to temp/pd_test_output.log
12  numpy::TestLogger logger("temp/pd_test_output.log");
13
14  int res = 0;
15  int res1 = 0;
16  std::string resS = "";
17
18  // call all the tests
19  res1 = dataframe_tests::pd_test_main();
20  resS += std::format("             pd_test_main: {}  errors\n", res1);
21  res += res1;
22
23  std::cout << "\n------------------------- main --------------------------------------------\n";
24  std::cout << std::endl << "All tests completed. Nb errors = " << res << std::endl;
25  std::cout << "Details: \n" << resS;
26  std::cout << "\n---------------------------------------------------------------------------\n";
27  return res;
28}
has_duplicates (pd_test_1_all.cpp:10176)
10166    std::cout << " -> tests passed" << std::endl;
10167}
10168
10169void pd_test_extension_index_uniqueness() {
10170    std::cout << "========= uniqueness =========================";
10171
10172    // Unique values
10173    pandas::CategoricalArray arr1({"a", "b", "c"});
10174    pandas::CategoricalIndex idx1(arr1);
10175
10176    bool passed1 = (idx1.is_unique() && !idx1.has_duplicates());
10177    if (!passed1) {
10178        std::cout << "  [FAIL] : in pd_test_extension_index_uniqueness() : unique check failed" << std::endl;
10179        throw std::runtime_error("pd_test_extension_index_uniqueness failed");
10180    }
10181
10182    // With duplicates
10183    pandas::CategoricalArray arr2({"a", "b", "a", "c"});
10184    pandas::CategoricalIndex idx2(arr2);
10185
10186    bool passed2 = (!idx2.is_unique() && idx2.has_duplicates());
holds_integer (pd_test_3_all.cpp:3311)
3301    }
3302    if (idx.is_interval()) {
3303        throw std::runtime_error("is_interval() should be false");
3304    }
3305    if (idx.is_numeric()) {
3306        throw std::runtime_error("is_numeric() should be false");
3307    }
3308    if (idx.is_object()) {
3309        throw std::runtime_error("is_object() should be false");
3310    }
3311    if (idx.holds_integer()) {
3312        throw std::runtime_error("holds_integer() should be false");
3313    }
3314
3315    std::cout << " -> tests passed" << std::endl;
3316}
3317
3318void pd_test_3_all_datetime_index_sort() {
3319    std::cout << "========= DatetimeIndex.sort_values() ====================";
3320
3321    pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
identical (pd_test_1_all.cpp:5883)
5873}
5874
5875void pd_test_categorical_index_identical() {
5876    std::cout << "========= identical ===================================";
5877
5878    pandas::CategoricalArray arr({"a", "b"});
5879    pandas::CategoricalIndex idx1(arr, "same_name");
5880    pandas::CategoricalIndex idx2(arr, "same_name");
5881    pandas::CategoricalIndex idx3(arr, "diff_name");
5882
5883    bool passed = (idx1.identical(idx2) && !idx1.identical(idx3));
5884    if (!passed) {
5885        std::cout << "  [FAIL] : in pd_test_categorical_index_identical()" << std::endl;
5886        throw std::runtime_error("pd_test_categorical_index_identical failed");
5887    }
5888
5889    std::cout << " -> tests passed" << std::endl;
5890}
5891
5892// ============================================================================
5893// Inherited Operations Tests
item (pd_test_3_all.cpp:3712)
3703    // Test is_interval (always false for base Index)
3704    if (int_idx.is_interval()) {
3705        throw std::runtime_error("base Index should not be interval");
3706    }
3707
3708    std::cout << " -> tests passed" << std::endl;
3709}
3710
3711void pd_test_3_all_index_item() {
3712    std::cout << "========= Index.item() =============================";
3713
3714    pandas::Index<numpy::int64> idx1({42});
3715    numpy::int64 val = idx1.item();
3716
3717    if (val != 42) {
3718        throw std::runtime_error("item() should return 42");
3719    }
3720
3721    // Test error for size != 1
3722    pandas::Index<numpy::int64> idx2({1, 2, 3});
memory_usage (pd_test_1_all.cpp:27063)
27053        }
27054
27055        std::cout << "====================================== [OK] pd_test_value_counts test suite ========================== " << std::endl;
27056        return 0;
27057    }
27058
27059} // namespace dataframe_tests
27060// ------------------- pd_test_value_counts.cpp (end) -----------------------------
27061
27062// ------------------- pd_test_memory_usage.cpp (start) -----------------------------
27063// Tests for DataFrame.memory_usage() - pandas-compatible memory usage reporting
27064
27065namespace dataframe_tests {
27066    namespace dataframe_tests_memory_usage {
27067
27068        void pd_test_memory_usage_basic() {
27069            std::cout << "========= basic memory_usage =======================";
27070
27071            // Create a simple DataFrame with multiple columns
27072            std::map<std::string, std::vector<double>> data;
27073            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
names (pd_test_1_all.cpp:11251)
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
11255            double sum_b = std::stod(result_col.get_value_str(1));
11256
11257            if (!approx_equal(sum_a, 6.0)) {
11258                passed = false;
11259                std::cout << "  [FAIL] : in pd_test_func_apply_dataframe_apply_axis0() : sum A = " << sum_a << std::endl;
11260                throw std::runtime_error("pd_test_func_apply_dataframe_apply_axis0 failed: sum A");
11261            }
nlevels (pd_test_1_all.cpp:14138)
14128        // =====================================================================
14129        // Constructor Tests
14130        // =====================================================================
14131
14132        void pd_test_multiindex_default_constructor() {
14133            std::cout << "========= default constructor ========================= ";
14134
14135            pandas::MultiIndex mi;
14136
14137            bool passed = (mi.nlevels() == 0) && (mi.size() == 0) && mi.empty();
14138
14139            if (!passed) {
14140                std::cout << "  [FAIL] : in pd_test_multiindex_default_constructor()" << std::endl;
14141                throw std::runtime_error("pd_test_multiindex_default_constructor failed");
14142            }
14143
14144            std::cout << "-> tests passed" << std::endl;
14145        }
14146
14147        void pd_test_multiindex_from_arrays() {
pairs (pd_test_5_all.cpp:115042)
115032        run_mixed_pair<numpy::uint16, numpy::int16>  ("mixedT.uint16_PLUS_int16",{numpy::uint16(1),numpy::uint16(2)},{numpy::int16(3),numpy::int16(4)},                total_fail, find_id(119));
115033        run_mixed_pair<numpy::int32,  numpy::uint32> ("mixedT.int32_PLUS_uint32",{numpy::int32(1),numpy::int32(2)},{numpy::uint32(3),numpy::uint32(4)},                total_fail, find_id(120));
115034        run_mixed_pair<numpy::uint32, numpy::int32>  ("mixedT.uint32_PLUS_int32",{numpy::uint32(1),numpy::uint32(2)},{numpy::int32(3),numpy::int32(4)},                total_fail, find_id(121));
115035        run_mixed_pair<numpy::int64,  numpy::uint64> ("mixedT.int64_PLUS_uint64",{numpy::int64(1),numpy::int64(2)},{numpy::uint64(3),numpy::uint64(4)},                total_fail, find_id(122));
115036        run_mixed_pair<numpy::uint64, numpy::int64>  ("mixedT.uint64_PLUS_int64",{numpy::uint64(1),numpy::uint64(2)},{numpy::int64(3),numpy::int64(4)},                total_fail, find_id(123));
115037        run_mixed_pair<numpy::int32,  numpy::uint8>  ("mixedT.int32_PLUS_uint8", {numpy::int32(1),numpy::int32(2)},{numpy::uint8(3),numpy::uint8(4)},                  total_fail, find_id(124));
115038        run_mixed_pair<numpy::uint8,  numpy::int32>  ("mixedT.uint8_PLUS_int32", {numpy::uint8(1),numpy::uint8(2)},{numpy::int32(3),numpy::int32(4)},                  total_fail, find_id(125));
115039        run_mixed_pair<numpy::int64,  numpy::uint32> ("mixedT.int64_PLUS_uint32",{numpy::int64(1),numpy::int64(2)},{numpy::uint32(3),numpy::uint32(4)},                total_fail, find_id(126));
115040        run_mixed_pair<numpy::uint32, numpy::int64>  ("mixedT.uint32_PLUS_int64",{numpy::uint32(1),numpy::uint32(2)},{numpy::int64(3),numpy::int64(4)},                total_fail, find_id(127));
115041
115042        // cross int/float pairs (beyond int64/float64 already in base)
115043        run_mixed_pair<numpy::int8,   numpy::float32>("mixedT.int8_PLUS_float32", {numpy::int8(1),numpy::int8(2)},{3.0f,4.0f},          total_fail, find_id(128));
115044        run_mixed_pair<numpy::float32,numpy::int8>   ("mixedT.float32_PLUS_int8", {1.0f,2.0f},{numpy::int8(3),numpy::int8(4)},          total_fail, find_id(129));
115045        run_mixed_pair<numpy::int16,  numpy::float64>("mixedT.int16_PLUS_float64",{numpy::int16(1),numpy::int16(2)},{3.0,4.0},          total_fail, find_id(130));
115046        run_mixed_pair<numpy::float64,numpy::int16>  ("mixedT.float64_PLUS_int16",{1.0,2.0},{numpy::int16(3),numpy::int16(4)},          total_fail, find_id(131));
115047        run_mixed_pair<numpy::int32,  numpy::float32>("mixedT.int32_PLUS_float32",{numpy::int32(1),numpy::int32(2)},{3.0f,4.0f},        total_fail, find_id(132));
115048        run_mixed_pair<numpy::float32,numpy::int32>  ("mixedT.float32_PLUS_int32",{1.0f,2.0f},{numpy::int32(3),numpy::int32(4)},        total_fail, find_id(133));
115049        run_mixed_pair<numpy::uint16, numpy::float64>("mixedT.uint16_PLUS_float64",{numpy::uint16(1),numpy::uint16(2)},{3.0,4.0},       total_fail, find_id(134));
115050        run_mixed_pair<numpy::float64,numpy::uint16> ("mixedT.float64_PLUS_uint16",{1.0,2.0},{numpy::uint16(3),numpy::uint16(4)},       total_fail, find_id(135));
115051
115052        // same-T baseline for every remaining numeric dtype
putmask (pd_test_3_all.cpp:3752)
3742    // Should be at least sizeof index + 5 * sizeof(int64)
3743    if (usage < 5 * sizeof(numpy::int64)) {
3744        throw std::runtime_error("memory_usage too small");
3745    }
3746
3747    std::cout << " -> tests passed" << std::endl;
3748}
3749
3750void pd_test_3_all_index_putmask() {
3751    std::cout << "========= Index.putmask() ==========================";
3752
3753    pandas::Index<numpy::int64> idx({1, 2, 3, 4, 5});
3754    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{5});
3755    mask.setElementAt({0}, numpy::bool_(true));
3756    mask.setElementAt({1}, numpy::bool_(false));
3757    mask.setElementAt({2}, numpy::bool_(true));
3758    mask.setElementAt({3}, numpy::bool_(false));
3759    mask.setElementAt({4}, numpy::bool_(true));
3760
3761    auto result = idx.putmask(mask, numpy::int64(99));
ravel (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
remove_unused_levels (pd_test_3_all.cpp:772)
762    }
763
764    std::cout << " -> tests passed" << std::endl;
765}
766
767// ============================================================================
768// Category 7: MultiIndex Operations
769// ============================================================================
770
771void pd_test_3_all_multiindex_remove_unused() {
772    std::cout << "========= MultiIndex.remove_unused_levels() ==========";
773
774    // Create a MultiIndex with some unused level values
775    std::vector<std::unique_ptr<pandas::IndexBase>> levels;
776    levels.push_back(std::make_unique<pandas::Index<std::string>>(
777        std::vector<std::string>{"a", "b", "c", "d"}));  // "c" and "d" will be unused
778    levels.push_back(std::make_unique<pandas::Index<std::string>>(
779        std::vector<std::string>{"x", "y", "z"}));  // "z" will be unused
780
781    // Codes only reference a, b (indices 0, 1) and x, y (indices 0, 1)
782    numpy::NDArray<numpy::int64> codes0(std::vector<size_t>{4});
repeat (pd_test_3_all.cpp:2166)
2156    auto viewed = arr.view();
2157    if (viewed.size() != 3 || !viewed.equals(arr)) {
2158        throw std::runtime_error("view failed");
2159    }
2160
2161    std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165    std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167    std::vector<std::optional<std::string>> values = {"a", "b"};
2168    pandas::CategoricalArray arr(values);
2169
2170    auto result = arr.repeat(3);
2171    if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172        *result[3] != "b" || *result[5] != "b") {
2173        throw std::runtime_error("repeat scalar failed");
2174    }
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
result (pd_test_1_all.cpp:15406)
15396    data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397    data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400    mask.setElementAt({0}, numpy::bool_(false));
15401    mask.setElementAt({1}, numpy::bool_(false));
15402
15403    pandas::DatetimeArray arr(data, mask);
15404    pandas::DatetimeIndexBase idx(arr, "original");
15405
15406    // Create join result (int64 values)
15407    numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408    join_result.setElementAt({0}, numpy::int64(500LL));
15409    join_result.setElementAt({1}, numpy::int64(600LL));
15410    join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412    auto new_idx = idx._from_join_target(join_result);
15413
15414    bool passed = (new_idx.size() == 3 &&
15415                   new_idx.name().has_value() && *new_idx.name() == "original");
15416    if (!passed) {
result (pd_test_1_all.cpp:15406)
15396    data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397    data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400    mask.setElementAt({0}, numpy::bool_(false));
15401    mask.setElementAt({1}, numpy::bool_(false));
15402
15403    pandas::DatetimeArray arr(data, mask);
15404    pandas::DatetimeIndexBase idx(arr, "original");
15405
15406    // Create join result (int64 values)
15407    numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408    join_result.setElementAt({0}, numpy::int64(500LL));
15409    join_result.setElementAt({1}, numpy::int64(600LL));
15410    join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412    auto new_idx = idx._from_join_target(join_result);
15413
15414    bool passed = (new_idx.size() == 3 &&
15415                   new_idx.name().has_value() && *new_idx.name() == "original");
15416    if (!passed) {
round (pd_test_1_all.cpp:1688)
1678    void pd_test_floating_array_rounding() {
1679        std::cout << "========= FloatingArray: rounding ======================= ";
1680
1681        pandas::FloatingArray<double> arr({
1682            std::optional<double>(1.234),
1683            std::optional<double>(2.567),
1684            std::nullopt
1685        });
1686
1687        auto rounded = arr.round(2);
1688        if (std::abs(rounded[0].value() - 1.23) > 0.001 ||
1689            std::abs(rounded[1].value() - 2.57) > 0.001) {
1690            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round(2)" << std::endl;
1691            throw std::runtime_error("pd_test_floating_array_rounding failed: round(2)");
1692        }
1693
1694        if (!rounded.is_na(2)) {
1695            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round should preserve NA" << std::endl;
1696            throw std::runtime_error("pd_test_floating_array_rounding failed: NA preservation");
1697        }
set_codes (pd_test_3_all.cpp:9077)
9067    if (locs.getSize() != 2) {
9068        std::cout << "  [FAIL] : in pd_test_3_all_multiindex_get_locs() : expected 2 locations" << std::endl;
9069        throw std::runtime_error("pd_test_3_all_multiindex_get_locs failed: size");
9070    }
9071
9072    std::cout << " -> tests passed" << std::endl;
9073}
9074
9075void pd_test_3_all_multiindex_set_codes() {
9076    std::cout << "========= MultiIndex.set_codes() ==================";
9077
9078    std::vector<std::vector<std::string>> arrays = {
9079        {"a", "a", "b", "b"},
9080        {"1", "2", "1", "2"}
9081    };
9082    pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
9083
9084    // Create new codes
9085    numpy::NDArray<numpy::int64> new_code0({4});
9086    new_code0.setElementAt({0}, 1);  // b
set_levels (pd_test_3_all.cpp:9104)
9094    if (mi2.size() != 4) {
9095        std::cout << "  [FAIL] : in pd_test_3_all_multiindex_set_codes() : size changed" << std::endl;
9096        throw std::runtime_error("pd_test_3_all_multiindex_set_codes failed");
9097    }
9098
9099    std::cout << " -> tests passed" << std::endl;
9100}
9101
9102void pd_test_3_all_multiindex_set_levels() {
9103    std::cout << "========= MultiIndex.set_levels() =================";
9104
9105    std::vector<std::vector<std::string>> arrays = {
9106        {"a", "a", "b", "b"},
9107        {"1", "2", "1", "2"}
9108    };
9109    pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
9110
9111    // Set new levels for level 0
9112    std::vector<std::vector<std::string>> new_levels = {{"X", "Y"}};
9113    pandas::MultiIndex mi2 = mi.set_levels(new_levels, 0);
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
slice_indexer (pd_test_3_all.cpp:711)
701    }
702
703    std::cout << " -> tests passed" << std::endl;
704}
705
706// ============================================================================
707// Category 6: Index Indexer Methods
708// ============================================================================
709
710void pd_test_3_all_index_indexers() {
711    std::cout << "========= Index.get_indexer_for/non_unique/slice_indexer() ";
712
713    std::vector<std::string> vals = {"a", "b", "c", "d", "e"};
714    pandas::Index<std::string> idx(vals);
715
716    // Test get_indexer_for()
717    std::vector<std::string> target = {"b", "d", "f"};  // "f" doesn't exist
718    numpy::NDArray<numpy::int64> indexer = idx.get_indexer_for(target);
719    if (indexer.getSize() != 3) {
720        std::cout << "  [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_for size mismatch" << std::endl;
721        throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_for size");
slice_locs (pd_test_1_all.cpp:18275)
18265        }
18266
18267        std::cout << "-> tests passed" << std::endl;
18268    }
18269
18270    void pd_test_range_index_slice_locs() {
18271        std::cout << "========= slice_locs ================================== ";
18272
18273        pandas::RangeIndex ri(0, 10);  // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
18274
18275        auto [start_idx, stop_idx] = ri.slice_locs(3, 7);
18276
18277        bool passed = (start_idx == 3 && stop_idx == 8);
18278
18279        if (!passed) {
18280            std::cout << "  [FAIL] : slice_locs" << std::endl;
18281            throw std::runtime_error("pd_test_range_index_slice_locs failed");
18282        }
18283
18284        std::cout << "-> tests passed" << std::endl;
18285    }
sort (pd_test_3_all.cpp:3869)
3859        throw std::runtime_error("last 2 positions should be NaN");
3860    }
3861    if (std::abs(result[0] - 3.0) > 0.001) {
3862        throw std::runtime_error("shift(-2) [0] should be 3.0");
3863    }
3864
3865    std::cout << " -> tests passed" << std::endl;
3866}
3867
3868void pd_test_3_all_index_sort() {
3869    std::cout << "========= Index.sort() =============================";
3870
3871    pandas::Index<numpy::int64> idx({3, 1, 4, 1, 5, 9, 2, 6});
3872    auto result = idx.sort();
3873
3874    if (result[0] != 1 || result[1] != 1 || result[7] != 9) {
3875        throw std::runtime_error("sort() not working correctly");
3876    }
3877
3878    // Test descending
3879    result = idx.sort(false);
sortlevel (pd_test_1_all.cpp:14676)
14666        void pd_test_multiindex_sortlevel() {
14667            std::cout << "========= sortlevel =================================== ";
14668
14669            std::vector<std::vector<std::string>> arrays = {
14670                {"b", "a", "c"},
14671                {"2", "1", "3"}
14672            };
14673
14674            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14675            auto [sorted, indices] = mi.sortlevel(0);
14676
14677            bool passed = true;
14678
14679            // After sorting by level 0: a, b, c
14680            if (sorted[0][0] != "a" || sorted[1][0] != "b" || sorted[2][0] != "c") {
14681                std::cout << "  [FAIL] : not sorted correctly by level 0" << std::endl;
14682                passed = false;
14683            }
14684
14685            if (!passed) {
str (pd_test_1_all.cpp:7137)
7127            // Test basic info() with stringstream
7128            std::map<std::string, std::vector<int>> data = {
7129                {"A", {1, 2, 3, 4, 5}},
7130                {"B", {10, 20, 30, 40, 50}},
7131                {"C", {100, 200, 300, 400, 500}}
7132            };
7133            pandas::DataFrame df(data);
7134
7135            std::ostringstream oss;
7136            df.info(oss);
7137            std::string output = oss.str();
7138
7139            // Verify key components
7140            if (output.find("<class 'pandas.core.frame.DataFrame'>") == std::string::npos) {
7141                std::cout << "  [FAIL] : info missing class name" << std::endl;
7142                throw std::runtime_error("pd_test_dataframe_info failed: missing class name");
7143            }
7144            if (output.find("RangeIndex:") == std::string::npos) {
7145                std::cout << "  [FAIL] : info missing RangeIndex" << std::endl;
7146                throw std::runtime_error("pd_test_dataframe_info failed: missing RangeIndex");
7147            }
truncate (pd_test_1_all.cpp:20467)
20457            std::vector<std::string> dates = {
20458                "2020-01-01",
20459                "2020-01-02",
20460                "2020-01-03",
20461                "2020-01-04",
20462                "2020-01-05"
20463            };
20464            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20465
20466            // Truncate to keep only dates from 2020-01-02 to 2020-01-04
20467            pandas::DataFrame result = df.truncate("2020-01-02", "2020-01-04");
20468
20469            bool passed = (result.nrows() == 3);
20470
20471            if (!passed) {
20472                std::cout << "  [FAIL] : in pd_test_timeseries_truncate() : expected 3 rows, got "
20473                          << result.nrows() << std::endl;
20474                throw std::runtime_error("pd_test_timeseries_truncate failed");
20475            }
20476
20477            std::cout << " -> tests passed" << std::endl;