ExtensionIndex#
-
class pandas::ExtensionIndex#
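Index class for axis labels in pandas data structures, backed by an extension array type (the `ArrayType` template parameter, e.g. `pandas::CategoricalArray`).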
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Create an ExtensionIndex over an extension array (here a CategoricalArray)
CategoricalArray arr({"a", "b", "c"});
ExtensionIndex<CategoricalArray> idx(arr, "my_index");
size_t len = idx.size();
Constructors#
Signature |
Location |
Example |
|---|---|---|
|
pd_extension_index.h:350 |
|
|
pd_extension_index.h:361 |
|
|
pd_extension_index.h:371 |
|
|
pd_extension_index.h:385 |
Indexing / Selection#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::NDArray<numpy::int64> |
pd_extension_index.h:709 |
|
|
numpy::NDArray<numpy::int64> |
pd_extension_index.h:747 |
|
|
std::variant<size_t, std::vector<size_t>> |
pd_extension_index.h:687 |
|
|
int64_t |
pd_extension_index.h:525 |
|
|
std::optional<size_t> |
pd_extension_index.h:547 |
|
|
std::string |
pd_extension_index.h:572 |
|
|
oss << |
pd_extension_index.h:607 |
|
|
ExtensionIndex |
pd_extension_index.h:838 |
|
|
ExtensionIndex |
pd_extension_index.h:1569 |
Data Manipulation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
ExtensionIndex |
pd_extension_index.h:895 |
|
|
ExtensionIndex |
pd_extension_index.h:1020 |
|
|
ExtensionIndex |
pd_extension_index.h:822 |
|
|
ExtensionIndex |
pd_extension_index.h:982 |
Missing Data#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
ExtensionIndex |
pd_extension_index.h:812 |
|
|
numpy::NDArray<numpy::bool_> |
pd_extension_index.h:774 |
|
|
numpy::NDArray<numpy::bool_> |
pd_extension_index.h:781 |
|
|
numpy::NDArray<numpy::bool_> |
pd_extension_index.h:788 |
|
|
numpy::NDArray<numpy::bool_> |
pd_extension_index.h:795 |
Statistics#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
size_t |
pd_extension_index.h:1128 |
|
|
std::pair<std::vector<value_type>, std::vector<int64_t>> |
pd_extension_index.h:1142 |
Comparison#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
bool |
pd_extension_index.h:1489 |
|
|
ArrayType |
pd_extension_index.h:1587 |
Sorting#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::NDArray<numpy::int64> |
pd_extension_index.h:1385 |
|
|
ExtensionIndex |
pd_extension_index.h:1428 |
Combining#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
ExtensionIndex |
pd_extension_index.h:1210 |
Time Series#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
ExtensionIndex |
pd_extension_index.h:1311 |
I/O#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::vector<std::optional<value_type>> |
pd_extension_index.h:658 |
|
|
std::string |
pd_extension_index.h:600 |
|
|
std::vector<std::string> |
pd_extension_index.h:560 |
Conversion#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
ExtensionIndex |
pd_extension_index.h:975 |
Set Operations#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::NDArray<numpy::bool_> |
pd_extension_index.h:1069 |
|
|
ExtensionIndex |
pd_extension_index.h:1218 |
|
|
numpy::NDArray<numpy::bool_> |
pd_extension_index.h:870 |
|
|
ExtensionIndex |
pd_extension_index.h:1339 |
|
|
ExtensionIndex |
pd_extension_index.h:1251 |
|
|
ExtensionIndex |
pd_extension_index.h:992 |
Type Checking#
Other Methods#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
const ArrayType& |
pd_extension_index.h:651 |
|
|
void |
pd_extension_index.h:209 |
|
|
size_t |
pd_extension_index.h:1548 |
|
|
void |
pd_extension_index.h:1526 |
|
|
std::unique_ptr<IndexBase> |
pd_extension_index.h:589 |
|
|
void |
pd_extension_index.h:295 |
|
|
bool |
pd_extension_index.h:676 |
|
|
bool |
pd_extension_index.h:513 |
|
|
value_type |
pd_extension_index.h:236 |
|
|
std::string |
pd_extension_index.h:274 |
|
|
ExtensionIndex |
pd_extension_index.h:937 |
|
|
ExtensionIndex |
pd_extension_index.h:955 |
|
|
std::string |
pd_extension_index.h:437 |
|
|
bool |
pd_extension_index.h:423 |
|
|
void |
pd_extension_index.h:227 |
|
|
std::pair<numpy::NDArray<numpy::int64>, ExtensionIndex> |
pd_extension_index.h:1169 |
|
|
bool |
pd_extension_index.h:1535 |
|
|
bool |
pd_extension_index.h:499 |
|
|
bool |
pd_extension_index.h:802 |
|
|
bool |
pd_extension_index.h:1512 |
|
|
std::string |
pd_extension_index.h:458 |
|
|
void |
pd_extension_index.h:196 |
|
|
std::optional<std::string> |
pd_extension_index.h:444 |
|
|
size_t |
pd_extension_index.h:430 |
|
|
std::string |
pd_extension_index.h:626 |
|
|
ExtensionIndex |
pd_extension_index.h:983 |
|
|
void |
pd_extension_index.h:451 |
|
|
size_t |
pd_extension_index.h:416 |
|
|
IndexTypeId |
pd_extension_index.h:593 |
|
|
ArrayType |
pd_extension_index.h:644 |
Code Examples#
The following examples are extracted from the test suite.
get_indexer (pd_test_1_all.cpp:10332)
10322 void pd_test_extension_index_get_indexer() {
10323 std::cout << "========= get_indexer =========================";
10324
10325 pandas::CategoricalArray arr1({"a", "b", "c", "d"});
10326 pandas::CategoricalIndex idx1(arr1);
10327
10328 pandas::CategoricalArray arr2({"b", "d", "x"});
10329 pandas::CategoricalIndex idx2(arr2);
10330
10331 auto indexer = idx1.get_indexer(idx2);
10332
10333 bool passed = (indexer.getSize() == 3 &&
10334 indexer.getElementAt({0}) == 1 &&
10335 indexer.getElementAt({1}) == 3 &&
10336 indexer.getElementAt({2}) == -1);
10337 if (!passed) {
10338 std::cout << " [FAIL] : in pd_test_extension_index_get_indexer() : get_indexer check failed" << std::endl;
10339 throw std::runtime_error("pd_test_extension_index_get_indexer failed");
10340 }
get_indexer_for (pd_test_3_all.cpp:716)
706// ============================================================================
707 // Category 6: Index Indexer Methods
708// ============================================================================
709
710 void pd_test_3_all_index_indexers() {
711 std::cout << "========= Index.get_indexer_for/non_unique/slice_indexer() ";
712
713 std::vector<std::string> vals = {"a", "b", "c", "d", "e"};
714 pandas::Index<std::string> idx(vals);
715
716 // Test get_indexer_for()
717 std::vector<std::string> target = {"b", "d", "f"}; // "f" doesn't exist
718 numpy::NDArray<numpy::int64> indexer = idx.get_indexer_for(target);
719 if (indexer.getSize() != 3) {
720 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_for size mismatch" << std::endl;
721 throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_for size");
722 }
723 // "b" is at index 1
724 if (indexer.getElementAt({0}) != 1) {
725 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : 'b' should be at index 1" << std::endl;
726 throw std::runtime_error("pd_test_3_all_index_indexers failed: 'b' index");
get_loc (pd_test_1_all.cpp:10281)
10271 bool passed = (idx.contains("apple") && idx.contains("banana") && !idx.contains("grape"));
10272 if (!passed) {
10273 std::cout << " [FAIL] : in pd_test_extension_index_contains() : contains check failed" << std::endl;
10274 throw std::runtime_error("pd_test_extension_index_contains failed");
10275 }
10276
10277 std::cout << " -> tests passed" << std::endl;
10278 }
10279
10280 void pd_test_extension_index_get_loc_unique() {
10281 std::cout << "========= get_loc (unique) =========================";
10282
10283 pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284 pandas::CategoricalIndex idx(arr);
10285
10286 auto loc_apple = idx.get_loc("apple");
10287 auto loc_banana = idx.get_loc("banana");
10288 auto loc_cherry = idx.get_loc("cherry");
10289
10290 bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291 std::get<size_t>(loc_banana) == 1 &&
get_loc_str (pd_test_1_all.cpp:10890)
10880 std::cout << " -> tests passed" << std::endl;
10881 }
10882
10883 void pd_test_extension_index_contains_str_get_loc_str() {
10884 std::cout << "========= contains_str/get_loc_str =========================";
10885
10886 pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10887 pandas::CategoricalIndex idx(arr);
10888
10889 bool passed = (idx.contains_str("apple") && !idx.contains_str("grape") &&
10890 idx.get_loc_str("banana") == 1 && idx.get_loc_str("grape") == -1);
10891 if (!passed) {
10892 std::cout << " [FAIL] : in pd_test_extension_index_contains_str_get_loc_str() : contains_str/get_loc_str check failed" << std::endl;
10893 throw std::runtime_error("pd_test_extension_index_contains_str_get_loc_str failed");
10894 }
10895
10896 std::cout << " -> tests passed" << std::endl;
10897 }
10898
10899 void pd_test_extension_index_repr() {
10900 std::cout << "========= repr =========================";
get_loc_string (pd_test_3_all.cpp:28108)
28098 vals.push_back(numpy::timedelta64(ns, numpy::DateTimeUnit::Nanosecond));
28099 }
28100 return pandas::TimedeltaArray(vals);
28101 }
28102
28103 void pd_test_getitem_timedelta_str_lookup() {
28104 std::cout << " -- pd_test_getitem_timedelta_str_lookup --" << std::endl;
28105 int fail = 0;
28106 auto tda = ge_make_tda({1 * GE_NS_PER_DAY, 2 * GE_NS_PER_DAY, 3 * GE_NS_PER_DAY});
28107 pandas::TimedeltaIndex tdi(tda);
28108 auto pos = tdi.get_loc_string("2 days");
28109 if (!pos.has_value()) { std::cout << " FAIL: '2 days' not found" << std::endl; fail++; }
28110 else if (*pos != 1) { std::cout << " FAIL: expected pos=1, got " << *pos << std::endl; fail++; }
28111 if (fail == 0) std::cout << " OK" << std::endl;
28112 if (fail) throw std::runtime_error("pd_test_getitem_timedelta_str_lookup failed");
28113 }
28114
28115 void pd_test_getitem_timedelta_str_not_found() {
28116 std::cout << " -- pd_test_getitem_timedelta_str_not_found --" << std::endl;
28117 int fail = 0;
28118 auto tda = ge_make_tda({1 * GE_NS_PER_DAY});
get_value_str (pd_test_1_all.cpp:4665)
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
4666 passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667 if (!passed) {
4668 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670 }
4671
4672 // A-B correlation should be 1.0 (perfect correlation)
4673 std::string ab = corr_df["B"].get_value_str(0);
4674 passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675 if (!passed) {
get_value_str (pd_test_1_all.cpp:4665)
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
4666 passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667 if (!passed) {
4668 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670 }
4671
4672 // A-B correlation should be 1.0 (perfect correlation)
4673 std::string ab = corr_df["B"].get_value_str(0);
4674 passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675 if (!passed) {
take (pd_test_1_all.cpp:5903)
5893 // Inherited Operations Tests
5894// ============================================================================
5895
5896 void pd_test_categorical_index_take() {
5897 std::cout << "========= inherited take ==============================";
5898
5899 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900 pandas::CategoricalIndex idx(arr);
5901
5902 std::vector<size_t> indices = {0, 2, 3};
5903 pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905 bool passed = (taken.size() == 3);
5906 if (!passed) {
5907 std::cout << " [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908 throw std::runtime_error("pd_test_categorical_index_take failed");
5909 }
5910
5911 std::cout << " -> tests passed" << std::endl;
5912 }
drop (pd_test_1_all.cpp:6558)
6548 if (df.ncols() != 2) {
6549 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550 throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551 }
6552 if (!popped) {
6553 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554 throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555 }
6556
6557 // Test drop columns
6558 auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559 if (dropped.ncols() != 1) {
6560 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562 }
6563
6564 // Test rename
6565 auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566 if (!renamed.has_column("X")) {
6567 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568 throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
drop_duplicates (pd_test_1_all.cpp:6639)
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
6634 std::map<std::string, std::vector<numpy::int64>> dup_data;
6635 dup_data["A"] = {1, 1, 2, 2};
6636 dup_data["B"] = {1, 1, 2, 3};
6637 pandas::DataFrame df_dup(dup_data);
6638
6639 auto deduped = df_dup.drop_duplicates();
6640 // Rows 0 and 1 are duplicates (A=1, B=1), so should have 3 rows
6641 if (deduped.nrows() != 3) {
6642 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop_duplicates nrows != 3, got " << deduped.nrows() << std::endl;
6643 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644 }
6645 }
6646
6647 // Test assign
6648 {
6649 std::map<std::string, std::vector<numpy::int64>> assign_data;
dropna (pd_test_1_all.cpp:531)
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
rename (pd_test_1_all.cpp:5816)
5806 std::cout << " -> tests passed" << std::endl;
5807 }
5808
5809 void pd_test_categorical_index_rename() {
5810 std::cout << "========= rename ======================================";
5811
5812 pandas::CategoricalArray arr({"x", "y"});
5813 pandas::CategoricalIndex idx(arr, "old_name");
5814
5815 pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817 bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818 renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819 if (!passed) {
5820 std::cout << " [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821 throw std::runtime_error("pd_test_categorical_index_rename failed");
5822 }
5823
5824 std::cout << " -> tests passed" << std::endl;
5825 }
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
isna (pd_test_1_all.cpp:524)
514 throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515 }
516
517 // Test count (non-NA)
518 if (arr.count() != 2) {
519 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520 throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isnull (pd_test_3_all.cpp:671)
661 // Category 5: Index Null Detection
662// ============================================================================
663
664 void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
676 // Index 0: 1.0 -> not null
677 if (isnull_result.getElementAt({0})) {
678 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679 throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680 }
681 // Index 1: NaN -> null
notna (pd_test_1_all.cpp:6595)
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593 }
6594
6595 auto notna_mask = df_na.notna();
6596 if (notna_mask.getElementAt({1, 0})) {
6597 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598 throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599 }
6600 }
6601
6602 // Test fillna
6603 {
6604 std::map<std::string, std::vector<numpy::float64>> float_data;
6605 float_data["X"] = {1.0, std::nan(""), 3.0};
notnull (pd_test_3_all.cpp:665)
655 }
656
657 std::cout << " -> tests passed" << std::endl;
658 }
659
660// ============================================================================
661 // Category 5: Index Null Detection
662// ============================================================================
663
664 void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
nunique (pd_test_1_all.cpp:10604)
10594 std::cout << " -> tests passed" << std::endl;
10595 }
10596
10597 void pd_test_extension_index_nunique() {
10598 std::cout << "========= nunique =========================";
10599
10600 pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10601 pandas::CategoricalIndex idx(arr);
10602
10603 bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10604 if (!passed) {
10605 std::cout << " [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10606 throw std::runtime_error("pd_test_extension_index_nunique failed");
10607 }
10608
10609 std::cout << " -> tests passed" << std::endl;
10610 }
10611
10612 void pd_test_extension_index_factorize() {
10613 std::cout << "========= factorize =========================";
value_counts (pd_test_1_all.cpp:865)
855 std::vector<std::optional<std::string>> values = {
856 std::optional<std::string>("a"),
857 std::optional<std::string>("b"),
858 std::optional<std::string>("a"),
859 std::optional<std::string>("a"),
860 std::optional<std::string>("b"),
861 std::nullopt // NA not counted
862 };
863 pandas::CategoricalArray arr(values);
864
865 auto [cats, counts] = arr.value_counts();
866
867 // Should have 2 categories
868 if (cats.size() != 2 || counts.size() != 2) {
869 std::cout << " [FAIL] : in pd_test_categorical_array_value_counts() : wrong size" << std::endl;
870 throw std::runtime_error("pd_test_categorical_array_value_counts failed: wrong size");
871 }
872
873 // Find 'a' count
874 int64_t a_count = 0, b_count = 0;
875 for (size_t i = 0; i < cats.size(); ++i) {
equals (pd_test_1_all.cpp:5866)
5856 std::cout << "========= equals ======================================";
5857
5858 pandas::CategoricalArray arr1({"a", "b", "a"});
5859 pandas::CategoricalArray arr2({"a", "b", "a"});
5860 pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862 pandas::CategoricalIndex idx1(arr1);
5863 pandas::CategoricalIndex idx2(arr2);
5864 pandas::CategoricalIndex idx3(arr3);
5865
5866 bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867 if (!passed) {
5868 std::cout << " [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869 throw std::runtime_error("pd_test_categorical_index_equals failed");
5870 }
5871
5872 std::cout << " -> tests passed" << std::endl;
5873 }
5874
5875 void pd_test_categorical_index_identical() {
5876 std::cout << "========= identical ===================================";
argsort (pd_test_1_all.cpp:1304)
1294 std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296 pandas::DatetimeArray arr(std::vector<std::string>{
1297 "2023-06-15",
1298 "NaT",
1299 "2023-01-01",
1300 "2023-12-31"
1301 });
1302
1303 // argsort ascending
1304 auto indices = arr.argsort(true, "last");
1305 // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
append (pd_test_1_all.cpp:10650)
10640 std::cout << "========= append =========================";
10641
10642 // Use same categories for both arrays (required by CategoricalArray::concat)
10643 std::vector<std::string> cats = {"a", "b", "c", "d"};
10644 pandas::CategoricalArray arr1({"a", "b"}, cats);
10645 pandas::CategoricalIndex idx1(arr1);
10646
10647 pandas::CategoricalArray arr2({"c", "d"}, cats);
10648 pandas::CategoricalIndex idx2(arr2);
10649
10650 auto appended = idx1.append(idx2);
10651
10652 bool passed = (appended.size() == 4);
10653 if (!passed) {
10654 std::cout << " [FAIL] : in pd_test_extension_index_append() : append check failed" << std::endl;
10655 throw std::runtime_error("pd_test_extension_index_append failed");
10656 }
10657
10658 std::cout << " -> tests passed" << std::endl;
10659 }
difference (pd_test_1_all.cpp:10718)
10708 std::cout << "========= difference =========================";
10709
10710 // Use same categories for both arrays
10711 std::vector<std::string> cats = {"a", "b", "c", "d"};
10712 pandas::CategoricalArray arr1({"a", "b", "c", "d"}, cats);
10713 pandas::CategoricalIndex idx1(arr1);
10714
10715 pandas::CategoricalArray arr2({"b", "d"}, cats);
10716 pandas::CategoricalIndex idx2(arr2);
10717
10718 auto diff = idx1.difference(idx2);
10719
10720 bool passed = (diff.size() == 2 &&
10721 diff.contains("a") && diff.contains("c") &&
10722 !diff.contains("b") && !diff.contains("d"));
10723 if (!passed) {
10724 std::cout << " [FAIL] : in pd_test_extension_index_difference() : difference check failed" << std::endl;
10725 throw std::runtime_error("pd_test_extension_index_difference failed");
10726 }
10727
10728 std::cout << " -> tests passed" << std::endl;
to_list (pd_test_1_all.cpp:10247)
10237 std::cout << " -> tests passed" << std::endl;
10238 }
10239
10240 void pd_test_extension_index_to_list() {
10241 std::cout << "========= to_list =========================";
10242
10243 pandas::CategoricalArray arr({"x", "y", "z"});
10244 pandas::CategoricalIndex idx(arr);
10245
10246 auto list = idx.to_list();
10247
10248 bool passed = (list.size() == 3 &&
10249 list[0].has_value() && *list[0] == "x" &&
10250 list[1].has_value() && *list[1] == "y" &&
10251 list[2].has_value() && *list[2] == "z");
10252 if (!passed) {
10253 std::cout << " [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10254 throw std::runtime_error("pd_test_extension_index_to_list failed");
10255 }
to_string (pd_test_1_all.cpp:2693)
2683 pandas::PeriodArray arr_m(std::vector<std::string>{
2684 "2020-01",
2685 "NaT",
2686 "2025-06"
2687 }, "M");
2688
2689 // Year
2690 auto years = arr_m.year();
2691 auto y0 = years[0];
2692 if (!y0.has_value() || y0.value() != 2020) {
2693 std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695 }
2696
2697 auto y1 = years[1];
2698 if (y1.has_value()) {
2699 std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701 }
2702
2703 auto y2 = years[2];
to_string_vector (pd_test_1_all.cpp:10871)
10861 std::cout << " -> tests passed" << std::endl;
10862 }
10863
10864 void pd_test_extension_index_to_string_vector() {
10865 std::cout << "========= to_string_vector =========================";
10866
10867 pandas::CategoricalArray arr({"a", std::nullopt, "c"});
10868 pandas::CategoricalIndex idx(arr);
10869
10870 auto str_vec = idx.to_string_vector();
10871
10872 bool passed = (str_vec.size() == 3 &&
10873 str_vec[0] == "a" && str_vec[1] == "NA" && str_vec[2] == "c");
10874 if (!passed) {
10875 std::cout << " [FAIL] : in pd_test_extension_index_to_string_vector() : to_string_vector check failed" << std::endl;
10876 throw std::runtime_error("pd_test_extension_index_to_string_vector failed");
10877 }
10878
10879 std::cout << " -> tests passed" << std::endl;
10880 }
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789 // Copy/Rename Tests
5790// ============================================================================
5791
5792 void pd_test_categorical_index_copy() {
5793 std::cout << "========= copy ========================================";
5794
5795 pandas::CategoricalArray arr({"a", "b", "c"});
5796 pandas::CategoricalIndex idx(arr, "original");
5797
5798 pandas::CategoricalIndex copied = idx.copy();
5799
5800 bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801 copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802 if (!passed) {
5803 std::cout << " [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804 throw std::runtime_error("pd_test_categorical_index_copy failed");
5805 }
5806
5807 std::cout << " -> tests passed" << std::endl;
5808 }
duplicated (pd_test_1_all.cpp:10583)
10573 std::cout << " -> tests passed" << std::endl;
10574 }
10575
10576 void pd_test_extension_index_duplicated() {
10577 std::cout << "========= duplicated =========================";
10578
10579 pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10580 pandas::CategoricalIndex idx(arr);
10581
10582 auto dup_mask = idx.duplicated("first");
10583
10584 bool passed = (dup_mask.getElementAt({0}) == false &&
10585 dup_mask.getElementAt({1}) == false &&
10586 dup_mask.getElementAt({2}) == true &&
10587 dup_mask.getElementAt({3}) == false &&
10588 dup_mask.getElementAt({4}) == true);
10589 if (!passed) {
10590 std::cout << " [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10591 throw std::runtime_error("pd_test_extension_index_duplicated failed");
10592 }
intersection (pd_test_1_all.cpp:10672)
10662 std::cout << "========= intersection =========================";
10663
10664 // Use same categories for both arrays
10665 std::vector<std::string> cats = {"a", "b", "c", "d", "e", "f"};
10666 pandas::CategoricalArray arr1({"a", "b", "c", "d"}, cats);
10667 pandas::CategoricalIndex idx1(arr1);
10668
10669 pandas::CategoricalArray arr2({"b", "c", "e", "f"}, cats);
10670 pandas::CategoricalIndex idx2(arr2);
10671
10672 auto inter = idx1.intersection(idx2);
10673
10674 bool passed = (inter.size() == 2 && inter.contains("b") && inter.contains("c"));
10675 if (!passed) {
10676 std::cout << " [FAIL] : in pd_test_extension_index_intersection() : intersection check failed" << std::endl;
10677 throw std::runtime_error("pd_test_extension_index_intersection failed");
10678 }
10679
10680 std::cout << " -> tests passed" << std::endl;
10681 }
isin (pd_test_1_all.cpp:5938)
5928 std::cout << " -> tests passed" << std::endl;
5929 }
5930
5931 void pd_test_categorical_index_isin() {
5932 std::cout << "========= inherited isin ==============================";
5933
5934 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935 pandas::CategoricalIndex idx(arr);
5936
5937 std::vector<std::string> values = {"a", "c"};
5938 numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940 bool passed = (mask.getSize() == 4 &&
5941 mask.getElementAt({0}) == true && // a
5942 mask.getElementAt({1}) == false && // b
5943 mask.getElementAt({2}) == true && // c
5944 mask.getElementAt({3}) == false); // d
5945 if (!passed) {
5946 std::cout << " [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947 throw std::runtime_error("pd_test_categorical_index_isin failed");
5948 }
symmetric_difference (pd_test_1_all.cpp:10742)
10732 std::cout << "========= symmetric_difference =========================";
10733
10734 // Use same categories for both arrays
10735 std::vector<std::string> cats = {"a", "b", "c", "d"};
10736 pandas::CategoricalArray arr1({"a", "b", "c"}, cats);
10737 pandas::CategoricalIndex idx1(arr1);
10738
10739 pandas::CategoricalArray arr2({"b", "c", "d"}, cats);
10740 pandas::CategoricalIndex idx2(arr2);
10741
10742 auto sym_diff = idx1.symmetric_difference(idx2);
10743
10744 bool passed = (sym_diff.size() == 2 &&
10745 sym_diff.contains("a") && sym_diff.contains("d") &&
10746 !sym_diff.contains("b") && !sym_diff.contains("c"));
10747 if (!passed) {
10748 std::cout << " [FAIL] : in pd_test_extension_index_symmetric_difference() : symmetric_difference check failed" << std::endl;
10749 throw std::runtime_error("pd_test_extension_index_symmetric_difference failed");
10750 }
10751
10752 std::cout << " -> tests passed" << std::endl;
union_ (pd_test_1_all.cpp:10694)
10684 std::cout << "========= union =========================";
10685
10686 // Use same categories for both arrays
10687 std::vector<std::string> cats = {"a", "b", "c", "d", "e"};
10688 pandas::CategoricalArray arr1({"a", "b", "c"}, cats);
10689 pandas::CategoricalIndex idx1(arr1);
10690
10691 pandas::CategoricalArray arr2({"b", "c", "d", "e"}, cats);
10692 pandas::CategoricalIndex idx2(arr2);
10693
10694 auto uni = idx1.union_(idx2);
10695
10696 bool passed = (uni.size() == 5 &&
10697 uni.contains("a") && uni.contains("b") && uni.contains("c") &&
10698 uni.contains("d") && uni.contains("e"));
10699 if (!passed) {
10700 std::cout << " [FAIL] : in pd_test_extension_index_union() : union check failed" << std::endl;
10701 throw std::runtime_error("pd_test_extension_index_union failed");
10702 }
10703
10704 std::cout << " -> tests passed" << std::endl;
unique (pd_test_1_all.cpp:1345)
1335 pandas::DatetimeArray arr(std::vector<std::string>{
1336 "2023-01-01",
1337 "2023-06-15",
1338 "2023-01-01",
1339 "NaT",
1340 "2023-06-15",
1341 "NaT"
1342 });
1343
1344 // unique
1345 auto uniq = arr.unique();
1346 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347 if (uniq.size() != 3) {
1348 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350 }
1351
1352 // factorize
1353 auto [codes, uniques] = arr.factorize();
1354 // Codes for NaT should be -1
1355 if (codes.getElementAt({3}) != -1) {
is_monotonic_decreasing (pd_test_1_all.cpp:10203)
10193 }
10194
10195 void pd_test_extension_index_monotonicity() {
10196 std::cout << "========= monotonicity =========================";
10197
10198 pandas::CategoricalArray arr1({"a", "b", "c"});
10199 pandas::CategoricalIndex idx1(arr1);
10200
10201 // Just test that the methods work (result depends on internal ordering)
10202 bool inc = idx1.is_monotonic_increasing();
10203 bool dec = idx1.is_monotonic_decreasing();
10204
10205 bool passed = (inc || dec || (!inc && !dec)); // Any result is valid
10206 if (!passed) {
10207 std::cout << " [FAIL] : in pd_test_extension_index_monotonicity() : monotonicity check failed" << std::endl;
10208 throw std::runtime_error("pd_test_extension_index_monotonicity failed");
10209 }
10210
10211 std::cout << " -> tests passed" << std::endl;
10212 }
is_monotonic_increasing (pd_test_1_all.cpp:10202)
10192 std::cout << " -> tests passed" << std::endl;
10193 }
10194
10195 void pd_test_extension_index_monotonicity() {
10196 std::cout << "========= monotonicity =========================";
10197
10198 pandas::CategoricalArray arr1({"a", "b", "c"});
10199 pandas::CategoricalIndex idx1(arr1);
10200
10201 // Just test that the methods work (result depends on internal ordering)
10202 bool inc = idx1.is_monotonic_increasing();
10203 bool dec = idx1.is_monotonic_decreasing();
10204
10205 bool passed = (inc || dec || (!inc && !dec)); // Any result is valid
10206 if (!passed) {
10207 std::cout << " [FAIL] : in pd_test_extension_index_monotonicity() : monotonicity check failed" << std::endl;
10208 throw std::runtime_error("pd_test_extension_index_monotonicity failed");
10209 }
10210
10211 std::cout << " -> tests passed" << std::endl;
10212 }
is_unique (pd_test_1_all.cpp:5962)
5952 void pd_test_categorical_index_is_unique() {
5953 std::cout << "========= inherited is_unique =========================";
5954
5955 pandas::CategoricalArray arr_unique({"a", "b", "c"});
5956 pandas::CategoricalArray arr_dups({"a", "b", "a"});
5957
5958 pandas::CategoricalIndex idx_unique(arr_unique);
5959 pandas::CategoricalIndex idx_dups(arr_dups);
5960
5961 bool passed = (idx_unique.is_unique() && !idx_dups.is_unique());
5962 if (!passed) {
5963 std::cout << " [FAIL] : in pd_test_categorical_index_is_unique()" << std::endl;
5964 throw std::runtime_error("pd_test_categorical_index_is_unique failed");
5965 }
5966
5967 std::cout << " -> tests passed" << std::endl;
5968 }
5969
5970 void pd_test_categorical_index_hasnans() {
5971 std::cout << "========= inherited hasnans ===========================";
array (pd_test_1_all.cpp:7343)
7333 };
7334 pandas::DatetimeIndex idx(values, "with_nat");
7335
7336 bool passed = (idx.size() == 3);
7337 if (!passed) {
7338 std::cout << " [FAIL] : in pd_test_datetime_index_optional_vector_constructor()" << std::endl;
7339 throw std::runtime_error("pd_test_datetime_index_optional_vector_constructor failed");
7340 }
7341
7342 // Check that middle element is NA
7343 bool has_na = idx.array().is_na(1);
7344 passed = passed && has_na;
7345 if (!passed) {
7346 std::cout << " [FAIL] : NA not preserved" << std::endl;
7347 throw std::runtime_error("pd_test_datetime_index_optional_vector_constructor failed");
7348 }
7349
7350 std::cout << " -> tests passed" << std::endl;
7351 }
7352
7353 void pd_test_datetime_index_copy_constructor() {
clear_cache (pd_test_1_all.cpp:19413)
19403 s.mean();
19404 s.min();
19405 s.max();
19406
19407 passed = s.has_cached_values() == true;
19408 if (!passed) {
19409 std::cout << " [FAIL] : in pd_test_series_cache() : cache not populated" << std::endl;
19410 throw std::runtime_error("pd_test_series_cache failed: cache not populated");
19411 }
19412
19413 s.clear_cache();
19414 passed = s.has_cached_values() == false;
19415 if (!passed) {
19416 std::cout << " [FAIL] : in pd_test_series_cache() : cache not cleared" << std::endl;
19417 throw std::runtime_error("pd_test_series_cache failed: cache not cleared");
19418 }
19419
19420 std::cout << " -> tests passed" << std::endl;
19421 }
19422
19423 void pd_test_series_string_repr() {
clone (pd_test_1_all.cpp:5776)
5766 std::cout << " -> tests passed" << std::endl;
5767 }
5768
5769 void pd_test_categorical_index_clone() {
5770 std::cout << "========= clone =======================================";
5771
5772 pandas::CategoricalArray arr({"p", "q", "r"});
5773 pandas::CategoricalIndex idx(arr, "original");
5774
5775 std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5776
5777 bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5778 cloned->name() == idx.name());
5779 if (!passed) {
5780 std::cout << " [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5781 throw std::runtime_error("pd_test_categorical_index_clone failed");
5782 }
5783
5784 std::cout << " -> tests passed" << std::endl;
5785 }
contains (pd_test_1_all.cpp:2200)
2190 // Test: contains method
2191 // ============================================================================
2192 void test_contains() {
2193 std::cout << "========= IntervalArray: contains ======================= ";
2194
2195 std::vector<numpy::float64> breaks = {0.0, 1.0, 2.0, 3.0};
2196
2197 // Right-closed intervals: (0, 1], (1, 2], (2, 3]
2198 auto arr_right = pandas::IntervalArrayFloat64::from_breaks(breaks, pandas::IntervalClosed::Right);
2199
2200 // Test contains(1.0) - should be in interval 0 but not 1 (since 1 is exclusive on left of interval 1)
2201 auto contains_1 = arr_right.contains(1.0);
2202 // (0, 1] contains 1: yes, (1, 2] contains 1: no (open on left), (2, 3] contains 1: no
2203 if (contains_1[0].value_or(false) != true ||
2204 contains_1[1].value_or(true) != false ||
2205 contains_1[2].value_or(true) != false) {
2206 std::cout << "[FAIL] : in test_contains() : right-closed contains 1.0" << std::endl;
2207 return;
2208 }
2209
2210 // Left-closed intervals: [0, 1), [1, 2), [2, 3)
contains_str (pd_test_1_all.cpp:10889)
10879 std::cout << " -> tests passed" << std::endl;
10880 }
10881
10882 void pd_test_extension_index_contains_str_get_loc_str() {
10883 std::cout << "========= contains_str/get_loc_str =========================";
10884
10885 pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10886 pandas::CategoricalIndex idx(arr);
10887
10888 bool passed = (idx.contains_str("apple") && !idx.contains_str("grape") &&
10889 idx.get_loc_str("banana") == 1 && idx.get_loc_str("grape") == -1);
10890 if (!passed) {
10891 std::cout << " [FAIL] : in pd_test_extension_index_contains_str_get_loc_str() : contains_str/get_loc_str check failed" << std::endl;
10892 throw std::runtime_error("pd_test_extension_index_contains_str_get_loc_str failed");
10893 }
10894
10895 std::cout << " -> tests passed" << std::endl;
10896 }
10897
10898 void pd_test_extension_index_repr() {
delete_ (pd_test_1_all.cpp:10501)
10491 std::cout << " -> tests passed" << std::endl;
10492 }
10493
10494 void pd_test_extension_index_delete() {
10495 std::cout << "========= delete_ =========================";
10496
10497 pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498 pandas::CategoricalIndex idx(arr);
10499
10500 auto deleted = idx.delete_(1);
10501 auto v0 = deleted[0];
10502 auto v1 = deleted[1];
10503 auto v2 = deleted[2];
10504
10505 bool passed = (deleted.size() == 3 &&
10506 v0.has_value() && *v0 == "a" &&
10507 v1.has_value() && *v1 == "c" &&
10508 v2.has_value() && *v2 == "d");
10509 if (!passed) {
10510 std::cout << " [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
delete_ (pd_test_1_all.cpp:10501)
10491 std::cout << " -> tests passed" << std::endl;
10492 }
10493
10494 void pd_test_extension_index_delete() {
10495 std::cout << "========= delete_ =========================";
10496
10497 pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498 pandas::CategoricalIndex idx(arr);
10499
10500 auto deleted = idx.delete_(1);
10501 auto v0 = deleted[0];
10502 auto v1 = deleted[1];
10503 auto v2 = deleted[2];
10504
10505 bool passed = (deleted.size() == 3 &&
10506 v0.has_value() && *v0 == "a" &&
10507 v1.has_value() && *v1 == "c" &&
10508 v2.has_value() && *v2 == "d");
10509 if (!passed) {
10510 std::cout << " [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
dtype_name (pd_test_1_all.cpp:10104)
10094 }
10095
10096 void pd_test_extension_index_array_constructor() {
10097 std::cout << "========= array constructor =========================";
10098
10099 pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
10100 pandas::CategoricalIndex idx(arr, "fruits");
10101
10102 bool passed = (idx.size() == 4 && !idx.empty() &&
10103 idx.name().has_value() && *idx.name() == "fruits" &&
10104 idx.dtype_name() == "category");
10105 if (!passed) {
10106 std::cout << " [FAIL] : in pd_test_extension_index_array_constructor() : array constructor check failed" << std::endl;
10107 throw std::runtime_error("pd_test_extension_index_array_constructor failed");
10108 }
10109
10110 std::cout << " -> tests passed" << std::endl;
10111 }
10112
10113 void pd_test_extension_index_copy_constructor() {
10114 std::cout << "========= copy constructor =========================";
empty (pd_test_1_all.cpp:941)
931 #include "../pandas/pd_config.h"
932
933 namespace dataframe_tests {
934
935 namespace dataframe_tests_config {
936
937 void pd_test_config_version() {
938 std::cout << "========= df_config: version info ======================= ";
939 const char* version = pandas::DataFrameInfo::version();
940 if (version == nullptr || std::string(version).empty()) {
941 std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942 throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943 }
944 std::cout << "-> tests passed" << std::endl;
945 }
946
947 void pd_test_config_na_repr() {
948 std::cout << "========= df_config: NA representation ======================= ";
949 const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950 if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1343 // unique
1344 auto uniq = arr.unique();
1345 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1346 if (uniq.size() != 3) {
1347 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1348 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1349 }
1350
1351 // factorize
1352 auto [codes, uniques] = arr.factorize();
1353 // Codes for NaT should be -1
1354 if (codes.getElementAt({3}) != -1) {
1355 std::cout << " [FAIL] : factorize: NaT code should be -1" << std::endl;
1356 throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1357 }
1358 // Same values should have same codes
1359 if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1360 std::cout << " [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1361 throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1362 }
has_cached_values (pd_test_1_all.cpp:19395)
19385 }
19386
19387 std::cout << " -> tests passed" << std::endl;
19388 }
19389
19390 void pd_test_series_cache() {
19391 std::cout << "========= cache management =========================================";
19392
19393 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
19394
19395 bool passed = s.has_cached_values() == false;
19396 if (!passed) {
19397 std::cout << " [FAIL] : in pd_test_series_cache() : initial cache not empty" << std::endl;
19398 throw std::runtime_error("pd_test_series_cache failed: initial cache not empty");
19399 }
19400
19401 // Trigger cache
19402 s.sum();
19403 s.mean();
19404 s.min();
19405 s.max();
has_duplicates (pd_test_1_all.cpp:10176)
10166 std::cout << " -> tests passed" << std::endl;
10167 }
10168
10169 void pd_test_extension_index_uniqueness() {
10170 std::cout << "========= uniqueness =========================";
10171
10172 // Unique values
10173 pandas::CategoricalArray arr1({"a", "b", "c"});
10174 pandas::CategoricalIndex idx1(arr1);
10175
10176 bool passed1 = (idx1.is_unique() && !idx1.has_duplicates());
10177 if (!passed1) {
10178 std::cout << " [FAIL] : in pd_test_extension_index_uniqueness() : unique check failed" << std::endl;
10179 throw std::runtime_error("pd_test_extension_index_uniqueness failed");
10180 }
10181
10182 // With duplicates
10183 pandas::CategoricalArray arr2({"a", "b", "a", "c"});
10184 pandas::CategoricalIndex idx2(arr2);
10185
10186 bool passed2 = (!idx2.is_unique() && idx2.has_duplicates());
hasnans (pd_test_1_all.cpp:5363)
5353 void pd_test_categorical_index_from_codes() {
5354 std::cout << "========= from_codes =================================";
5355
5356 std::vector<numpy::int32> codes = {0, 1, 0, 2, -1}; // -1 = NA
5357 std::vector<std::string> categories = {"low", "medium", "high"};
5358
5359 pandas::CategoricalIndex idx = pandas::CategoricalIndex::from_codes(codes, categories, true, "level");
5360
5361 bool passed = (idx.size() == 5 && idx.num_categories() == 3 &&
5362 idx.ordered() && idx.name().has_value() && *idx.name() == "level" &&
5363 idx.hasnans()); // has NA from code -1
5364 if (!passed) {
5365 std::cout << " [FAIL] : in pd_test_categorical_index_from_codes()" << std::endl;
5366 throw std::runtime_error("pd_test_categorical_index_from_codes failed");
5367 }
5368
5369 std::cout << " -> tests passed" << std::endl;
5370 }
5371
5372 void pd_test_categorical_index_simple_new() {
5373 std::cout << "========= _simple_new =================================";
identical (pd_test_1_all.cpp:5883)
5873 }
5874
5875 void pd_test_categorical_index_identical() {
5876 std::cout << "========= identical ===================================";
5877
5878 pandas::CategoricalArray arr({"a", "b"});
5879 pandas::CategoricalIndex idx1(arr, "same_name");
5880 pandas::CategoricalIndex idx2(arr, "same_name");
5881 pandas::CategoricalIndex idx3(arr, "diff_name");
5882
5883 bool passed = (idx1.identical(idx2) && !idx1.identical(idx3));
5884 if (!passed) {
5885 std::cout << " [FAIL] : in pd_test_categorical_index_identical()" << std::endl;
5886 throw std::runtime_error("pd_test_categorical_index_identical failed");
5887 }
5888
5889 std::cout << " -> tests passed" << std::endl;
5890 }
5891
5892 // ============================================================================
5893 // Inherited Operations Tests
inferred_type (pd_test_1_all.cpp:5270)
5260 }
5261
5262 void pd_test_categorical_index_array_constructor() {
5263 std::cout << "========= array constructor ===========================";
5264
5265 pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
5266 pandas::CategoricalIndex idx(arr, "fruits");
5267
5268 bool passed = (idx.size() == 4 && !idx.empty() &&
5269 idx.name().has_value() && *idx.name() == "fruits" &&
5270 idx.inferred_type() == "categorical");
5271 if (!passed) {
5272 std::cout << " [FAIL] : in pd_test_categorical_index_array_constructor()" << std::endl;
5273 throw std::runtime_error("pd_test_categorical_index_array_constructor failed");
5274 }
5275
5276 std::cout << " -> tests passed" << std::endl;
5277 }
5278
5279 void pd_test_categorical_index_values_constructor() {
5280 std::cout << "========= values constructor ==========================";
name (pd_test_1_all.cpp:295)
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
293
294 pandas::BooleanArray arr;
295 if (arr.dtype().name() != "boolean") {
296 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298 }
299
300 if (arr.dtype().kind() != "b") {
301 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303 }
304
305 std::cout << " -> tests passed" << std::endl;
nbytes (pd_test_1_all.cpp:6214)
6204 }
6205
6206 // Test empty DataFrame
6207 pandas::DataFrame empty_df;
6208 if (!empty_df.empty()) {
6209 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210 throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211 }
6212
6213 // Test nbytes > 0 for non-empty
6214 if (df.nbytes() == 0) {
6215 std::cout << " [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216 throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217 }
6218
6219 // Test columns index
6220 if (df.columns().size() != 3) {
6221 std::cout << " [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222 throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223 }
repr (pd_test_1_all.cpp:10906)
10896 std::cout << " -> tests passed" << std::endl;
10897 }
10898
10899 void pd_test_extension_index_repr() {
10900 std::cout << "========= repr =========================";
10901
10902 pandas::CategoricalArray arr({"a", "b", "c"});
10903 // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904 pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906 std::string repr_str = idx.repr();
10907
10908 bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909 if (!passed) {
10910 std::cout << " [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911 throw std::runtime_error("pd_test_extension_index_repr failed");
10912 }
10913
10914 std::cout << " -> tests passed" << std::endl;
10915 }
result (pd_test_1_all.cpp:15406)
15396 data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397 data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400 mask.setElementAt({0}, numpy::bool_(false));
15401 mask.setElementAt({1}, numpy::bool_(false));
15402
15403 pandas::DatetimeArray arr(data, mask);
15404 pandas::DatetimeIndexBase idx(arr, "original");
15405
15406 // Create join result (int64 values)
15407 numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408 join_result.setElementAt({0}, numpy::int64(500LL));
15409 join_result.setElementAt({1}, numpy::int64(600LL));
15410 join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412 auto new_idx = idx._from_join_target(join_result);
15413
15414 bool passed = (new_idx.size() == 3 &&
15415 new_idx.name().has_value() && *new_idx.name() == "original");
15416 if (!passed) {
set_name (pd_test_1_all.cpp:11798)
11788 throw std::runtime_error("pd_test_index_vector_constructor failed");
11789 }
11790
11791 std::cout << " -> tests passed" << std::endl;
11792 }
11793
11794 void pd_test_index_copy_constructor() {
11795 std::cout << "========= copy constructor ============================";
11796
11797 pandas::Index<numpy::int64> idx1{1, 2, 3};
11798 idx1.set_name("original");
11799
11800 pandas::Index<numpy::int64> idx2(idx1);
11801
11802 bool passed = (idx2.size() == 3);
11803 passed = passed && (idx2.name().value() == "original");
11804 passed = passed && idx2.equals(idx1);
11805
11806 if (!passed) {
11807 std::cout << " [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808 throw std::runtime_error("pd_test_index_copy_constructor failed");
size (pd_test_1_all.cpp:22)
12 #include "../pandas/pd_boolean_array.h"
13
14 namespace dataframe_tests {
15
16 namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
type_id (pd_test_3_all.cpp:25592)
25582 // ------------------- pd_test_value_classify (end) ------------------
25583
25584 // ------------------- pd_test_index_type_id (start) ------------------
25585 namespace dataframe_tests_index_type_id {
25586
25587 void pd_test_index_type_id_dispatch() {
25588 std::cout << "========= IndexTypeId dispatch =======================";
25589
25590 // RangeIndex
25591 ::pandas::RangeIndex ri(0, 5);
25592 if (ri.type_id() != ::pandas::IndexTypeId::RangeIndex)
25593 throw std::runtime_error("RangeIndex type_id failed");
25594
25595 // Index<string>
25596 ::pandas::Index<std::string> si(std::vector<std::string>{"a", "b", "c"});
25597 if (si.type_id() != ::pandas::IndexTypeId::IndexString)
25598 throw std::runtime_error("Index<string> type_id failed");
25599
25600 // Index<int64>
25601 ::pandas::Index<numpy::int64> ii(std::vector<numpy::int64>{1, 2, 3});
25602 if (ii.type_id() != ::pandas::IndexTypeId::IndexInt64)
values (pd_test_1_all.cpp:364)
354 pandas::CategoricalArray arr1;
355 if (arr1.size() != 0) {
356 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357 throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358 }
359 if (arr1.ordered()) {
360 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361 throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362 }
363
364 // Constructor from values (infer categories)
365 std::vector<std::optional<std::string>> values = {
366 std::optional<std::string>("a"),
367 std::optional<std::string>("b"),
368 std::optional<std::string>("a"),
369 std::optional<std::string>("c")
370 };
371 pandas::CategoricalArray arr2(values);
372 if (arr2.size() != 4) {
373 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374 throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");