CategoricalArray#
-
class pandas::CategoricalArray#
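Extension array type for categorical data: each element refers to one of a list of categories via an integer code, and a code of -1 marks a missing (NA) value (see the from_codes example below).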
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use CategoricalArray
CategoricalArray obj;
// ... operations ...
Constructors#
| Signature | Location | Example |
|---|---|---|
|  | pd_categorical_array.h:103 |  |
|  | pd_categorical_array.h:126 |  |
|  | pd_categorical_array.h:233 |  |
|  | pd_categorical_array.h:251 |  |
Construction#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
static CategoricalArray |
pd_categorical_array.h:275 |
|
|
static CategoricalArray |
pd_categorical_array.h:304 |
Indexing / Selection#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::int32 |
pd_categorical_array.h:519 |
|
|
CategoricalArray |
pd_categorical_array.h:1305 |
Data Manipulation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
CategoricalArray |
pd_categorical_array.h:595 |
|
|
CategoricalArray |
pd_categorical_array.h:1594 |
|
|
CategoricalArray |
pd_categorical_array.h:740 |
|
|
CategoricalArray |
pd_categorical_array.h:760 |
Missing Data#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
CategoricalArray |
pd_categorical_array.h:566 |
|
|
CategoricalArray |
pd_categorical_array.h:1643 |
|
|
numpy::NDArray<numpy::bool_> |
pd_categorical_array.h:539 |
|
|
numpy::NDArray<numpy::bool_> |
pd_categorical_array.h:1798 |
|
|
numpy::NDArray<numpy::bool_> |
pd_categorical_array.h:550 |
|
|
numpy::NDArray<numpy::bool_> |
pd_categorical_array.h:1884 |
Statistics#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
size_t |
pd_categorical_array.h:609 |
|
|
std::map<std::string, std::string> |
pd_categorical_array.h:1480 |
|
|
std::optional<std::string> |
pd_categorical_array.h:918 |
|
|
std::optional<std::string> |
pd_categorical_array.h:884 |
|
|
std::pair<std::vector<std::string>, std::vector<numpy::int64>> |
pd_categorical_array.h:999 |
Aggregation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
CategoricalArray |
pd_categorical_array.h:1808 |
|
|
CategoricalArray |
pd_categorical_array.h:1852 |
Arithmetic#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
CategoricalArray |
pd_categorical_array.h:638 |
Comparison#
Sorting#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::NDArray<numpy::int64> |
pd_categorical_array.h:1180 |
|
|
size_t |
pd_categorical_array.h:1970 |
|
|
CategoricalArray |
pd_categorical_array.h:2067 |
Reshaping#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
CategoricalArray |
pd_categorical_array.h:2162 |
|
|
CategoricalArray |
pd_categorical_array.h:2094 |
|
|
CategoricalArray |
pd_categorical_array.h:2155 |
Combining#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
static CategoricalArray |
pd_categorical_array.h:314 |
|
|
static CategoricalArray |
pd_categorical_array.h:351 |
Time Series#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
CategoricalArray |
pd_categorical_array.h:2031 |
I/O#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::vector<std::optional<std::string>> |
pd_categorical_array.h:2105 |
|
|
numpy::NDArray<U> |
pd_categorical_array.h:2136 |
|
|
std::string |
pd_categorical_array.h:2212 |
|
|
std::vector<std::optional<std::string>> |
pd_categorical_array.h:2124 |
Conversion#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::vector<std::optional<T>> |
pd_categorical_array.h:1365 |
|
|
numpy::NDArray<numpy::int32> |
pd_categorical_array.h:1418 |
|
|
CategoricalArray |
pd_categorical_array.h:1298 |
|
|
CategoricalArray |
pd_categorical_array.h:2174 |
Set Operations#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::NDArray<numpy::bool_> |
pd_categorical_array.h:1517 |
|
|
numpy::NDArray<numpy::bool_> |
pd_categorical_array.h:1770 |
|
|
CategoricalArray |
pd_categorical_array.h:951 |
Type Checking#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
bool |
pd_categorical_array.h:527 |
Other Methods#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::optional<size_t> |
pd_categorical_array.h:1258 |
|
|
std::optional<size_t> |
pd_categorical_array.h:1220 |
|
|
CategoricalArray |
pd_categorical_array.h:865 |
|
|
CategoricalArray |
pd_categorical_array.h:872 |
|
|
std::unordered_map<std::string, numpy::int32> |
pd_categorical_array.h:59 |
|
|
const std::vector<std::string>& |
pd_categorical_array.h:473 |
|
|
const std::string& |
pd_categorical_array.h:492 |
|
|
void |
pd_categorical_array.h:1427 |
|
|
pd_categorical_array.h:1974 |
||
|
pd_categorical_array.h:2071 |
||
|
const numpy::NDArray<numpy::int32>& |
pd_categorical_array.h:454 |
|
|
std::vector<numpy::int32> |
pd_categorical_array.h:461 |
|
|
CategoricalArray |
pd_categorical_array.h:1438 |
|
|
CategoricalArray |
pd_categorical_array.h:1461 |
|
|
CategoricalDtype |
pd_categorical_array.h:401 |
|
|
bool |
pd_categorical_array.h:440 |
|
|
std::pair<IntegerArray<numpy::int64>, CategoricalArray> |
pd_categorical_array.h:969 |
|
|
bool |
pd_categorical_array.h:622 |
|
|
std::vector<std::string> |
pd_categorical_array.h:2198 |
|
|
size_t |
pd_categorical_array.h:1865 |
|
|
const std::optional<std::string>& |
pd_categorical_array.h:484 |
|
|
size_t |
pd_categorical_array.h:415 |
|
|
constexpr int |
pd_categorical_array.h:426 |
|
|
bool |
pd_categorical_array.h:480 |
|
|
CategoricalArray |
pd_categorical_array.h:1892 |
|
|
CategoricalArray |
pd_categorical_array.h:657 |
|
|
CategoricalArray |
pd_categorical_array.h:823 |
|
|
CategoricalArray |
pd_categorical_array.h:783 |
|
|
CategoricalArray |
pd_categorical_array.h:1901 |
|
|
CategoricalArray |
pd_categorical_array.h:1921 |
|
|
std::string |
pd_categorical_array.h:2238 |
|
|
CategoricalArray |
pd_categorical_array.h:1950 |
|
|
CategoricalArray |
pd_categorical_array.h:700 |
|
|
void |
pd_categorical_array.h:496 |
|
|
void |
pd_categorical_array.h:488 |
|
|
CategoricalArray |
pd_categorical_array.h:2021 |
|
|
std::vector<size_t> |
pd_categorical_array.h:433 |
|
|
size_t |
pd_categorical_array.h:408 |
|
|
CategoricalArray |
pd_categorical_array.h:1342 |
|
|
void |
pd_categorical_array.h:70 |
Internal Methods#
1 internal method (prefixed with an underscore)
Code Examples#
The following examples are extracted from the test suite.
CategoricalArray (pd_test_3_all.cpp:28514)
28504 static int cgo_check(bool cond, const char* msg) {
28505 if (!cond) { std::cout << " FAIL: " << msg << std::endl; return 1; }
28506 return 0;
28507 }
28508
28509 static pandas::CategoricalArray make_abc() {
28510 std::vector<std::optional<std::string>> v{
28511 std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512 };
28513 return pandas::CategoricalArray(v, false);
28514 }
28515
28516 void pd_test_cat_rename_dict() {
28517 std::cout << " -- pd_test_cat_rename_dict --" << std::endl;
28518 int fail = 0;
28519 auto arr = make_abc();
28520 std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521 auto r = arr.rename_categories(m);
28522 const auto& cats = r.categories();
28523 fail += cgo_check(cats.size() == 3, "size==3");
CategoricalArray (pd_test_3_all.cpp:28514)
28504static int cgo_check(bool cond, const char* msg) {
28505 if (!cond) { std::cout << " FAIL: " << msg << std::endl; return 1; }
28506 return 0;
28507}
28508
28509static pandas::CategoricalArray make_abc() {
28510 std::vector<std::optional<std::string>> v{
28511 std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512 };
28513 return pandas::CategoricalArray(v, false);
28514}
28515
28516void pd_test_cat_rename_dict() {
28517 std::cout << " -- pd_test_cat_rename_dict --" << std::endl;
28518 int fail = 0;
28519 auto arr = make_abc();
28520 std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521 auto r = arr.rename_categories(m);
28522 const auto& cats = r.categories();
28523 fail += cgo_check(cats.size() == 3, "size==3");
CategoricalArray (pd_test_3_all.cpp:28514)
28504static int cgo_check(bool cond, const char* msg) {
28505 if (!cond) { std::cout << " FAIL: " << msg << std::endl; return 1; }
28506 return 0;
28507}
28508
28509static pandas::CategoricalArray make_abc() {
28510 std::vector<std::optional<std::string>> v{
28511 std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512 };
28513 return pandas::CategoricalArray(v, false);
28514}
28515
28516void pd_test_cat_rename_dict() {
28517 std::cout << " -- pd_test_cat_rename_dict --" << std::endl;
28518 int fail = 0;
28519 auto arr = make_abc();
28520 std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521 auto r = arr.rename_categories(m);
28522 const auto& cats = r.categories();
28523 fail += cgo_check(cats.size() == 3, "size==3");
CategoricalArray (pd_test_3_all.cpp:28514)
28504static int cgo_check(bool cond, const char* msg) {
28505 if (!cond) { std::cout << " FAIL: " << msg << std::endl; return 1; }
28506 return 0;
28507}
28508
28509static pandas::CategoricalArray make_abc() {
28510 std::vector<std::optional<std::string>> v{
28511 std::string("a"), std::string("b"), std::string("c"), std::string("a")
28512 };
28513 return pandas::CategoricalArray(v, false);
28514}
28515
28516void pd_test_cat_rename_dict() {
28517 std::cout << " -- pd_test_cat_rename_dict --" << std::endl;
28518 int fail = 0;
28519 auto arr = make_abc();
28520 std::unordered_map<std::string, std::string> m{{"a", "A"}, {"b", "B"}};
28521 auto r = arr.rename_categories(m);
28522 const auto& cats = r.categories();
28523 fail += cgo_check(cats.size() == 3, "size==3");
from_codes (pd_test_1_all.cpp:403)
393 std::cout << " -> tests passed" << std::endl;
394 }
395
396 void pd_test_categorical_array_from_codes() {
397 std::cout << "========= CategoricalArray: from_codes ======================= ";
398
399 std::vector<std::string> cats = {"a", "b", "c"};
400 std::vector<numpy::int32> codes = {0, 1, 2, 0, 1, -1}; // -1 is NA
401
402 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, false);
403
404 if (arr.size() != 6) {
405 std::cout << " [FAIL] : in pd_test_categorical_array_from_codes() : size != 6" << std::endl;
406 throw std::runtime_error("pd_test_categorical_array_from_codes failed: size != 6");
407 }
408
409 // Check that code=-1 creates NA
410 if (!arr.is_na(5)) {
411 std::cout << " [FAIL] : in pd_test_categorical_array_from_codes() : code -1 should be NA" << std::endl;
412 throw std::runtime_error("pd_test_categorical_array_from_codes failed: code -1 should be NA");
take (pd_test_1_all.cpp:5903)
5893 // Inherited Operations Tests
5894 // ============================================================================
5895
5896 void pd_test_categorical_index_take() {
5897 std::cout << "========= inherited take ==============================";
5898
5899 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900 pandas::CategoricalIndex idx(arr);
5901
5902 std::vector<size_t> indices = {0, 2, 3};
5903 pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905 bool passed = (taken.size() == 3);
5906 if (!passed) {
5907 std::cout << " [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908 throw std::runtime_error("pd_test_categorical_index_take failed");
5909 }
5910
5911 std::cout << " -> tests passed" << std::endl;
5912 }
dropna (pd_test_1_all.cpp:531)
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
rename_categories (pd_test_1_all.cpp:655)
645 void pd_test_categorical_array_rename_categories() {
646 std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648 std::vector<std::string> cats = {"a", "b"};
649 std::vector<numpy::int32> codes = {0, 1, 0}; // a, b, a
650 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652 // Rename categories
653 std::vector<std::string> new_names = {"alpha", "beta"};
654 pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656 // Check categories are renamed
657 const std::vector<std::string>& result_cats = result.categories();
658 if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659 std::cout << " [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660 throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661 }
662
663 // Values should now be renamed
664 std::optional<std::string> val = result[0];
rename_categories (pd_test_1_all.cpp:655)
645 void pd_test_categorical_array_rename_categories() {
646 std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648 std::vector<std::string> cats = {"a", "b"};
649 std::vector<numpy::int32> codes = {0, 1, 0}; // a, b, a
650 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652 // Rename categories
653 std::vector<std::string> new_names = {"alpha", "beta"};
654 pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656 // Check categories are renamed
657 const std::vector<std::string>& result_cats = result.categories();
658 if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659 std::cout << " [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660 throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661 }
662
663 // Values should now be renamed
664 std::optional<std::string> val = result[0];
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
interpolate (pd_test_1_all.cpp:24365)
24355 std::cout << "====================================== [OK] pd_test_idxmax_idxmin test suite ========================== " << std::endl;
24356 return 0;
24357 }
24358
24359} // namespace dataframe_tests
24360// ------------------- pd_test_idxmax_idxmin.cpp (end) -----------------------------
24361
24362// ------------------- pd_test_interpolate.cpp (start) -----------------------------
24363// dataframe_tests/pd_test_interpolate.cpp
24364// Test file for DataFrame.interpolate() method
24365
24366#include <iostream>
24367#include <stdexcept>
24368#include <cmath>
24369#include <limits>
24370#include <map>
24371#include "../pandas/pd_dataframe.h"
24372
24373// CRITICAL: No using namespace directives
isna (pd_test_1_all.cpp:524)
514 throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515 }
516
517 // Test count (non-NA)
518 if (arr.count() != 2) {
519 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520 throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
676 // Index 0: 1.0 -> not null
677 if (isnull_result.getElementAt({0})) {
678 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679 throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680 }
681 // Index 1: NaN -> null
notna (pd_test_1_all.cpp:6595)
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593 }
6594
6595 auto notna_mask = df_na.notna();
6596 if (notna_mask.getElementAt({1, 0})) {
6597 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598 throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599 }
6600 }
6601
6602 // Test fillna
6603 {
6604 std::map<std::string, std::vector<numpy::float64>> float_data;
6605 float_data["X"] = {1.0, std::nan(""), 3.0};
notnull (pd_test_3_all.cpp:665)
655 }
656
657 std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
describe (pd_test_2_all.cpp:19793)
19783 ++g_fail;
19784 }
19785}
19786
19787static bool approx_eq(double a, double b, double tol = 1e-9) {
19788 if (std::isnan(a) && std::isnan(b)) return true;
19789 return std::abs(a - b) < tol;
19790}
19791
19792// =====================================================================
19793// Test: describe() default mode — numeric columns only
19794// =====================================================================
19795
19796void pd_test_describe_numeric_only() {
19797 std::cout << " -- pd_test_describe_numeric_only --" << std::endl;
19798
19799 pandas::DataFrame df;
19800 df.add_column("A", std::vector<double>{1.0, 2.0, 3.0, 4.0, 5.0});
19801 df.add_column("B", std::vector<double>{10.0, 20.0, 30.0, 40.0, 50.0});
19802 df.add_column("Name", std::vector<std::string>{"a", "b", "c", "d", "e"});
max (pd_test_1_all.cpp:771)
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
min (pd_test_1_all.cpp:764)
754 }
755
756 void pd_test_categorical_array_ordered_operations() {
757 std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759 std::vector<std::string> cats = {"low", "medium", "high"};
760 std::vector<numpy::int32> codes = {0, 2, 1, 0, -1}; // low, high, medium, low, NA
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
value_counts (pd_test_1_all.cpp:865)
855 std::vector<std::optional<std::string>> values = {
856 std::optional<std::string>("a"),
857 std::optional<std::string>("b"),
858 std::optional<std::string>("a"),
859 std::optional<std::string>("a"),
860 std::optional<std::string>("b"),
861 std::nullopt // NA not counted
862 };
863 pandas::CategoricalArray arr(values);
864
865 auto [cats, counts] = arr.value_counts();
866
867 // Should have 2 categories
868 if (cats.size() != 2 || counts.size() != 2) {
869 std::cout << " [FAIL] : in pd_test_categorical_array_value_counts() : wrong size" << std::endl;
870 throw std::runtime_error("pd_test_categorical_array_value_counts failed: wrong size");
871 }
872
873 // Find 'a' count
874 int64_t a_count = 0, b_count = 0;
875 for (size_t i = 0; i < cats.size(); ++i) {
map (pd_test_1_all.cpp:5839)
5829 // Map Tests
5830 // ============================================================================
5831
5832 void pd_test_categorical_index_map() {
5833 std::cout << "========= map =========================================";
5834
5835 pandas::CategoricalArray arr({"yes", "no", "yes"});
5836 pandas::CategoricalIndex idx(arr);
5837
5838 std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839 pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841 bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842 !mapped.has_category("yes") && !mapped.has_category("no"));
5843 if (!passed) {
5844 std::cout << " [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845 throw std::runtime_error("pd_test_categorical_index_map failed");
5846 }
5847
5848 std::cout << " -> tests passed" << std::endl;
5849 }
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833 std::cout << "========= map =========================================";
5834
5835 pandas::CategoricalArray arr({"yes", "no", "yes"});
5836 pandas::CategoricalIndex idx(arr);
5837
5838 std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839 pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841 bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842 !mapped.has_category("yes") && !mapped.has_category("no"));
5843 if (!passed) {
5844 std::cout << " [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845 throw std::runtime_error("pd_test_categorical_index_map failed");
5846 }
5847
5848 std::cout << " -> tests passed" << std::endl;
5849}
add_categories (pd_test_1_all.cpp:555)
545 }
546
547 void pd_test_categorical_array_add_categories() {
548 std::cout << "========= CategoricalArray: add_categories ======================= ";
549
550 std::vector<std::string> cats = {"a", "b"};
551 std::vector<numpy::int32> codes = {0, 1, 0};
552 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
553
554 // Add new categories
555 pandas::CategoricalArray result = arr.add_categories({"c", "d"});
556 if (result.categories().size() != 4) {
557 std::cout << " [FAIL] : in pd_test_categorical_array_add_categories() : new categories size != 4" << std::endl;
558 throw std::runtime_error("pd_test_categorical_array_add_categories failed: new categories size != 4");
559 }
560
561 // Original values should be preserved
562 std::optional<std::string> val = result[0];
563 if (!val.has_value() || *val != "a") {
564 std::cout << " [FAIL] : in pd_test_categorical_array_add_categories() : value not preserved" << std::endl;
565 throw std::runtime_error("pd_test_categorical_array_add_categories failed: value not preserved");
equals (pd_test_1_all.cpp:5866)
5856 std::cout << "========= equals ======================================";
5857
5858 pandas::CategoricalArray arr1({"a", "b", "a"});
5859 pandas::CategoricalArray arr2({"a", "b", "a"});
5860 pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862 pandas::CategoricalIndex idx1(arr1);
5863 pandas::CategoricalIndex idx2(arr2);
5864 pandas::CategoricalIndex idx3(arr3);
5865
5866 bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867 if (!passed) {
5868 std::cout << " [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869 throw std::runtime_error("pd_test_categorical_index_equals failed");
5870 }
5871
5872 std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876 std::cout << "========= identical ===================================";
len (pd_test_3_all.cpp:20867)
20857 auto title_result = s.str().title();
20858 if (title_result[0] != "Hello World" || title_result[1] != "Hello World" || title_result[2] != "Hello World") {
20859 std::cout << " [FAIL] : title() failed" << std::endl;
20860 throw std::runtime_error("pd_test_str_capitalize_title: title() failed");
20861 }
20862
20863 std::cout << " -> tests passed" << std::endl;
20864}
20865
20866// ============================================================================
20867// Test str().len()
20868// ============================================================================
20869
20870void pd_test_str_len() {
20871 std::cout << "========= Series.str().len() ============================";
20872
20873 pandas::Series<std::string> s({"a", "bb", "ccc", ""});
20874
20875 auto lens = s.str().len();
20876 if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877 std::cout << " [FAIL] : len() failed" << std::endl;
argsort (pd_test_1_all.cpp:1304)
1294 std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296 pandas::DatetimeArray arr(std::vector<std::string>{
1297 "2023-06-15",
1298 "NaT",
1299 "2023-01-01",
1300 "2023-12-31"
1301 });
1302
1303 // argsort ascending
1304 auto indices = arr.argsort(true, "last");
1305 // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
searchsorted (pd_test_1_all.cpp:18958)
18948 // =========================================================================
18949 // Search Tests
18950 // =========================================================================
18951
18952 void pd_test_range_index_searchsorted() {
18953 std::cout << "========= searchsorted ================================ ";
18954
18955 pandas::RangeIndex ri(0, 10, 2); // [0, 2, 4, 6, 8]
18956
18957 bool passed = (ri.searchsorted(4, "left") == 2 &&
18958 ri.searchsorted(4, "right") == 3 &&
18959 ri.searchsorted(3, "left") == 2 && // 3 would go between 2 and 4
18960 ri.searchsorted(-1, "left") == 0 && // Before all
18961 ri.searchsorted(10, "left") == 5); // After all
18962
18963 if (!passed) {
18964 std::cout << " [FAIL] : searchsorted" << std::endl;
18965 throw std::runtime_error("pd_test_range_index_searchsorted failed");
18966 }
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
T (pd_test_1_all.cpp:128)
118 throw std::runtime_error("pd_test_boolean_array_kleene_and failed: NA & F");
119 }
120
121 std::cout << " -> tests passed" << std::endl;
122 }
123
124 void pd_test_boolean_array_kleene_or() {
125 std::cout << "========= BooleanArray: Kleene OR ======================= ";
126
127 // Kleene OR truth table:
128 // T | T = T, T | F = T, T | NA = T (True dominates)
129 // F | T = T, F | F = F, F | NA = NA
130 // NA | T = T, NA | F = NA, NA | NA = NA
131
132 pandas::BooleanArray t({std::optional<bool>(true)});
133 pandas::BooleanArray f({std::optional<bool>(false)});
134 pandas::BooleanArray na({std::nullopt});
135
136 // T | NA = T (True dominates)
137 auto tna = (t | na);
138 if (!tna[0].has_value() || !tna[0].value()) {
swapaxes (pd_test_3_all.cpp:2276)
2266 auto sorted_desc = arr.sort_values(false, "last");
2267 if (*sorted_desc[0] != "c" || *sorted_desc[1] != "b" ||
2268 *sorted_desc[2] != "a" || sorted_desc[3].has_value()) {
2269 throw std::runtime_error("sort_values descending failed");
2270 }
2271
2272 std::cout << " -> tests passed" << std::endl;
2273}
2274
2275void pd_test_3_all_categorical_swapaxes() {
2276 std::cout << "========= CategoricalArray.swapaxes() =================";
2277
2278 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2279 pandas::CategoricalArray arr(values);
2280
2281 auto result = arr.swapaxes(0, 0);
2282 if (result.size() != 3) {
2283 throw std::runtime_error("swapaxes failed");
2284 }
2285
2286 bool threw = false;
transpose (pd_test_1_all.cpp:16648)
16638 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640 }
16641
16642 passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643 if (!passed) {
16644 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646 }
16647
16648 // Test transpose() alias
16649 auto transposed2 = s.transpose();
16650 passed = transposed2.size() == s.size();
16651 if (!passed) {
16652 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653 throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654 }
16655
16656 std::cout << " -> tests passed" << std::endl;
16657 }
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710 std::cout << "========= concat factory ==============================";
17711
17712 std::vector<int64_t> ordinals1 = {0, 1};
17713 std::vector<int64_t> ordinals2 = {2, 3};
17714 pandas::PeriodIndex idx1(ordinals1, "D");
17715 pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717 pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719 bool passed = (concatenated.size() == 4);
17720 if (!passed) {
17721 std::cout << " [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722 throw std::runtime_error("pd_test_period_index_concat failed");
17723 }
17724
17725 std::cout << " -> tests passed" << std::endl;
17726}
concat_merge (pd_test_3_all.cpp:26636)
26626 const auto& mi = result.multiindex();
26627 if (mi.get_level_values_str(0)[0] != "x") throw std::runtime_error("Key 'x' not found at level 0");
26628 if (mi.get_level_values_str(0)[2] != "y") throw std::runtime_error("Key 'y' not found at level 0");
26629 std::cout << " -> test passed" << std::endl;
26630}
26631
26632void test_categorical_array_concat_merge() {
26633 std::cout << " test_categorical_array_concat_merge" << std::endl;
26634 auto cat1 = CategoricalArray::from_codes({0, 1}, {"a", "b"});
26635 auto cat2 = CategoricalArray::from_codes({0, 1}, {"b", "c"});
26636 auto result = CategoricalArray::concat_merge({cat1, cat2});
26637 if (result.categories().size() != 3) throw std::runtime_error("Expected 3 merged categories");
26638 if (result.categories()[0] != "a" || result.categories()[1] != "b" || result.categories()[2] != "c")
26639 throw std::runtime_error("Merged categories wrong");
26640 if (result.size() != 4) throw std::runtime_error("Expected 4 elements");
26641 std::cout << " -> test passed" << std::endl;
26642}
26643
26644int pd_test_concat_ext_main() {
26645 try {
26646 std::cout << "========= concat extension tests =========" << std::endl;
shift (pd_test_1_all.cpp:5188)
5178 // First element should be NaN
5179 val = d["A"].get_value_str(0);
5180 passed = std::isnan(std::stod(val));
5181 if (!passed) {
5182 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5183 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5184 }
5185
5186 // shift: [NaN, 1, 3, 6]
5187 auto s = df.shift();
5188 val = s["A"].get_value_str(1);
5189 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5190 if (!passed) {
5191 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5192 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5193 }
5194
5195 std::cout << " -> tests passed" << std::endl;
5196 }
to_list (pd_test_1_all.cpp:10247)
10237 std::cout << " -> tests passed" << std::endl;
10238}
10239
10240void pd_test_extension_index_to_list() {
10241 std::cout << "========= to_list =========================";
10242
10243 pandas::CategoricalArray arr({"x", "y", "z"});
10244 pandas::CategoricalIndex idx(arr);
10245
10246 auto list = idx.to_list();
10247
10248 bool passed = (list.size() == 3 &&
10249 list[0].has_value() && *list[0] == "x" &&
10250 list[1].has_value() && *list[1] == "y" &&
10251 list[2].has_value() && *list[2] == "z");
10252 if (!passed) {
10253 std::cout << " [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10254 throw std::runtime_error("pd_test_extension_index_to_list failed");
10255 }
to_numpy (pd_test_1_all.cpp:16764)
16754 // =====================================================================
16755 // to_numpy Tests
16756 // =====================================================================
16757
16758 void pd_test_ndframe_to_numpy() {
16759 std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761 pandas::Series<int> s({10, 20, 30});
16762
16763 auto arr = s.to_numpy();
16764
16765 bool passed = arr.getSize() == 3;
16766 if (!passed) {
16767 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768 throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769 }
16770
16771 passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772 if (!passed) {
16773 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_string (pd_test_1_all.cpp:2693)
2683 pandas::PeriodArray arr_m(std::vector<std::string>{
2684 "2020-01",
2685 "NaT",
2686 "2025-06"
2687 }, "M");
2688
2689 // Year
2690 auto years = arr_m.year();
2691 auto y0 = years[0];
2692 if (!y0.has_value() || y0.value() != 2020) {
2693 std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695 }
2696
2697 auto y1 = years[1];
2698 if (y1.has_value()) {
2699 std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701 }
2702
2703 auto y2 = years[2];
tolist (pd_test_3_all.cpp:2300)
2290 threw = true;
2291 }
2292 if (!threw) {
2293 throw std::runtime_error("swapaxes should throw for invalid axes");
2294 }
2295
2296 std::cout << " -> tests passed" << std::endl;
2297}
2298
2299void pd_test_3_all_categorical_to_list() {
2300 std::cout << "========= CategoricalArray.to_list()/tolist() =========";
2301
2302 std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "c"};
2303 pandas::CategoricalArray arr(values);
2304
2305 auto list = arr.to_list();
2306 if (list.size() != 4 || *list[0] != "a" || *list[1] != "b" ||
2307 list[2].has_value() || *list[3] != "c") {
2308 throw std::runtime_error("to_list failed");
2309 }
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
astype_codes (pd_test_3_all.cpp:1822)
1812 std::cout << "========= CategoricalArray.astype() ==================";
1813
1814 std::vector<std::optional<std::string>> values = {"a", "b", "c", "a", std::nullopt};
1815 pandas::CategoricalArray arr(values);
1816
1817 auto str_result = arr.astype<std::string>("str");
1818 if (str_result.size() != 5 || !str_result[0].has_value() || *str_result[0] != "a" || str_result[4].has_value()) {
1819 throw std::runtime_error("astype failed");
1820 }
1821
1822 auto codes = arr.astype_codes();
1823 if (codes.getSize() != 5) {
1824 throw std::runtime_error("astype_codes failed");
1825 }
1826
1827 std::cout << " -> tests passed" << std::endl;
1828}
1829
1830void pd_test_3_all_categorical_check_ordered() {
1831 std::cout << "========= CategoricalArray.check_for_ordered() ========";
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793 std::cout << "========= copy ========================================";
5794
5795 pandas::CategoricalArray arr({"a", "b", "c"});
5796 pandas::CategoricalIndex idx(arr, "original");
5797
5798 pandas::CategoricalIndex copied = idx.copy();
5799
5800 bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801 copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802 if (!passed) {
5803 std::cout << " [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804 throw std::runtime_error("pd_test_categorical_index_copy failed");
5805 }
5806
5807 std::cout << " -> tests passed" << std::endl;
5808}
view (pd_test_3_all.cpp:2147)
2137 throw std::runtime_error("memory_usage shallow too small");
2138 }
2139 if (deep < shallow) {
2140 throw std::runtime_error("memory_usage deep should be >= shallow");
2141 }
2142
2143 std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147 std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150 pandas::CategoricalArray arr(values);
2151
2152 auto raveled = arr.ravel();
2153 if (raveled.size() != 3 || !raveled.equals(arr)) {
2154 throw std::runtime_error("ravel failed");
2155 }
2156
2157 auto viewed = arr.view();
duplicated (pd_test_1_all.cpp:10583)
10573 std::cout << " -> tests passed" << std::endl;
10574}
10575
10576void pd_test_extension_index_duplicated() {
10577 std::cout << "========= duplicated =========================";
10578
10579 pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10580 pandas::CategoricalIndex idx(arr);
10581
10582 auto dup_mask = idx.duplicated("first");
10583
10584 bool passed = (dup_mask.getElementAt({0}) == false &&
10585 dup_mask.getElementAt({1}) == false &&
10586 dup_mask.getElementAt({2}) == true &&
10587 dup_mask.getElementAt({3}) == false &&
10588 dup_mask.getElementAt({4}) == true);
10589 if (!passed) {
10590 std::cout << " [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10591 throw std::runtime_error("pd_test_extension_index_duplicated failed");
10592 }
isin (pd_test_1_all.cpp:5938)
5928 std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932 std::cout << "========= inherited isin ==============================";
5933
5934 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935 pandas::CategoricalIndex idx(arr);
5936
5937 std::vector<std::string> values = {"a", "c"};
5938 numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940 bool passed = (mask.getSize() == 4 &&
5941 mask.getElementAt({0}) == true && // a
5942 mask.getElementAt({1}) == false && // b
5943 mask.getElementAt({2}) == true && // c
5944 mask.getElementAt({3}) == false); // d
5945 if (!passed) {
5946 std::cout << " [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947 throw std::runtime_error("pd_test_categorical_index_isin failed");
5948 }
unique (pd_test_1_all.cpp:1345)
1335 pandas::DatetimeArray arr(std::vector<std::string>{
1336 "2023-01-01",
1337 "2023-06-15",
1338 "2023-01-01",
1339 "NaT",
1340 "2023-06-15",
1341 "NaT"
1342 });
1343
1344 // unique
1345 auto uniq = arr.unique();
1346 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347 if (uniq.size() != 3) {
1348 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350 }
1351
1352 // factorize
1353 auto [codes, uniques] = arr.factorize();
1354 // Codes for NaT should be -1
1355 if (codes.getElementAt({3}) != -1) {
is_na (pd_test_1_all.cpp:51)
42 void pd_test_boolean_array_na_handling() {
43 std::cout << "========= BooleanArray: NA handling ======================= ";
44
45 pandas::BooleanArray arr({
46 std::optional<bool>(true),
47 std::nullopt, // NA at index 1
48 std::optional<bool>(false)
49 });
50
51 if (!arr.is_na(1)) {
52 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54 }
55
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
argmax (pd_test_1_all.cpp:1323)
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327 }
1328
1329 std::cout << " -> tests passed" << std::endl;
1330 }
1331
1332 void pd_test_datetime_array_unique() {
1333 std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
as_ordered (pd_test_1_all.cpp:791)
781 unordered.min();
782 } catch (const std::exception&) {
783 threw = true;
784 }
785 if (!threw) {
786 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788 }
789
790 // Test as_ordered / as_unordered
791 pandas::CategoricalArray reordered = unordered.as_ordered();
792 if (!reordered.ordered()) {
793 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : as_ordered failed" << std::endl;
794 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: as_ordered failed");
795 }
796
797 std::cout << " -> tests passed" << std::endl;
798 }
799
800 void pd_test_categorical_array_comparisons() {
801 std::cout << "========= CategoricalArray: comparisons ======================= ";
as_unordered (pd_test_1_all.cpp:778)
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
782 } catch (const std::exception&) {
783 threw = true;
784 }
785 if (!threw) {
786 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788 }
categories (pd_test_1_all.cpp:389)
379 std::vector<std::optional<std::string>> vals = {
380 std::optional<std::string>("low"),
381 std::optional<std::string>("high"),
382 std::optional<std::string>("medium")
383 };
384 pandas::CategoricalArray arr3(vals, cats, true); // ordered
385 if (!arr3.ordered()) {
386 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : should be ordered" << std::endl;
387 throw std::runtime_error("pd_test_categorical_array_constructors failed: should be ordered");
388 }
389 if (arr3.categories().size() != 3) {
390 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : categories size != 3" << std::endl;
391 throw std::runtime_error("pd_test_categorical_array_constructors failed: categories size != 3");
392 }
393
394 std::cout << " -> tests passed" << std::endl;
395 }
396
397 void pd_test_categorical_array_from_codes() {
398 std::cout << "========= CategoricalArray: from_codes ======================= ";
check_for_ordered (pd_test_3_all.cpp:1831)
1822 auto codes = arr.astype_codes();
1823 if (codes.getSize() != 5) {
1824 throw std::runtime_error("astype_codes failed");
1825 }
1826
1827 std::cout << " -> tests passed" << std::endl;
1828}
1829
1830void pd_test_3_all_categorical_check_ordered() {
1831 std::cout << "========= CategoricalArray.check_for_ordered() ========";
1832
1833 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
1834 pandas::CategoricalArray unordered_arr(values, false);
1835 pandas::CategoricalArray ordered_arr(values, {"a", "b", "c"}, true);
1836
1837 bool threw = false;
1838 try {
1839 unordered_arr.check_for_ordered("test_op");
1840 } catch (const std::exception&) {
1841 threw = true;
check_for_ordered (pd_test_3_all.cpp:1831)
1822 auto codes = arr.astype_codes();
1823 if (codes.getSize() != 5) {
1824 throw std::runtime_error("astype_codes failed");
1825 }
1826
1827 std::cout << " -> tests passed" << std::endl;
1828}
1829
1830void pd_test_3_all_categorical_check_ordered() {
1831 std::cout << "========= CategoricalArray.check_for_ordered() ========";
1832
1833 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
1834 pandas::CategoricalArray unordered_arr(values, false);
1835 pandas::CategoricalArray ordered_arr(values, {"a", "b", "c"}, true);
1836
1837 bool threw = false;
1838 try {
1839 unordered_arr.check_for_ordered("test_op");
1840 } catch (const std::exception&) {
1841 threw = true;
check_for_ordered (pd_test_3_all.cpp:1831)
1822 auto codes = arr.astype_codes();
1823 if (codes.getSize() != 5) {
1824 throw std::runtime_error("astype_codes failed");
1825 }
1826
1827 std::cout << " -> tests passed" << std::endl;
1828}
1829
1830void pd_test_3_all_categorical_check_ordered() {
1831 std::cout << "========= CategoricalArray.check_for_ordered() ========";
1832
1833 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
1834 pandas::CategoricalArray unordered_arr(values, false);
1835 pandas::CategoricalArray ordered_arr(values, {"a", "b", "c"}, true);
1836
1837 bool threw = false;
1838 try {
1839 unordered_arr.check_for_ordered("test_op");
1840 } catch (const std::exception&) {
1841 threw = true;
codes (pd_test_1_all.cpp:473)
463 std::cout << " -> tests passed" << std::endl;
464 }
465
466 void pd_test_categorical_array_codes_property() {
467 std::cout << "========= CategoricalArray: codes property ======================= ";
468
469 std::vector<std::string> cats = {"x", "y", "z"};
470 std::vector<numpy::int32> codes = {0, 1, 2, 1, 0};
471 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
472
473 numpy::NDArray<numpy::int32> arr_codes = arr.codes();
474 if (arr_codes.getSize() != 5) {
475 std::cout << " [FAIL] : in pd_test_categorical_array_codes_property() : codes size != 5" << std::endl;
476 throw std::runtime_error("pd_test_categorical_array_codes_property failed: codes size != 5");
477 }
478
479 // Check codes match
480 for (size_t i = 0; i < codes.size(); ++i) {
481 if (arr_codes.getElementAt({i}) != codes[i]) {
482 std::cout << " [FAIL] : in pd_test_categorical_array_codes_property() : code mismatch at " << i << std::endl;
483 throw std::runtime_error("pd_test_categorical_array_codes_property failed: code mismatch");
delete_ (pd_test_1_all.cpp:10501)
10491 std::cout << " -> tests passed" << std::endl;
10492}
10493
10494void pd_test_extension_index_delete() {
10495 std::cout << "========= delete_ =========================";
10496
10497 pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498 pandas::CategoricalIndex idx(arr);
10499
10500 auto deleted = idx.delete_(1);
10501 auto v0 = deleted[0];
10502 auto v1 = deleted[1];
10503 auto v2 = deleted[2];
10504
10505 bool passed = (deleted.size() == 3 &&
10506 v0.has_value() && *v0 == "a" &&
10507 v1.has_value() && *v1 == "c" &&
10508 v2.has_value() && *v2 == "d");
10509 if (!passed) {
10510 std::cout << " [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
delete_ (pd_test_1_all.cpp:10501)
10491 std::cout << " -> tests passed" << std::endl;
10492}
10493
10494void pd_test_extension_index_delete() {
10495 std::cout << "========= delete_ =========================";
10496
10497 pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498 pandas::CategoricalIndex idx(arr);
10499
10500 auto deleted = idx.delete_(1);
10501 auto v0 = deleted[0];
10502 auto v1 = deleted[1];
10503 auto v2 = deleted[2];
10504
10505 bool passed = (deleted.size() == 3 &&
10506 v0.has_value() && *v0 == "a" &&
10507 v1.has_value() && *v1 == "c" &&
10508 v2.has_value() && *v2 == "d");
10509 if (!passed) {
10510 std::cout << " [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
dtype (pd_test_1_all.cpp:295)
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
293
294 pandas::BooleanArray arr;
295 if (arr.dtype().name() != "boolean") {
296 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298 }
299
300 if (arr.dtype().kind() != "b") {
301 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303 }
304
305 std::cout << " -> tests passed" << std::endl;
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937 void pd_test_config_version() {
938 std::cout << "========= df_config: version info ======================= ";
939 const char* version = pandas::DataFrameInfo::version();
940 if (version == nullptr || std::string(version).empty()) {
941 std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942 throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943 }
944 std::cout << "-> tests passed" << std::endl;
945 }
946
947 void pd_test_config_na_repr() {
948 std::cout << "========= df_config: NA representation ======================= ";
949 const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950 if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1344 // unique
1345 auto uniq = arr.unique();
1346 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347 if (uniq.size() != 3) {
1348 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350 }
1351
1352 // factorize
1353 auto [codes, uniques] = arr.factorize();
1354 // Codes for NaT should be -1
1355 if (codes.getElementAt({3}) != -1) {
1356 std::cout << " [FAIL] : factorize: NaT code should be -1" << std::endl;
1357 throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1358 }
1359 // Same values should have same codes
1360 if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1361 std::cout << " [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1362 throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1363 }
has_na (pd_test_1_all.cpp:61)
51 if (!arr.is_na(1)) {
52 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54 }
55
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
memory_usage (pd_test_1_all.cpp:27063)
27053 }
27054
27055 std::cout << "====================================== [OK] pd_test_value_counts test suite ========================== " << std::endl;
27056 return 0;
27057 }
27058
27059} // namespace dataframe_tests
27060// ------------------- pd_test_value_counts.cpp (end) -----------------------------
27061
27062// ------------------- pd_test_memory_usage.cpp (start) -----------------------------
27063// Tests for DataFrame.memory_usage() - pandas-compatible memory usage reporting
27064
27065namespace dataframe_tests {
27066 namespace dataframe_tests_memory_usage {
27067
27068 void pd_test_memory_usage_basic() {
27069 std::cout << "========= basic memory_usage =======================";
27070
27071 // Create a simple DataFrame with multiple columns
27072 std::map<std::string, std::vector<double>> data;
27073 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
name (pd_test_1_all.cpp:295)
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
293
294 pandas::BooleanArray arr;
295 if (arr.dtype().name() != "boolean") {
296 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298 }
299
300 if (arr.dtype().kind() != "b") {
301 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303 }
304
305 std::cout << " -> tests passed" << std::endl;
nbytes (pd_test_1_all.cpp:6214)
6204 }
6205
6206 // Test empty DataFrame
6207 pandas::DataFrame empty_df;
6208 if (!empty_df.empty()) {
6209 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210 throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211 }
6212
6213 // Test nbytes > 0 for non-empty
6214 if (df.nbytes() == 0) {
6215 std::cout << " [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216 throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217 }
6218
6219 // Test columns index
6220 if (df.columns().size() != 3) {
6221 std::cout << " [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222 throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223 }
ndim (pd_test_1_all.cpp:6195)
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
6199
6200 // Test empty
6201 if (df.empty()) {
6202 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203 throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204 }
ordered (pd_test_1_all.cpp:359)
349 void pd_test_categorical_array_constructors() {
350 std::cout << "========= CategoricalArray: constructors ======================= ";
351
352 // Default constructor
353 pandas::CategoricalArray arr1;
354 if (arr1.size() != 0) {
355 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
356 throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
357 }
358 if (arr1.ordered()) {
359 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
360 throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
361 }
362
363 // Constructor from values (infer categories)
364 std::vector<std::optional<std::string>> values = {
365 std::optional<std::string>("a"),
366 std::optional<std::string>("b"),
367 std::optional<std::string>("a"),
368 std::optional<std::string>("c")
ravel (pd_test_3_all.cpp:2147)
2137 throw std::runtime_error("memory_usage shallow too small");
2138 }
2139 if (deep < shallow) {
2140 throw std::runtime_error("memory_usage deep should be >= shallow");
2141 }
2142
2143 std::cout << " -> tests passed" << std::endl;
2144 }
2145
2146 void pd_test_3_all_categorical_ravel_view() {
2147 std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150 pandas::CategoricalArray arr(values);
2151
2152 auto raveled = arr.ravel();
2153 if (raveled.size() != 3 || !raveled.equals(arr)) {
2154 throw std::runtime_error("ravel failed");
2155 }
2156
2157 auto viewed = arr.view();
remove_categories (pd_test_1_all.cpp:591)
581 }
582
583 void pd_test_categorical_array_remove_categories() {
584 std::cout << "========= CategoricalArray: remove_categories ======================= ";
585
586 std::vector<std::string> cats = {"a", "b", "c"};
587 std::vector<numpy::int32> codes = {0, 1, 2, 1}; // a, b, c, b
588 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
589
590 // Remove 'c' - values with 'c' become NA
591 pandas::CategoricalArray result = arr.remove_categories({"c"});
592
593 if (result.categories().size() != 2) {
594 std::cout << " [FAIL] : in pd_test_categorical_array_remove_categories() : categories size != 2" << std::endl;
595 throw std::runtime_error("pd_test_categorical_array_remove_categories failed: categories size != 2");
596 }
597
598 // Element at index 2 should now be NA (was 'c')
599 if (!result.is_na(2)) {
600 std::cout << " [FAIL] : in pd_test_categorical_array_remove_categories() : removed category should be NA" << std::endl;
601 throw std::runtime_error("pd_test_categorical_array_remove_categories failed: removed category should be NA");
remove_unused_categories (pd_test_1_all.cpp:737)
727 std::cout << " -> tests passed" << std::endl;
728 }
729
730 void pd_test_categorical_array_remove_unused_categories() {
731 std::cout << "========= CategoricalArray: remove_unused_categories ======================= ";
732
733 std::vector<std::string> cats = {"a", "b", "c", "d"};
734 std::vector<numpy::int32> codes = {0, 0, 2}; // a, a, c (b and d unused)
735 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
736
737 pandas::CategoricalArray result = arr.remove_unused_categories();
738
739 // Only 'a' and 'c' should remain
740 if (result.categories().size() != 2) {
741 std::cout << " [FAIL] : in pd_test_categorical_array_remove_unused_categories() : categories size != 2" << std::endl;
742 throw std::runtime_error("pd_test_categorical_array_remove_unused_categories failed: categories size != 2");
743 }
744
745 // Values should be preserved
746 std::optional<std::string> val0 = result[0];
747 std::optional<std::string> val2 = result[2];
reorder_categories (pd_test_1_all.cpp:695)
685 void pd_test_categorical_array_reorder_categories() {
686 std::cout << "========= CategoricalArray: reorder_categories ======================= ";
687
688 std::vector<std::string> cats = {"a", "b", "c"};
689 std::vector<numpy::int32> codes = {0, 1, 2}; // a, b, c
690 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
691
692 // Reorder categories
693 std::vector<std::string> new_order = {"c", "b", "a"};
694 pandas::CategoricalArray result = arr.reorder_categories(new_order);
695
696 // Check categories are reordered
697 const std::vector<std::string>& result_cats = result.categories();
698 if (result_cats[0] != "c" || result_cats[1] != "b" || result_cats[2] != "a") {
699 std::cout << " [FAIL] : in pd_test_categorical_array_reorder_categories() : categories not reordered" << std::endl;
700 throw std::runtime_error("pd_test_categorical_array_reorder_categories failed: categories not reordered");
701 }
702
703 // Values should be preserved
704 std::optional<std::string> val0 = result[0];
repeat (pd_test_3_all.cpp:2166)
2156 auto viewed = arr.view();
2157 if (viewed.size() != 3 || !viewed.equals(arr)) {
2158 throw std::runtime_error("view failed");
2159 }
2160
2161 std::cout << " -> tests passed" << std::endl;
2162 }
2163
2164 void pd_test_3_all_categorical_repeat() {
2165 std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167 std::vector<std::optional<std::string>> values = {"a", "b"};
2168 pandas::CategoricalArray arr(values);
2169
2170 auto result = arr.repeat(3);
2171 if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172 *result[3] != "b" || *result[5] != "b") {
2173 throw std::runtime_error("repeat scalar failed");
2174 }
repeat (pd_test_3_all.cpp:2166)
2156 auto viewed = arr.view();
2157 if (viewed.size() != 3 || !viewed.equals(arr)) {
2158 throw std::runtime_error("view failed");
2159 }
2160
2161 std::cout << " -> tests passed" << std::endl;
2162 }
2163
2164 void pd_test_3_all_categorical_repeat() {
2165 std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167 std::vector<std::optional<std::string>> values = {"a", "b"};
2168 pandas::CategoricalArray arr(values);
2169
2170 auto result = arr.repeat(3);
2171 if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172 *result[3] != "b" || *result[5] != "b") {
2173 throw std::runtime_error("repeat scalar failed");
2174 }
repr (pd_test_1_all.cpp:10906)
10896 std::cout << " -> tests passed" << std::endl;
10897 }
10898
10899 void pd_test_extension_index_repr() {
10900 std::cout << "========= repr =========================";
10901
10902 pandas::CategoricalArray arr({"a", "b", "c"});
10903 // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904 pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906 std::string repr_str = idx.repr();
10907
10908 bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909 if (!passed) {
10910 std::cout << " [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911 throw std::runtime_error("pd_test_extension_index_repr failed");
10912 }
10913
10914 std::cout << " -> tests passed" << std::endl;
10915 }
reshape (pd_test_3_all.cpp:2186)
2176 auto result2 = arr.repeat({1, 2});
2177 if (result2.size() != 3 || *result2[0] != "a" || *result2[1] != "b" || *result2[2] != "b") {
2178 throw std::runtime_error("repeat array failed");
2179 }
2180
2181 std::cout << " -> tests passed" << std::endl;
2182 }
2183
2184 void pd_test_3_all_categorical_reshape() {
2185 std::cout << "========= CategoricalArray.reshape() ==================";
2186
2187 std::vector<std::optional<std::string>> values = {"a", "b", "c", "d"};
2188 pandas::CategoricalArray arr(values);
2189
2190 auto result = arr.reshape({4});
2191 if (result.size() != 4) {
2192 throw std::runtime_error("reshape failed");
2193 }
2194
2195 bool threw = false;
set_categories (pd_test_1_all.cpp:623)
613 void pd_test_categorical_array_set_categories() {
614 std::cout << "========= CategoricalArray: set_categories ======================= ";
615
616 std::vector<std::string> cats = {"a", "b"};
617 std::vector<numpy::int32> codes = {0, 1, 0}; // a, b, a
618 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
619
620 // Set new categories (values not in new categories become NA)
621 std::vector<std::string> new_cats = {"a", "c"}; // 'b' removed, 'c' added
622 pandas::CategoricalArray result = arr.set_categories(new_cats);
623
624 if (result.categories().size() != 2) {
625 std::cout << " [FAIL] : in pd_test_categorical_array_set_categories() : categories size != 2" << std::endl;
626 throw std::runtime_error("pd_test_categorical_array_set_categories failed: categories size != 2");
627 }
628
629 // Element at index 1 should be NA (was 'b', now not in categories)
630 if (!result.is_na(1)) {
631 std::cout << " [FAIL] : in pd_test_categorical_array_set_categories() : 'b' value should be NA" << std::endl;
632 throw std::runtime_error("pd_test_categorical_array_set_categories failed: 'b' value should be NA");
set_name (pd_test_1_all.cpp:11798)
11788 throw std::runtime_error("pd_test_index_vector_constructor failed");
11789 }
11790
11791 std::cout << " -> tests passed" << std::endl;
11792 }
11793
11794 void pd_test_index_copy_constructor() {
11795 std::cout << "========= copy constructor ============================";
11796
11797 pandas::Index<numpy::int64> idx1{1, 2, 3};
11798 idx1.set_name("original");
11799
11800 pandas::Index<numpy::int64> idx2(idx1);
11801
11802 bool passed = (idx2.size() == 3);
11803 passed = passed && (idx2.name().value() == "original");
11804 passed = passed && idx2.equals(idx1);
11805
11806 if (!passed) {
11807 std::cout << " [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808 throw std::runtime_error("pd_test_index_copy_constructor failed");
set_ordered (pd_test_3_all.cpp:2210)
2200 threw = true;
2201 }
2202 if (!threw) {
2203 throw std::runtime_error("reshape should throw for incompatible shape");
2204 }
2205
2206 std::cout << " -> tests passed" << std::endl;
2207 }
2208
2209 void pd_test_3_all_categorical_set_ordered() {
2210 std::cout << "========= CategoricalArray.set_ordered() ==============";
2211
2212 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2213 pandas::CategoricalArray arr(values, false);
2214
2215 if (arr.ordered()) {
2216 throw std::runtime_error("initial should be unordered");
2217 }
2218
2219 auto ordered = arr.set_ordered(true);
2220 if (!ordered.ordered()) {
shape (pd_test_1_all.cpp:6188)
6178 std::cout << "========= properties =======================";
6179
6180 std::map<std::string, std::vector<numpy::float64>> data;
6181 data["A"] = {1.0, 2.0, 3.0, 4.0};
6182 data["B"] = {5.0, 6.0, 7.0, 8.0};
6183 data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
size (pd_test_1_all.cpp:22)
12 #include "../pandas/pd_boolean_array.h"
13
14 namespace dataframe_tests {
15
16 namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
slice (pd_test_1_all.cpp:17546)
17536 // ============================================================================
17537 // Slicing / Indexing Tests
17538 // ============================================================================
17539
17540 void pd_test_period_index_slice() {
17541 std::cout << "========= slice method ================================";
17542
17543 std::vector<int64_t> ordinals = {0, 1, 2, 3, 4};
17544 pandas::PeriodIndex idx(ordinals, "D");
17545
17546 pandas::PeriodIndex sliced = idx.slice(1, 4);
17547
17548 bool passed = (sliced.size() == 3 &&
17549 sliced[0].has_value() && *sliced[0] == 1);
17550 if (!passed) {
17551 std::cout << " [FAIL] : in pd_test_period_index_slice()" << std::endl;
17552 throw std::runtime_error("pd_test_period_index_slice failed");
17553 }
17554
17555 std::cout << " -> tests passed" << std::endl;
17556 }