StringArray#
-
class pandas::StringArray#
Extension array type that stores string values, with support for missing (NA) entries.
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use StringArray
StringArray obj;
// ... operations ...
Constructors#
Signature |
Location |
Example |
|---|---|---|
|
pd_string_array.h:71 |
|
|
pd_string_array.h:84 |
|
|
pd_string_array.h:115 |
|
|
pd_string_array.h:133 |
Construction#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
static StringArray |
pd_string_array.h:330 |
Indexing / Selection#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::string |
pd_string_array.h:230 |
|
|
const numpy::NDArray<numpy::bool_>& |
pd_string_array.h:210 |
|
|
StringArray |
pd_string_array.h:277 |
Data Manipulation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
StringArray |
pd_string_array.h:380 |
Missing Data#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
StringArray |
pd_string_array.h:366 |
|
|
numpy::NDArray<numpy::bool_> |
pd_string_array.h:252 |
|
|
numpy::NDArray<numpy::bool_> |
pd_string_array.h:259 |
Statistics#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
size_t |
pd_string_array.h:411 |
Comparison#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
size_t |
pd_string_array.h:192 |
Sorting#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::NDArray<size_t> |
pd_string_array.h:702 |
Combining#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
static StringArray |
pd_string_array.h:337 |
I/O#
Conversion#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
StringArray |
pd_string_array.h:270 |
Set Operations#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
StringArray |
pd_string_array.h:635 |
Type Checking#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
bool |
pd_string_array.h:241 |
Other Methods#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::optional<bool> |
pd_string_array.h:1087 |
|
|
std::optional<bool> |
pd_string_array.h:1060 |
|
|
std::optional<size_t> |
pd_string_array.h:755 |
|
|
std::optional<size_t> |
pd_string_array.h:734 |
|
|
const numpy::NDArray<numpy::vstring_>& |
pd_string_array.h:203 |
|
|
dtype_type |
pd_string_array.h:150 |
|
|
bool |
pd_string_array.h:185 |
|
|
std::pair<numpy::NDArray<numpy::int64>, StringArray> |
pd_string_array.h:662 |
|
|
bool |
pd_string_array.h:424 |
|
|
size_t |
pd_string_array.h:164 |
|
|
constexpr int |
pd_string_array.h:171 |
|
|
std::string |
pd_string_array.h:1155 |
|
|
std::vector<size_t> |
pd_string_array.h:178 |
|
|
size_t |
pd_string_array.h:157 |
|
|
StringArray |
pd_string_array.h:1010 |
|
|
StringArray |
pd_string_array.h:1036 |
|
|
BooleanArray |
pd_string_array.h:942 |
|
|
BooleanArray |
pd_string_array.h:986 |
|
|
IntegerArray<numpy::int64> |
pd_string_array.h:780 |
|
|
StringArray |
pd_string_array.h:798 |
|
|
StringArray |
pd_string_array.h:865 |
|
|
StringArray |
pd_string_array.h:910 |
|
|
StringArray |
pd_string_array.h:886 |
|
|
BooleanArray |
pd_string_array.h:963 |
|
|
StringArray |
pd_string_array.h:840 |
|
|
StringArray |
pd_string_array.h:819 |
|
|
void |
pd_string_array.h:1168 |
Internal Methods#
1 internal method (prefixed with an underscore)
Code Examples#
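The following examples are extracted from the test suite. They appear to be selected by method name only, so many of them exercise other classes (for example DataFrame, CategoricalArray, BooleanArray or DatetimeArray) that have a method of the same name rather than StringArray itself; the str_cat, str_contains and str_endswith examples use StringArray directly.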
at (pd_test_1_all.cpp:6581)
6571 // Test isna/notna with float data
6572 {
6573 std::map<std::string, std::vector<numpy::float64>> float_data;
6574 float_data["X"] = {1.0, std::nan(""), 3.0};
6575 float_data["Y"] = {4.0, 5.0, std::nan("")};
6576 pandas::DataFrame df_na(float_data);
6577
6578 auto na_mask = df_na.isna();
6579 // Row 1, col 0 (X) should be NA
6580 if (!na_mask.getElementAt({1, 0})) {
6581 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583 }
6584 // Row 2, col 1 (Y) should be NA
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
mask (pd_test_1_all.cpp:9119)
9109void pd_test_datetime_mixin_array_constructor() {
9110 std::cout << "========= DatetimeTDMixin array constructor =========================";
9111
9112 // Create DatetimeArray with some values
9113 numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9114 data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2001
9115 data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2017
9116 data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2020
9117
9118 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9119 mask.setElementAt({0}, numpy::bool_(false));
9120 mask.setElementAt({1}, numpy::bool_(false));
9121 mask.setElementAt({2}, numpy::bool_(false));
9122
9123 pandas::DatetimeArray arr(data, mask);
9124 pandas::DatetimeTDMixin idx(arr, "timestamps");
9125
9126 bool passed = (idx.size() == 3 && !idx.empty() &&
9127 idx.name().has_value() && *idx.name() == "timestamps" &&
9128 idx.inferred_type() == "datetime");
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897 std::cout << "========= inherited take ==============================";
5898
5899 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900 pandas::CategoricalIndex idx(arr);
5901
5902 std::vector<size_t> indices = {0, 2, 3};
5903 pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905 bool passed = (taken.size() == 3);
5906 if (!passed) {
5907 std::cout << " [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908 throw std::runtime_error("pd_test_categorical_index_take failed");
5909 }
5910
5911 std::cout << " -> tests passed" << std::endl;
5912}
dropna (pd_test_1_all.cpp:531)
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
isna (pd_test_1_all.cpp:524)
514 throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515 }
516
517 // Test count (non-NA)
518 if (arr.count() != 2) {
519 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520 throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
notna (pd_test_1_all.cpp:6595)
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593 }
6594
6595 auto notna_mask = df_na.notna();
6596 if (notna_mask.getElementAt({1, 0})) {
6597 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598 throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599 }
6600 }
6601
6602 // Test fillna
6603 {
6604 std::map<std::string, std::vector<numpy::float64>> float_data;
6605 float_data["X"] = {1.0, std::nan(""), 3.0};
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
len (pd_test_3_all.cpp:20867)
20857 auto title_result = s.str().title();
20858 if (title_result[0] != "Hello World" || title_result[1] != "Hello World" || title_result[2] != "Hello World") {
20859 std::cout << " [FAIL] : title() failed" << std::endl;
20860 throw std::runtime_error("pd_test_str_capitalize_title: title() failed");
20861 }
20862
20863 std::cout << " -> tests passed" << std::endl;
20864}
20865
20866// ============================================================================
20867// Test str().len()
20868// ============================================================================
20869
20870void pd_test_str_len() {
20871 std::cout << "========= Series.str().len() ============================";
20872
20873 pandas::Series<std::string> s({"a", "bb", "ccc", ""});
20874
20875 auto lens = s.str().len();
20876 if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877 std::cout << " [FAIL] : len() failed" << std::endl;
argsort (pd_test_1_all.cpp:1304)
1294 std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296 pandas::DatetimeArray arr(std::vector<std::string>{
1297 "2023-06-15",
1298 "NaT",
1299 "2023-01-01",
1300 "2023-12-31"
1301 });
1302
1303 // argsort ascending
1304 auto indices = arr.argsort(true, "last");
1305 // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710 std::cout << "========= concat factory ==============================";
17711
17712 std::vector<int64_t> ordinals1 = {0, 1};
17713 std::vector<int64_t> ordinals2 = {2, 3};
17714 pandas::PeriodIndex idx1(ordinals1, "D");
17715 pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717 pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719 bool passed = (concatenated.size() == 4);
17720 if (!passed) {
17721 std::cout << " [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722 throw std::runtime_error("pd_test_period_index_concat failed");
17723 }
17724
17725 std::cout << " -> tests passed" << std::endl;
17726}
to_numpy (pd_test_1_all.cpp:16764)
16754 // =====================================================================
16755 // to_numpy Tests
16756 // =====================================================================
16757
16758 void pd_test_ndframe_to_numpy() {
16759 std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761 pandas::Series<int> s({10, 20, 30});
16762
16763 auto arr = s.to_numpy();
16764
16765 bool passed = arr.getSize() == 3;
16766 if (!passed) {
16767 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768 throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769 }
16770
16771 passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772 if (!passed) {
16773 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_string (pd_test_1_all.cpp:2693)
2683 pandas::PeriodArray arr_m(std::vector<std::string>{
2684 "2020-01",
2685 "NaT",
2686 "2025-06"
2687 }, "M");
2688
2689 // Year
2690 auto years = arr_m.year();
2691 auto y0 = years[0];
2692 if (!y0.has_value() || y0.value() != 2020) {
2693 std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695 }
2696
2697 auto y1 = years[1];
2698 if (y1.has_value()) {
2699 std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701 }
2702
2703 auto y2 = years[2];
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793 std::cout << "========= copy ========================================";
5794
5795 pandas::CategoricalArray arr({"a", "b", "c"});
5796 pandas::CategoricalIndex idx(arr, "original");
5797
5798 pandas::CategoricalIndex copied = idx.copy();
5799
5800 bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801 copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802 if (!passed) {
5803 std::cout << " [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804 throw std::runtime_error("pd_test_categorical_index_copy failed");
5805 }
5806
5807 std::cout << " -> tests passed" << std::endl;
5808}
unique (pd_test_1_all.cpp:1345)
1335 pandas::DatetimeArray arr(std::vector<std::string>{
1336 "2023-01-01",
1337 "2023-06-15",
1338 "2023-01-01",
1339 "NaT",
1340 "2023-06-15",
1341 "NaT"
1342 });
1343
1344 // unique
1345 auto uniq = arr.unique();
1346 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347 if (uniq.size() != 3) {
1348 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350 }
1351
1352 // factorize
1353 auto [codes, uniques] = arr.factorize();
1354 // Codes for NaT should be -1
1355 if (codes.getElementAt({3}) != -1) {
is_na (pd_test_1_all.cpp:51)
41 void pd_test_boolean_array_na_handling() {
42 std::cout << "========= BooleanArray: NA handling ======================= ";
43
44 pandas::BooleanArray arr({
45 std::optional<bool>(true),
46 std::nullopt, // NA at index 1
47 std::optional<bool>(false)
48 });
49
50 if (!arr.is_na(1)) {
51 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
52 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
53 }
54
55 if (arr.is_na(0)) {
56 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
57 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
58 }
59
60 if (!arr.has_na()) {
all (pd_test_1_all.cpp:247)
237 pandas::BooleanArray has_true({
238 std::optional<bool>(false),
239 std::optional<bool>(true)
240 });
241 any_result = has_true.any();
242 if (!any_result.has_value() || !any_result.value()) {
243 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244 throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245 }
246
247 // Test all()
248 pandas::BooleanArray all_true({
249 std::optional<bool>(true),
250 std::optional<bool>(true)
251 });
252 auto all_result = all_true.all();
253 if (!all_result.has_value() || !all_result.value()) {
254 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255 throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256 }
any (pd_test_1_all.cpp:226)
216 std::cout << " [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217 throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218 }
219
220 std::cout << " -> tests passed" << std::endl;
221 }
222
223 void pd_test_boolean_array_reductions() {
224 std::cout << "========= BooleanArray: reductions ======================= ";
225
226 // Test any()
227 pandas::BooleanArray all_false({
228 std::optional<bool>(false),
229 std::optional<bool>(false)
230 });
231 auto any_result = all_false.any();
232 if (!any_result.has_value() || any_result.value()) {
233 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234 throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235 }
argmax (pd_test_1_all.cpp:1323)
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327 }
1328
1329 std::cout << " -> tests passed" << std::endl;
1330 }
1331
1332 void pd_test_datetime_array_unique() {
1333 std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
data (pd_test_1_all.cpp:9114)
9104 throw std::runtime_error("pd_test_datetime_mixin_default_constructor failed");
9105 }
9106
9107 std::cout << " -> tests passed" << std::endl;
9108}
9109
9110void pd_test_datetime_mixin_array_constructor() {
9111 std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113 // Create DatetimeArray with some values
9114 numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115 data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2001
9116 data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2017
9117 data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2020
9118
9119 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120 mask.setElementAt({0}, numpy::bool_(false));
9121 mask.setElementAt({1}, numpy::bool_(false));
9122 mask.setElementAt({2}, numpy::bool_(false));
9123
9124 pandas::DatetimeArray arr(data, mask);
dtype (pd_test_1_all.cpp:295)
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
293
294 pandas::BooleanArray arr;
295 if (arr.dtype().name() != "boolean") {
296 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298 }
299
300 if (arr.dtype().kind() != "b") {
301 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303 }
304
305 std::cout << " -> tests passed" << std::endl;
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937 void pd_test_config_version() {
938 std::cout << "========= df_config: version info ======================= ";
939 const char* version = pandas::DataFrameInfo::version();
940 if (version == nullptr || std::string(version).empty()) {
941 std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942 throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943 }
944 std::cout << "-> tests passed" << std::endl;
945 }
946
947 void pd_test_config_na_repr() {
948 std::cout << "========= df_config: NA representation ======================= ";
949 const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950 if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1343 // unique
1344 auto uniq = arr.unique();
1345 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1346 if (uniq.size() != 3) {
1347 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1348 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1349 }
1350
1351 // factorize
1352 auto [codes, uniques] = arr.factorize();
1353 // Codes for NaT should be -1
1354 if (codes.getElementAt({3}) != -1) {
1355 std::cout << " [FAIL] : factorize: NaT code should be -1" << std::endl;
1356 throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1357 }
1358 // Same values should have same codes
1359 if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1360 std::cout << " [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1361 throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1362 }
has_na (pd_test_1_all.cpp:61)
51 if (!arr.is_na(1)) {
52 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54 }
55
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
nbytes (pd_test_1_all.cpp:6214)
6204 }
6205
6206 // Test empty DataFrame
6207 pandas::DataFrame empty_df;
6208 if (!empty_df.empty()) {
6209 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210 throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211 }
6212
6213 // Test nbytes > 0 for non-empty
6214 if (df.nbytes() == 0) {
6215 std::cout << " [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216 throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217 }
6218
6219 // Test columns index
6220 if (df.columns().size() != 3) {
6221 std::cout << " [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222 throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223 }
ndim (pd_test_1_all.cpp:6195)
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
6199
6200 // Test empty
6201 if (df.empty()) {
6202 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203 throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204 }
repr (pd_test_1_all.cpp:10906)
10896 std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900 std::cout << "========= repr =========================";
10901
10902 pandas::CategoricalArray arr({"a", "b", "c"});
10903 // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904 pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906 std::string repr_str = idx.repr();
10907
10908 bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909 if (!passed) {
10910 std::cout << " [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911 throw std::runtime_error("pd_test_extension_index_repr failed");
10912 }
10913
10914 std::cout << " -> tests passed" << std::endl;
10915}
shape (pd_test_1_all.cpp:6188)
6178 std::cout << "========= properties =======================";
6179
6180 std::map<std::string, std::vector<numpy::float64>> data;
6181 data["A"] = {1.0, 2.0, 3.0, 4.0};
6182 data["B"] = {5.0, 6.0, 7.0, 8.0};
6183 data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
str_cat (pd_test_1_all.cpp:3822)
3812 std::nullopt,
3813 std::optional<std::string>("foo")
3814 });
3815 pandas::StringArray arr2({
3816 std::optional<std::string>("world"),
3817 std::optional<std::string>("test"),
3818 std::nullopt
3819 });
3820
3821 // Array concatenation
3822 auto result = arr1.str_cat(arr2, " ");
3823 if (result[0].value() != "hello world") {
3824 std::cout << " [FAIL] : str_cat: expected 'hello world'" << std::endl;
3825 throw std::runtime_error("pd_test_string_array_str_cat failed: array cat");
3826 }
3827 if (result[1].has_value()) {
3828 std::cout << " [FAIL] : str_cat: NA + value should be NA" << std::endl;
3829 throw std::runtime_error("pd_test_string_array_str_cat failed: NA + value");
3830 }
3831 if (result[2].has_value()) {
3832 std::cout << " [FAIL] : str_cat: value + NA should be NA" << std::endl;
str_cat (pd_test_1_all.cpp:3822)
3812 std::nullopt,
3813 std::optional<std::string>("foo")
3814 });
3815 pandas::StringArray arr2({
3816 std::optional<std::string>("world"),
3817 std::optional<std::string>("test"),
3818 std::nullopt
3819 });
3820
3821 // Array concatenation
3822 auto result = arr1.str_cat(arr2, " ");
3823 if (result[0].value() != "hello world") {
3824 std::cout << " [FAIL] : str_cat: expected 'hello world'" << std::endl;
3825 throw std::runtime_error("pd_test_string_array_str_cat failed: array cat");
3826 }
3827 if (result[1].has_value()) {
3828 std::cout << " [FAIL] : str_cat: NA + value should be NA" << std::endl;
3829 throw std::runtime_error("pd_test_string_array_str_cat failed: NA + value");
3830 }
3831 if (result[2].has_value()) {
3832 std::cout << " [FAIL] : str_cat: value + NA should be NA" << std::endl;
str_contains (pd_test_1_all.cpp:3768)
3758 std::cout << "========= StringArray: string predicates ======================= ";
3759
3760 pandas::StringArray arr({
3761 std::optional<std::string>("hello world"),
3762 std::nullopt,
3763 std::optional<std::string>("python"),
3764 std::optional<std::string>("hello")
3765 });
3766
3767 // str_contains
3768 auto contains = arr.str_contains("llo");
3769 if (!contains[0].has_value() || !contains[0].value()) {
3770 std::cout << " [FAIL] : str_contains: 'hello world' contains 'llo'" << std::endl;
3771 throw std::runtime_error("pd_test_string_array_str_predicates failed: contains");
3772 }
3773 if (contains[1].has_value()) {
3774 std::cout << " [FAIL] : str_contains: NA should propagate" << std::endl;
3775 throw std::runtime_error("pd_test_string_array_str_predicates failed: contains NA");
3776 }
3777 if (!contains[2].has_value() || contains[2].value()) {
3778 std::cout << " [FAIL] : str_contains: 'python' does not contain 'llo'" << std::endl;
str_endswith (pd_test_1_all.cpp:3794)
3784 if (!starts[0].has_value() || !starts[0].value()) {
3785 std::cout << " [FAIL] : str_startswith: 'hello world' starts with 'hello'" << std::endl;
3786 throw std::runtime_error("pd_test_string_array_str_predicates failed: startswith");
3787 }
3788 if (!starts[2].has_value() || starts[2].value()) {
3789 std::cout << " [FAIL] : str_startswith: 'python' does not start with 'hello'" << std::endl;
3790 throw std::runtime_error("pd_test_string_array_str_predicates failed: startswith python");
3791 }
3792
3793 // str_endswith
3794 auto ends = arr.str_endswith("ld");
3795 if (!ends[0].has_value() || !ends[0].value()) {
3796 std::cout << " [FAIL] : str_endswith: 'hello world' ends with 'ld'" << std::endl;
3797 throw std::runtime_error("pd_test_string_array_str_predicates failed: endswith");
3798 }
3799 if (!ends[2].has_value() || ends[2].value()) {
3800 std::cout << " [FAIL] : str_endswith: 'python' does not end with 'ld'" << std::endl;
3801 throw std::runtime_error("pd_test_string_array_str_predicates failed: endswith python");
3802 }
3803
3804 std::cout << " -> tests passed" << std::endl;
str_len (pd_test_1_all.cpp:3728)
3718 void pd_test_string_array_str_len() {
3719 std::cout << "========= StringArray: str_len ======================= ";
3720
3721 pandas::StringArray arr({
3722 std::optional<std::string>("abc"),
3723 std::nullopt,
3724 std::optional<std::string>("hello"),
3725 std::optional<std::string>("")
3726 });
3727
3728 auto lengths = arr.str_len();
3729
3730 auto len0 = lengths[0];
3731 if (!len0.has_value() || len0.value() != 3) {
3732 std::cout << " [FAIL] : str_len[0] should be 3" << std::endl;
3733 throw std::runtime_error("pd_test_string_array_str_len failed: len[0]");
3734 }
3735
3736 auto len1 = lengths[1];
3737 if (len1.has_value()) {
3738 std::cout << " [FAIL] : str_len[1] should be NA" << std::endl;
str_lower (pd_test_1_all.cpp:3681)
3671 if (upper[0].value() != "HELLO WORLD") {
3672 std::cout << " [FAIL] : str_upper: expected 'HELLO WORLD'" << std::endl;
3673 throw std::runtime_error("pd_test_string_array_str_methods failed: str_upper");
3674 }
3675 if (upper[1].has_value()) {
3676 std::cout << " [FAIL] : str_upper: NA should propagate" << std::endl;
3677 throw std::runtime_error("pd_test_string_array_str_methods failed: str_upper NA");
3678 }
3679
3680 // str_lower
3681 auto lower = arr.str_lower();
3682 if (lower[0].value() != "hello world") {
3683 std::cout << " [FAIL] : str_lower: expected 'hello world'" << std::endl;
3684 throw std::runtime_error("pd_test_string_array_str_methods failed: str_lower");
3685 }
3686
3687 // str_strip
3688 auto stripped = arr.str_strip();
3689 if (stripped[2].value() != "spaced") {
3690 std::cout << " [FAIL] : str_strip: expected 'spaced'" << std::endl;
3691 throw std::runtime_error("pd_test_string_array_str_methods failed: str_strip");
str_lstrip (pd_test_1_all.cpp:3695)
3685 }
3686
3687 // str_strip
3688 auto stripped = arr.str_strip();
3689 if (stripped[2].value() != "spaced") {
3690 std::cout << " [FAIL] : str_strip: expected 'spaced'" << std::endl;
3691 throw std::runtime_error("pd_test_string_array_str_methods failed: str_strip");
3692 }
3693
3694 // str_lstrip
3695 auto lstripped = arr.str_lstrip();
3696 if (lstripped[2].value() != "spaced ") {
3697 std::cout << " [FAIL] : str_lstrip: expected 'spaced '" << std::endl;
3698 throw std::runtime_error("pd_test_string_array_str_methods failed: str_lstrip");
3699 }
3700
3701 // str_rstrip
3702 auto rstripped = arr.str_rstrip();
3703 if (rstripped[2].value() != " spaced") {
3704 std::cout << " [FAIL] : str_rstrip: expected ' spaced'" << std::endl;
3705 throw std::runtime_error("pd_test_string_array_str_methods failed: str_rstrip");
str_replace (pd_test_1_all.cpp:3709)
3699 }
3700
3701 // str_rstrip
3702 auto rstripped = arr.str_rstrip();
3703 if (rstripped[2].value() != " spaced") {
3704 std::cout << " [FAIL] : str_rstrip: expected ' spaced'" << std::endl;
3705 throw std::runtime_error("pd_test_string_array_str_methods failed: str_rstrip");
3706 }
3707
3708 // str_replace
3709 auto replaced = arr.str_replace("World", "Universe");
3710 if (replaced[0].value() != "Hello Universe") {
3711 std::cout << " [FAIL] : str_replace: expected 'Hello Universe'" << std::endl;
3712 throw std::runtime_error("pd_test_string_array_str_methods failed: str_replace");
3713 }
3714
3715 std::cout << " -> tests passed" << std::endl;
3716 }
3717
3718 void pd_test_string_array_str_len() {
3719 std::cout << "========= StringArray: str_len ======================= ";
str_rstrip (pd_test_1_all.cpp:3702)
3692 }
3693
3694 // str_lstrip
3695 auto lstripped = arr.str_lstrip();
3696 if (lstripped[2].value() != "spaced ") {
3697 std::cout << " [FAIL] : str_lstrip: expected 'spaced '" << std::endl;
3698 throw std::runtime_error("pd_test_string_array_str_methods failed: str_lstrip");
3699 }
3700
3701 // str_rstrip
3702 auto rstripped = arr.str_rstrip();
3703 if (rstripped[2].value() != " spaced") {
3704 std::cout << " [FAIL] : str_rstrip: expected ' spaced'" << std::endl;
3705 throw std::runtime_error("pd_test_string_array_str_methods failed: str_rstrip");
3706 }
3707
3708 // str_replace
3709 auto replaced = arr.str_replace("World", "Universe");
3710 if (replaced[0].value() != "Hello Universe") {
3711 std::cout << " [FAIL] : str_replace: expected 'Hello Universe'" << std::endl;
3712 throw std::runtime_error("pd_test_string_array_str_methods failed: str_replace");
str_startswith (pd_test_1_all.cpp:3783)
3773 if (contains[1].has_value()) {
3774 std::cout << " [FAIL] : str_contains: NA should propagate" << std::endl;
3775 throw std::runtime_error("pd_test_string_array_str_predicates failed: contains NA");
3776 }
3777 if (!contains[2].has_value() || contains[2].value()) {
3778 std::cout << " [FAIL] : str_contains: 'python' does not contain 'llo'" << std::endl;
3779 throw std::runtime_error("pd_test_string_array_str_predicates failed: contains python");
3780 }
3781
3782 // str_startswith
3783 auto starts = arr.str_startswith("hello");
3784 if (!starts[0].has_value() || !starts[0].value()) {
3785 std::cout << " [FAIL] : str_startswith: 'hello world' starts with 'hello'" << std::endl;
3786 throw std::runtime_error("pd_test_string_array_str_predicates failed: startswith");
3787 }
3788 if (!starts[2].has_value() || starts[2].value()) {
3789 std::cout << " [FAIL] : str_startswith: 'python' does not start with 'hello'" << std::endl;
3790 throw std::runtime_error("pd_test_string_array_str_predicates failed: startswith python");
3791 }
3792
3793 // str_endswith
str_strip (pd_test_1_all.cpp:3688)
3678 }
3679
3680 // str_lower
3681 auto lower = arr.str_lower();
3682 if (lower[0].value() != "hello world") {
3683 std::cout << " [FAIL] : str_lower: expected 'hello world'" << std::endl;
3684 throw std::runtime_error("pd_test_string_array_str_methods failed: str_lower");
3685 }
3686
3687 // str_strip
3688 auto stripped = arr.str_strip();
3689 if (stripped[2].value() != "spaced") {
3690 std::cout << " [FAIL] : str_strip: expected 'spaced'" << std::endl;
3691 throw std::runtime_error("pd_test_string_array_str_methods failed: str_strip");
3692 }
3693
3694 // str_lstrip
3695 auto lstripped = arr.str_lstrip();
3696 if (lstripped[2].value() != "spaced ") {
3697 std::cout << " [FAIL] : str_lstrip: expected 'spaced '" << std::endl;
3698 throw std::runtime_error("pd_test_string_array_str_methods failed: str_lstrip");
str_upper (pd_test_1_all.cpp:3670)
3660 void pd_test_string_array_str_methods() {
3661 std::cout << "========= StringArray: string methods ======================= ";
3662
3663 pandas::StringArray arr({
3664 std::optional<std::string>("Hello World"),
3665 std::nullopt,
3666 std::optional<std::string>(" spaced ")
3667 });
3668
3669 // str_upper
3670 auto upper = arr.str_upper();
3671 if (upper[0].value() != "HELLO WORLD") {
3672 std::cout << " [FAIL] : str_upper: expected 'HELLO WORLD'" << std::endl;
3673 throw std::runtime_error("pd_test_string_array_str_methods failed: str_upper");
3674 }
3675 if (upper[1].has_value()) {
3676 std::cout << " [FAIL] : str_upper: NA should propagate" << std::endl;
3677 throw std::runtime_error("pd_test_string_array_str_methods failed: str_upper NA");
3678 }
3679
3680 // str_lower