StringMethods#
-
class pandas::StringMethods#
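C++ class in the `pandas` namespace that provides string methods, declared in pd_string_accessor.h. In the test excerpts further down it is reached through the `str()` accessor of a `pandas::Series<std::string>` (for example `s.str().len()`). Note: the only constructor listed in the tables below takes a `const ParentType&`, so the default construction shown in the generic example may not compile as written.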
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use StringMethods
StringMethods obj;
// ... operations ...
Indexing / Selection#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<std::string>` | pd_string_accessor.h:730 | |
| | | pd_string_accessor.h:1303 | |
| | `DummiesResult` | pd_string_accessor.h:1298 | |
Data Manipulation#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<std::string>` | pd_string_accessor.h:381 | |
Missing Data#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<std::string>` | pd_string_accessor.h:690 | |
Statistics#
(No methods are listed in this category on this page.)
Comparison#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<int64_t>` | pd_string_accessor.h:265 | |
Combining#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<std::string>` | pd_string_accessor.h:1106 | |
Iteration#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<bool>` | pd_string_accessor.h:366 | |
Other Methods#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `static DataFrame` | pd_string_accessor.h:150 | |
| | `std::vector<std::string>` | pd_string_accessor.h:198 | |
| | `std::vector<std::string>` | pd_string_accessor.h:751 | |
| | `std::string` | pd_string_accessor.h:911 | |
| | `std::vector<std::string>` | pd_string_accessor.h:717 | |
| | `std::vector<bool>` | pd_string_accessor.h:322 | |
| | `std::vector<std::string>` | pd_string_accessor.h:1194 | |
| | `std::vector<std::string>` | pd_string_accessor.h:1184 | |
| | `std::vector<std::vector<std::string>>` | pd_string_accessor.h:1031 | |
| | `std::vector<std::vector<std::vector<std::string>>>` | pd_string_accessor.h:1078 | |
| | `ExtractAllResult` | pd_string_accessor.h:1074 | |
| | `std::vector<int64_t>` | pd_string_accessor.h:445 | |
| | `std::vector<std::optional<std::vector<std::string>>>` | pd_string_accessor.h:1002 | |
| | `std::vector<bool>` | pd_string_accessor.h:982 | |
| | `std::vector<int64_t>` | pd_string_accessor.h:1236 | |
| | `std::vector<bool>` | pd_string_accessor.h:593 | |
| | `std::vector<bool>` | pd_string_accessor.h:559 | |
| | `std::vector<bool>` | pd_string_accessor.h:827 | |
| | `std::vector<bool>` | pd_string_accessor.h:576 | |
| | `BoolWithNan` | pd_string_accessor.h:511 | |
| | `std::vector<bool>` | pd_string_accessor.h:627 | |
| | `std::vector<bool>` | pd_string_accessor.h:822 | |
| | `std::vector<bool>` | pd_string_accessor.h:610 | |
| | `std::vector<bool>` | pd_string_accessor.h:790 | |
| | `std::vector<bool>` | pd_string_accessor.h:648 | |
| | `std::vector<std::string>` | pd_string_accessor.h:721 | |
| | `std::vector<std::string>` | pd_string_accessor.h:166 | |
| | `std::vector<std::string>` | pd_string_accessor.h:291 | |
| | `std::vector<bool>` | pd_string_accessor.h:926 | |
| | `std::vector<std::optional<bool>>` | pd_string_accessor.h:948 | |
| | `std::vector<std::string>` | pd_string_accessor.h:1174 | |
| | `const ParentType&` | pd_string_accessor.h:158 | |
| | `explicit StringMethods(const ParentType& parent) :` | pd_string_accessor.h:155 | |
| | `std::optional<std::string>` | pd_string_accessor.h:161 | |
| | | pd_string_accessor.h:81 | |
| | `std::vector<std::vector<std::string>>` | pd_string_accessor.h:1204 | |
| | `std::regex` | pd_string_accessor.h:329 | |
| | `std::regex` | pd_string_accessor.h:395 | |
| | `std::regex` | pd_string_accessor.h:468 | |
| | `std::regex` | pd_string_accessor.h:932 | |
| | `std::regex` | pd_string_accessor.h:957 | |
| | `std::regex` | pd_string_accessor.h:988 | |
| | `std::regex` | pd_string_accessor.h:1007 | |
| | `std::regex` | pd_string_accessor.h:1035 | |
| | `std::regex` | pd_string_accessor.h:1082 | |
| | `std::vector<std::string>` | pd_string_accessor.h:832 | |
| | `std::vector<std::string>` | pd_string_accessor.h:847 | |
| | `std::vector<std::string>` | pd_string_accessor.h:775 | |
| | `std::vector<int64_t>` | pd_string_accessor.h:756 | |
| | `std::vector<int64_t>` | pd_string_accessor.h:1254 | |
| | `std::vector<std::string>` | pd_string_accessor.h:725 | |
| | `std::vector<std::vector<std::string>>` | pd_string_accessor.h:1220 | |
| | `std::vector<std::vector<std::string>>` | pd_string_accessor.h:863 | |
| | `std::vector<std::string>` | pd_string_accessor.h:306 | |
| | `std::vector<std::string>` | pd_string_accessor.h:670 | |
| | `std::vector<std::string>` | pd_string_accessor.h:1272 | |
| | `std::vector<std::vector<std::string>>` | pd_string_accessor.h:413 | |
| | `SplitExpandResult` | pd_string_accessor.h:535 | |
| | `std::vector<bool>` | pd_string_accessor.h:352 | |
| | `std::vector<std::string>` | pd_string_accessor.h:275 | |
| | `std::vector<std::string>` | pd_string_accessor.h:243 | |
| | `std::vector<std::string>` | pd_string_accessor.h:218 | |
| | `std::vector<std::string>` | pd_string_accessor.h:1122 | |
| | `std::vector<std::string>` | pd_string_accessor.h:182 | |
| | `std::vector<std::string>` | pd_string_accessor.h:1143 | |
| | `std::vector<std::string>` | pd_string_accessor.h:713 | |
Code Examples#
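The following examples are extracted from the test suite. Each heading gives a method name followed by the test file and line it refers to; the number fused to the start of each excerpt line is that line's number in the test file, not part of the code. The excerpts appear to be selected by method name, so some of them exercise a same-named function of a different class rather than StringMethods.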
get (pd_test_1_all.cpp:10290) — note: this excerpt exercises CategoricalIndex::get_loc and std::get, not StringMethods::get
10280void pd_test_extension_index_get_loc_unique() {
10281 std::cout << "========= get_loc (unique) =========================";
10282
10283 pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284 pandas::CategoricalIndex idx(arr);
10285
10286 auto loc_apple = idx.get_loc("apple");
10287 auto loc_banana = idx.get_loc("banana");
10288 auto loc_cherry = idx.get_loc("cherry");
10289
10290 bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291 std::get<size_t>(loc_banana) == 1 &&
10292 std::get<size_t>(loc_cherry) == 2);
10293 if (!passed) {
10294 std::cout << " [FAIL] : in pd_test_extension_index_get_loc_unique() : get_loc check failed" << std::endl;
10295 throw std::runtime_error("pd_test_extension_index_get_loc_unique failed");
10296 }
10297
10298 std::cout << " -> tests passed" << std::endl;
10299}
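get_dummies (pd_test_3_all.cpp:13545) — note: this excerpt calls the top-level pandas::get_dummies on a Series, not the method reached through str()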
13535 }
13536
13537 std::cout << " -> tests passed" << std::endl;
13538}
13539
13540// ============================================================================
13541// Get Dummies / From Dummies Tests
13542// ============================================================================
13543
13544void pd_test_top_level_get_dummies() {
13545 std::cout << "========= get_dummies() ===============================";
13546
13547 std::vector<std::string> data = {"A", "B", "A", "C", "B", "A"};
13548 pandas::Series<std::string> s(data, "category");
13549
13550 pandas::DataFrame result = pandas::get_dummies(s);
13551
13552 // Should have columns for A, B, C
13553 if (result.ncols() != 3) {
13554 std::cout << " [FAIL] : in pd_test_top_level_get_dummies() : expected 3 columns" << std::endl;
13555 throw std::runtime_error("pd_test_top_level_get_dummies failed: wrong column count");
get_dummies_as_multiindex (pd_test_5_all.cpp:123697)
123687 return rows;
123688}
123689
123690static std::string run_oracle_row(const OracleRow& r) {
123691 pandas::Series<std::string> s(r.input);
123692 if (r.op == "extractall") {
123693 auto res = s.str().extractall_with_index(r.arg);
123694 return format_extractall(res);
123695 }
123696 if (r.op == "get_dummies") {
123697 auto res = s.str().get_dummies_as_multiindex(r.arg);
123698 return format_get_dummies(res);
123699 }
123700 throw std::runtime_error("unknown op: " + r.op);
123701}
123702
123703static void run_oracle_subset(int sub_case, int begin_id, int end_id,
123704 int& local_fail) {
123705 std::cout << "-- case_" << (13 + sub_case) << "_oracle_rows_"
123706 << begin_id << "_to_" << (end_id - 1) << "\n";
123707 bool ok = false;
replace (pd_test_1_all.cpp:6623) — note: this excerpt calls replace() on a pandas::DataFrame, not StringMethods::replace
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
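pad (pd_test_3_all.cpp:1771) — note: this excerpt belongs to a test of the internal FBBuilder.pad() and contains no StringMethods::pad call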
1761 if (result_single.nrows() != 3 || result_single.ncols() != 1) {
1762 std::cout << " [FAIL] : in pd_test_3_all_dataframe_unstack() : single col shape mismatch" << std::endl;
1763 throw std::runtime_error("pd_test_3_all_dataframe_unstack failed: single col shape");
1764 }
1765
1766 std::cout << " -> tests passed" << std::endl;
1767}
1768
1769void pd_test_3_all_fbbuilder_pad() {
1770 std::cout << "========= FBBuilder.pad() (internal) =================";
1771
1772 // Note: FBBuilder.pad() is an internal method for FlatBuffer serialization
1773 // It's not the pandas DataFrame.pad() method (which is ffill alias)
1774 // This test verifies the to_feather() serialization works, which uses FBBuilder.pad()
1775
1776 std::map<std::string, std::vector<double>> data = {
1777 {"A", {1.0, 2.0, 3.0}},
1778 {"B", {4.0, 5.0, 6.0}}
1779 };
1780 pandas::DataFrame df(data);
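count (pd_test_1_all.cpp:66) — note: this excerpt calls count() on an array inside pd_test_boolean_array_na_handling, not StringMethods::count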
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
count_with_nan (pd_test_3_all.cpp:28394)
28384static int sao_check(bool cond, const char* msg) {
28385 if (!cond) { std::cout << " FAIL: " << msg << std::endl; return 1; }
28386 return 0;
28387}
28388
28389void pd_test_str_count_with_nan() {
28390 std::cout << " -- pd_test_str_count_with_nan --" << std::endl;
28391 int fail = 0;
28392 pandas::Series<std::string> s({"aa", "NaN", "abab", "None"}, "x");
28393 auto r = s.str().count_with_nan("a");
28394 fail += sao_check(r.values.size() == 4, "size");
28395 fail += sao_check(r.has_nan, "has_nan true");
28396 fail += sao_check(r.is_nan[1] && r.is_nan[3], "nan positions");
28397 fail += sao_check(!r.is_nan[0] && !r.is_nan[2], "non-nan positions");
28398 fail += sao_check(r.values[0] == 2, "count aa");
28399 fail += sao_check(r.values[2] == 2, "count abab");
28400 if (fail == 0) std::cout << " OK" << std::endl;
28401}
28402
28403void pd_test_str_count_no_nan() {
len (pd_test_3_all.cpp:20867)
20857 auto title_result = s.str().title();
20858 if (title_result[0] != "Hello World" || title_result[1] != "Hello World" || title_result[2] != "Hello World") {
20859 std::cout << " [FAIL] : title() failed" << std::endl;
20860 throw std::runtime_error("pd_test_str_capitalize_title: title() failed");
20861 }
20862
20863 std::cout << " -> tests passed" << std::endl;
20864}
20865
20866// ============================================================================
20867// Test str().len()
20868// ============================================================================
20869
20870void pd_test_str_len() {
20871 std::cout << "========= Series.str().len() ============================";
20872
20873 pandas::Series<std::string> s({"a", "bb", "ccc", ""});
20874
20875 auto lens = s.str().len();
20876 if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877 std::cout << " [FAIL] : len() failed" << std::endl;
join (pd_test_1_all.cpp:12353) — note: this excerpt calls join() on a pandas::Index, not StringMethods::join
12343 std::cout << " -> tests passed" << std::endl;
12344 }
12345
12346 void pd_test_index_join() {
12347 std::cout << "========= join ========================================";
12348
12349 pandas::Index<numpy::int64> idx1{1, 2, 3};
12350 pandas::Index<numpy::int64> idx2{2, 3, 4};
12351
12352 auto [inner_joined, left_idx, right_idx] = idx1.join(idx2, "inner");
12353 bool passed = (inner_joined.size() == 2); // {2, 3}
12354
12355 auto [outer_joined, ol_idx, or_idx] = idx1.join(idx2, "outer");
12356 passed = passed && (outer_joined.size() == 4); // {1, 2, 3, 4}
12357
12358 if (!passed) {
12359 std::cout << " [FAIL] : in pd_test_index_join() : join failed" << std::endl;
12360 throw std::runtime_error("pd_test_index_join failed");
12361 }
endswith (pd_test_3_all.cpp:20933)
20923 auto result = s.str().contains("an", true, false); // case_sensitive=true, regex=false
20924 if (result[0] != false || result[1] != true || result[2] != false) {
20925 std::cout << " [FAIL] : contains() failed" << std::endl;
20926 throw std::runtime_error("pd_test_str_contains: contains() failed");
20927 }
20928
20929 std::cout << " -> tests passed" << std::endl;
20930}
20931
20932// ============================================================================
20933// Test str().startswith() and str().endswith()
20934// ============================================================================
20935
20936void pd_test_str_startswith_endswith() {
20937 std::cout << "========= Series.str().startswith/endswith() ============";
20938
20939 pandas::Series<std::string> s({"hello", "world", "help"});
20940
20941 auto starts_result = s.str().startswith("hel");
20942 if (starts_result[0] != true || starts_result[1] != false || starts_result[2] != true) {
20943 std::cout << " [FAIL] : startswith() failed" << std::endl;
capitalize (pd_test_3_all.cpp:20843)
20833 auto upper_result = s.str().upper();
20834 if (upper_result[0] != "HELLO" || upper_result[1] != "WORLD" || upper_result[2] != "TEST") {
20835 std::cout << " [FAIL] : upper() failed" << std::endl;
20836 throw std::runtime_error("pd_test_str_lower_upper: upper() failed");
20837 }
20838
20839 std::cout << " -> tests passed" << std::endl;
20840}
20841
20842// ============================================================================
20843// Test str().capitalize() and str().title()
20844// ============================================================================
20845
20846void pd_test_str_capitalize_title() {
20847 std::cout << "========= Series.str().capitalize/title() ===============";
20848
20849 pandas::Series<std::string> s({"hello world", "HELLO WORLD", "hELLO wORLD"});
20850
20851 auto cap_result = s.str().capitalize();
20852 if (cap_result[0] != "Hello world" || cap_result[1] != "Hello world" || cap_result[2] != "Hello world") {
20853 std::cout << " [FAIL] : capitalize() failed" << std::endl;
casefold (pd_test_3_all.cpp:21059)
21049 auto result = s.str().cat("-");
21050 if (result != "a-b-c") {
21051 std::cout << " [FAIL] : cat() failed, got: " << result << std::endl;
21052 throw std::runtime_error("pd_test_str_cat: cat() failed");
21053 }
21054
21055 std::cout << " -> tests passed" << std::endl;
21056}
21057
21058// ============================================================================
21059// Test str().casefold() (plan_04a)
21060// ============================================================================
21061
21062void pd_test_str_casefold() {
21063 std::cout << "========= Series.str().casefold() =======================";
21064
21065 pandas::Series<std::string> s({"FOO", "Bar", "HELLO"});
21066 auto result = s.str().casefold();
21067 if (result[0] != "foo" || result[1] != "bar" || result[2] != "hello") {
21068 std::cout << " [FAIL] : casefold() failed" << std::endl;
21069 throw std::runtime_error("pd_test_str_casefold: casefold() failed");
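cat (pd_test_3_all.cpp:16259) — note: in this excerpt `cat` is a local pandas::CategoricalArray variable, not a call to StringMethods::cat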
16249 }
16250
16251 std::cout << " -> tests passed" << std::endl;
16252}
16253
16254void pd_test_categorical_fillna_params() {
16255 std::cout << "========= CategoricalArray fillna params =============";
16256
16257 // Create CategoricalArray using vector constructor with optional values
16258 std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "a"};
16259 pandas::CategoricalArray cat(values);
16260
16261 // Test fillna with method and limit parameters (should compile and work)
16262 auto result = cat.fillna("b", "", std::nullopt, true);
16263
16264 bool passed = (result.size() == 4);
16265 if (!passed) {
16266 std::cout << " [FAIL] : in pd_test_categorical_fillna_params() : fillna failed" << std::endl;
16267 throw std::runtime_error("pd_test_categorical_fillna_params failed");
16268 }
center (pd_test_3_all.cpp:21005)
20995 auto alnum_result = s.str().isalnum();
20996 if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997 std::cout << " [FAIL] : isalnum() failed" << std::endl;
20998 throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999 }
21000
21001 std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009 std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011 pandas::Series<std::string> s({"1", "22", "333"});
21012
21013 auto zfill_result = s.str().zfill(5);
21014 if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015 std::cout << " [FAIL] : zfill() failed" << std::endl;
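contains (pd_test_1_all.cpp:2200) — note: this excerpt calls contains() on a pandas::IntervalArrayFloat64, not StringMethods::contains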
2190// Test: contains method
2191// ============================================================================
2192void test_contains() {
2193 std::cout << "========= IntervalArray: contains ======================= ";
2194
2195 std::vector<numpy::float64> breaks = {0.0, 1.0, 2.0, 3.0};
2196
2197 // Right-closed intervals: (0, 1], (1, 2], (2, 3]
2198 auto arr_right = pandas::IntervalArrayFloat64::from_breaks(breaks, pandas::IntervalClosed::Right);
2199
2200 // Test contains(1.0) - should be in interval 0 but not 1 (since 1 is exclusive on left of interval 1)
2201 auto contains_1 = arr_right.contains(1.0);
2202 // (0, 1] contains 1: yes, (1, 2] contains 1: no (open on left), (2, 3] contains 1: no
2203 if (contains_1[0].value_or(false) != true ||
2204 contains_1[1].value_or(true) != false ||
2205 contains_1[2].value_or(true) != false) {
2206 std::cout << "[FAIL] : in test_contains() : right-closed contains 1.0" << std::endl;
2207 return;
2208 }
2209
2210 // Left-closed intervals: [0, 1), [1, 2), [2, 3)
decode (pd_test_3_all.cpp:21401)
21391 pandas::Series<std::string> s({"hello", "world"});
21392 auto result = s.str().normalize("NFC");
21393 if (result[0] != "hello" || result[1] != "world") {
21394 std::cout << " [FAIL] : normalize failed" << std::endl;
21395 throw std::runtime_error("pd_test_str_normalize: normalize failed");
21396 }
21397 std::cout << " -> tests passed" << std::endl;
21398}
21399
21400// ============================================================================
21401// Test str().encode() / decode() (plan_04c)
21402// ============================================================================
21403
21404void pd_test_str_encode_decode() {
21405 std::cout << "========= Series.str().encode/decode() ==================";
21406
21407 pandas::Series<std::string> s({"hello", "world"});
21408 auto encoded = s.str().encode("utf-8");
21409 if (encoded[0] != "hello" || encoded[1] != "world") {
21410 std::cout << " [FAIL] : encode failed" << std::endl;
21411 throw std::runtime_error("pd_test_str_encode_decode: encode failed");
encode (pd_test_3_all.cpp:21401)
21391 pandas::Series<std::string> s({"hello", "world"});
21392 auto result = s.str().normalize("NFC");
21393 if (result[0] != "hello" || result[1] != "world") {
21394 std::cout << " [FAIL] : normalize failed" << std::endl;
21395 throw std::runtime_error("pd_test_str_normalize: normalize failed");
21396 }
21397 std::cout << " -> tests passed" << std::endl;
21398}
21399
21400// ============================================================================
21401// Test str().encode() / decode() (plan_04c)
21402// ============================================================================
21403
21404void pd_test_str_encode_decode() {
21405 std::cout << "========= Series.str().encode/decode() ==================";
21406
21407 pandas::Series<std::string> s({"hello", "world"});
21408 auto encoded = s.str().encode("utf-8");
21409 if (encoded[0] != "hello" || encoded[1] != "world") {
21410 std::cout << " [FAIL] : encode failed" << std::endl;
21411 throw std::runtime_error("pd_test_str_encode_decode: encode failed");
extract (pd_test_3_all.cpp:21283)
21273 throw std::runtime_error("pd_test_str_findall: findall element 1 failed");
21274 }
21275 if (result[2].value().size() != 0) {
21276 std::cout << " [FAIL] : findall element 2 should be empty" << std::endl;
21277 throw std::runtime_error("pd_test_str_findall: findall element 2 failed");
21278 }
21279 std::cout << " -> tests passed" << std::endl;
21280}
21281
21282// ============================================================================
21283// Test str().extract() (plan_04b)
21284// ============================================================================
21285
21286void pd_test_str_extract() {
21287 std::cout << "========= Series.str().extract() ========================";
21288
21289 pandas::Series<std::string> s({"a1", "b2", "c3"});
21290 auto result = s.str().extract("([a-z])([0-9])");
21291 if (result[0].size() != 2 || result[0][0] != "a" || result[0][1] != "1") {
21292 std::cout << " [FAIL] : extract element 0 failed" << std::endl;
21293 throw std::runtime_error("pd_test_str_extract: extract element 0 failed");
extractall (pd_test_3_all.cpp:21310)
21300 pandas::Series<std::string> s2({"xyz"});
21301 auto result2 = s2.str().extract("([0-9])");
21302 if (result2[0].size() != 1 || result2[0][0] != "") {
21303 std::cout << " [FAIL] : extract no-match failed" << std::endl;
21304 throw std::runtime_error("pd_test_str_extract: extract no-match failed");
21305 }
21306 std::cout << " -> tests passed" << std::endl;
21307}
21308
21309// ============================================================================
21310// Test str().extractall() (plan_04b)
21311// ============================================================================
21312
21313void pd_test_str_extractall() {
21314 std::cout << "========= Series.str().extractall() =====================";
21315
21316 pandas::Series<std::string> s({"a1b2", "c3", "xyz"});
21317 auto result = s.str().extractall("([a-z])([0-9])");
21318 if (result[0].size() != 2 ||
21319 result[0][0][0] != "a" || result[0][0][1] != "1" ||
21320 result[0][1][0] != "b" || result[0][1][1] != "2") {
extractall_with_index (pd_test_5_all.cpp:123693)
123683 r.expected = cells[4];
123684 r.note = cells[5];
123685 rows.push_back(std::move(r));
123686 }
123687 return rows;
123688}
123689
123690static std::string run_oracle_row(const OracleRow& r) {
123691 pandas::Series<std::string> s(r.input);
123692 if (r.op == "extractall") {
123693 auto res = s.str().extractall_with_index(r.arg);
123694 return format_extractall(res);
123695 }
123696 if (r.op == "get_dummies") {
123697 auto res = s.str().get_dummies_as_multiindex(r.arg);
123698 return format_get_dummies(res);
123699 }
123700 throw std::runtime_error("unknown op: " + r.op);
123701}
123702
123703static void run_oracle_subset(int sub_case, int begin_id, int end_id,
find (pd_test_1_all.cpp:5400) — note: this excerpt uses the standard-library std::find, not StringMethods::find
5390void pd_test_categorical_index_categories_property() {
5391 std::cout << "========= categories property =========================";
5392
5393 pandas::CategoricalArray arr({"red", "green", "blue", "red"});
5394 pandas::CategoricalIndex idx(arr);
5395
5396 const std::vector<std::string>& cats = idx.categories();
5397
5398 bool passed = (cats.size() == 3 &&
5399 std::find(cats.begin(), cats.end(), "red") != cats.end() &&
5400 std::find(cats.begin(), cats.end(), "green") != cats.end() &&
5401 std::find(cats.begin(), cats.end(), "blue") != cats.end());
5402 if (!passed) {
5403 std::cout << " [FAIL] : in pd_test_categorical_index_categories_property()" << std::endl;
5404 throw std::runtime_error("pd_test_categorical_index_categories_property failed");
5405 }
5406
5407 std::cout << " -> tests passed" << std::endl;
5408}
findall (pd_test_3_all.cpp:21259)
21249 auto result2 = s.str().fullmatch("foo.*");
21250 if (result2[0] != true || result2[1] != true || result2[2] != false || result2[3] != true) {
21251 std::cout << " [FAIL] : fullmatch('foo.*') failed" << std::endl;
21252 throw std::runtime_error("pd_test_str_fullmatch: fullmatch('foo.*') failed");
21253 }
21254 std::cout << " -> tests passed" << std::endl;
21255}
21256
21257// ============================================================================
21258// Test str().findall() (plan_04b)
21259// ============================================================================
21260
21261void pd_test_str_findall() {
21262 std::cout << "========= Series.str().findall() ========================";
21263
21264 pandas::Series<std::string> s({"a1b2c3", "x4y5", "no digits"});
21265 auto result = s.str().findall("[0-9]");
21266 if (result[0].value().size() != 3 || result[0].value()[0] != "1" || result[0].value()[1] != "2" || result[0].value()[2] != "3") {
21267 std::cout << " [FAIL] : findall element 0 failed" << std::endl;
21268 throw std::runtime_error("pd_test_str_findall: findall element 0 failed");
fullmatch (pd_test_3_all.cpp:21237)
21227 auto result2 = s.str().match("FOO", false);
21228 if (result2[0] != true || result2[1] != false || result2[2] != true || result2[3] != false) {
21229 std::cout << " [FAIL] : match('FOO', case=false) failed" << std::endl;
21230 throw std::runtime_error("pd_test_str_match: match case insensitive failed");
21231 }
21232 std::cout << " -> tests passed" << std::endl;
21233}
21234
21235// ============================================================================
21236// Test str().fullmatch() (plan_04b)
21237// ============================================================================
21238
21239void pd_test_str_fullmatch() {
21240 std::cout << "========= Series.str().fullmatch() ======================";
21241
21242 pandas::Series<std::string> s({"foo", "foobar", "bar", "foo1"});
21243 auto result = s.str().fullmatch("foo");
21244 if (result[0] != true || result[1] != false || result[2] != false || result[3] != false) {
21245 std::cout << " [FAIL] : fullmatch('foo') failed" << std::endl;
21246 throw std::runtime_error("pd_test_str_fullmatch: fullmatch('foo') failed");
index (pd_test_1_all.cpp:6680) — note: this excerpt calls index() on the result of DataFrame::set_axis, not StringMethods::index
6670 void pd_test_dataframe_index_ops() {
6671 std::cout << "========= index operations =================";
6672
6673 // Test set_axis (rows)
6674 {
6675 std::map<std::string, std::vector<int>> data;
6676 data["A"] = {1, 2, 3};
6677 pandas::DataFrame df(data);
6678
6679 auto renamed = df.set_axis({"x", "y", "z"}, 0);
6680 std::string idx0 = renamed.index().get_value_str(0);
6681 if (idx0 != "x") {
6682 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6683 throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
6684 }
6685 }
6686
6687 // Test set_axis (columns)
6688 {
6689 std::map<std::string, std::vector<int>> data;
6690 data["A"] = {1, 2};
isalnum (pd_test_3_all.cpp:20975)
20965 auto result = s.str().replace("hello", "hi", false); // regex=false
20966 if (result[0] != "hi" || result[1] != "world" || result[2] != "hi world") {
20967 std::cout << " [FAIL] : replace() failed" << std::endl;
20968 throw std::runtime_error("pd_test_str_replace: replace() failed");
20969 }
20970
20971 std::cout << " -> tests passed" << std::endl;
20972}
20973
20974// ============================================================================
20975// Test str().isalpha(), str().isdigit(), str().isalnum()
20976// ============================================================================
20977
20978void pd_test_str_is_methods() {
20979 std::cout << "========= Series.str().isalpha/isdigit/isalnum() ========";
20980
20981 pandas::Series<std::string> s({"abc", "123", "abc123", ""});
20982
20983 auto alpha_result = s.str().isalpha();
20984 if (alpha_result[0] != true || alpha_result[1] != false || alpha_result[2] != false || alpha_result[3] != false) {
20985 std::cout << " [FAIL] : isalpha() failed" << std::endl;
isalpha (pd_test_3_all.cpp:20975)
20965 auto result = s.str().replace("hello", "hi", false); // regex=false
20966 if (result[0] != "hi" || result[1] != "world" || result[2] != "hi world") {
20967 std::cout << " [FAIL] : replace() failed" << std::endl;
20968 throw std::runtime_error("pd_test_str_replace: replace() failed");
20969 }
20970
20971 std::cout << " -> tests passed" << std::endl;
20972}
20973
20974// ============================================================================
20975// Test str().isalpha(), str().isdigit(), str().isalnum()
20976// ============================================================================
20977
20978void pd_test_str_is_methods() {
20979 std::cout << "========= Series.str().isalpha/isdigit/isalnum() ========";
20980
20981 pandas::Series<std::string> s({"abc", "123", "abc123", ""});
20982
20983 auto alpha_result = s.str().isalpha();
20984 if (alpha_result[0] != true || alpha_result[1] != false || alpha_result[2] != false || alpha_result[3] != false) {
20985 std::cout << " [FAIL] : isalpha() failed" << std::endl;
isdecimal (pd_test_3_all.cpp:21124)
21114 pandas::Series<std::string> s({"Hello World", "hello world", "HELLO", "Hello"});
21115 auto result = s.str().istitle();
21116 if (result[0] != true || result[1] != false || result[2] != false || result[3] != true) {
21117 std::cout << " [FAIL] : istitle() failed" << std::endl;
21118 throw std::runtime_error("pd_test_str_istitle: istitle() failed");
21119 }
21120 std::cout << " -> tests passed" << std::endl;
21121}
21122
21123// ============================================================================
21124// Test str().isnumeric() and str().isdecimal() (plan_04a)
21125// ============================================================================
21126
21127void pd_test_str_isnumeric_isdecimal() {
21128 std::cout << "========= Series.str().isnumeric/isdecimal() ============";
21129
21130 pandas::Series<std::string> s({"123", "abc", "12.3", ""});
21131 auto numeric_result = s.str().isnumeric();
21132 if (numeric_result[0] != true || numeric_result[1] != false ||
21133 numeric_result[2] != false || numeric_result[3] != false) {
21134 std::cout << " [FAIL] : isnumeric() failed" << std::endl;
isdigit (pd_test_3_all.cpp:20975)
20965 auto result = s.str().replace("hello", "hi", false); // regex=false
20966 if (result[0] != "hi" || result[1] != "world" || result[2] != "hi world") {
20967 std::cout << " [FAIL] : replace() failed" << std::endl;
20968 throw std::runtime_error("pd_test_str_replace: replace() failed");
20969 }
20970
20971 std::cout << " -> tests passed" << std::endl;
20972}
20973
20974// ============================================================================
20975// Test str().isalpha(), str().isdigit(), str().isalnum()
20976// ============================================================================
20977
20978void pd_test_str_is_methods() {
20979 std::cout << "========= Series.str().isalpha/isdigit/isalnum() ========";
20980
20981 pandas::Series<std::string> s({"abc", "123", "abc123", ""});
20982
20983 auto alpha_result = s.str().isalpha();
20984 if (alpha_result[0] != true || alpha_result[1] != false || alpha_result[2] != false || alpha_result[3] != false) {
20985 std::cout << " [FAIL] : isalpha() failed" << std::endl;
isdigit_with_nan (pd_test_3_all.cpp:28418)
28408 auto r = s.str().count_with_nan("a");
28409 fail += sao_check(!r.has_nan, "has_nan false");
28410 fail += sao_check(r.values[0] == 1 && r.values[1] == 0 && r.values[2] == 2, "values");
28411 if (fail == 0) std::cout << " OK" << std::endl;
28412}
28413
28414void pd_test_str_isdigit_with_nan() {
28415 std::cout << " -- pd_test_str_isdigit_with_nan --" << std::endl;
28416 int fail = 0;
28417 pandas::Series<std::string> s({"123", "NaN", "abc", "None", "45"}, "x");
28418 auto r = s.str().isdigit_with_nan();
28419 fail += sao_check(r.values.size() == 5, "size");
28420 fail += sao_check(r.has_nan, "has_nan");
28421 fail += sao_check(r.values[0] == true, "123 digit");
28422 fail += sao_check(r.is_nan[1], "NaN pos 1");
28423 fail += sao_check(r.values[2] == false, "abc not digit");
28424 fail += sao_check(r.is_nan[3], "None pos 3");
28425 fail += sao_check(r.values[4] == true, "45 digit");
28426 if (fail == 0) std::cout << " OK" << std::endl;
28427}
isnumeric (pd_test_3_all.cpp:21124)
21114 pandas::Series<std::string> s({"Hello World", "hello world", "HELLO", "Hello"});
21115 auto result = s.str().istitle();
21116 if (result[0] != true || result[1] != false || result[2] != false || result[3] != true) {
21117 std::cout << " [FAIL] : istitle() failed" << std::endl;
21118 throw std::runtime_error("pd_test_str_istitle: istitle() failed");
21119 }
21120 std::cout << " -> tests passed" << std::endl;
21121}
21122
21123// ============================================================================
21124// Test str().isnumeric() and str().isdecimal() (plan_04a)
21125// ============================================================================
21126
21127void pd_test_str_isnumeric_isdecimal() {
21128 std::cout << "========= Series.str().isnumeric/isdecimal() ============";
21129
21130 pandas::Series<std::string> s({"123", "abc", "12.3", ""});
21131 auto numeric_result = s.str().isnumeric();
21132 if (numeric_result[0] != true || numeric_result[1] != false ||
21133 numeric_result[2] != false || numeric_result[3] != false) {
21134 std::cout << " [FAIL] : isnumeric() failed" << std::endl;
istitle (pd_test_3_all.cpp:21108)
21098 pandas::Series<std::string> s({"a", "bc", "xyz"});
21099 auto result = s.str().repeat(3);
21100 if (result[0] != "aaa" || result[1] != "bcbcbc" || result[2] != "xyzxyzxyz") {
21101 std::cout << " [FAIL] : repeat() failed" << std::endl;
21102 throw std::runtime_error("pd_test_str_repeat_method: repeat() failed");
21103 }
21104 std::cout << " -> tests passed" << std::endl;
21105}
21106
21107// ============================================================================
21108// Test str().istitle() (plan_04a)
21109// ============================================================================
21110
21111void pd_test_str_istitle() {
21112 std::cout << "========= Series.str().istitle() ========================";
21113
21114 pandas::Series<std::string> s({"Hello World", "hello world", "HELLO", "Hello"});
21115 auto result = s.str().istitle();
21116 if (result[0] != true || result[1] != false || result[2] != false || result[3] != true) {
21117 std::cout << " [FAIL] : istitle() failed" << std::endl;
21118 throw std::runtime_error("pd_test_str_istitle: istitle() failed");
ljust (pd_test_3_all.cpp:21005)
20995 auto alnum_result = s.str().isalnum();
20996 if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997 std::cout << " [FAIL] : isalnum() failed" << std::endl;
20998 throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999 }
21000
21001 std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009 std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011 pandas::Series<std::string> s({"1", "22", "333"});
21012
21013 auto zfill_result = s.str().zfill(5);
21014 if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015 std::cout << " [FAIL] : zfill() failed" << std::endl;
lower (pd_test_3_all.cpp:20819)
20809#include <string>
20810
20811#include "../pandas/pd_series.h"
20812
20813// CRITICAL: No using namespace directives
20814
20815namespace dataframe_tests {
20816namespace dataframe_tests_string_accessor {
20817
20818// ============================================================================
20819// Test str().lower() and str().upper()
20820// ============================================================================
20821
20822void pd_test_str_lower_upper() {
20823 std::cout << "========= Series.str().lower/upper() ===================";
20824
20825 pandas::Series<std::string> s({"Hello", "WORLD", "TeSt"});
20826
20827 auto lower_result = s.str().lower();
20828 if (lower_result[0] != "hello" || lower_result[1] != "world" || lower_result[2] != "test") {
20829 std::cout << " [FAIL] : lower() failed" << std::endl;
lstrip (pd_test_3_all.cpp:20885)
20875 auto lens = s.str().len();
20876 if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877 std::cout << " [FAIL] : len() failed" << std::endl;
20878 throw std::runtime_error("pd_test_str_len: len() failed");
20879 }
20880
20881 std::cout << " -> tests passed" << std::endl;
20882}
20883
20884// ============================================================================
20885// Test str().strip(), str().lstrip(), str().rstrip()
20886// ============================================================================
20887
20888void pd_test_str_strip() {
20889 std::cout << "========= Series.str().strip() ==========================";
20890
20891 pandas::Series<std::string> s({" hello ", " world", "test "});
20892
20893 auto strip_result = s.str().strip();
20894 if (strip_result[0] != "hello" || strip_result[1] != "world" || strip_result[2] != "test") {
20895 std::cout << " [FAIL] : strip() failed" << std::endl;
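match (pd_test_2_all.cpp:1467) [note: this excerpt is a DataFrame::between_time test; the word "match" occurs only in its comments, and no StringMethods match() call is shown]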
1457 void pd_test_between_time_overnight() {
1458 std::cout << "========= DataFrame between_time: overnight range ======";
1459
1460 // Test overnight range (e.g., 23:00 to 01:00)
1461 std::map<std::string, std::vector<double>> data = {
1462 {"A", {1.0, 2.0, 3.0, 4.0, 5.0}}
1463 };
1464 pandas::DataFrame df(data);
1465
1466 std::vector<std::string> datetime_index = {
1467 "2018-04-09 00:30:00", // Should match (before 01:00)
1468 "2018-04-09 12:00:00", // Should NOT match
1469 "2018-04-09 22:00:00", // Should NOT match
1470 "2018-04-09 23:30:00", // Should match (after 23:00)
1471 "2018-04-10 00:00:00" // Should match (at midnight, before 01:00)
1472 };
1473 df.set_index(std::make_unique<pandas::Index<std::string>>(datetime_index));
1474
1475 // Overnight range: 23:00 to 01:00
1476 auto result = df.between_time("23:00:00", "01:00:00");
normalize (pd_test_1_all.cpp:8723) [note: this excerpt exercises DatetimeMixinIndex::normalize(), not a Series.str() method]
8713void pd_test_datetime_mixin_normalize() {
8714 std::cout << "========= normalize ===================================";
8715
8716 // Create datetime with time component
8717 std::vector<std::optional<numpy::datetime64>> values = {
8718 numpy::datetime64(86400000000000LL + 3600000000000LL, numpy::DateTimeUnit::Nanosecond) // 1 day + 1 hour
8719 };
8720 pandas::DatetimeArray arr(values);
8721 pandas::DatetimeMixinIndex idx(arr);
8722
8723 pandas::DatetimeMixinIndex normalized = idx.normalize();
8724
8725 bool passed = (normalized.size() == 1);
8726 if (!passed) {
8727 std::cout << " [FAIL] : in pd_test_datetime_mixin_normalize()" << std::endl;
8728 throw std::runtime_error("pd_test_datetime_mixin_normalize failed");
8729 }
8730
8731 std::cout << " -> tests passed" << std::endl;
8732}
partition (pd_test_3_all.cpp:21422)
21412 }
21413 auto decoded = s.str().decode("utf-8");
21414 if (decoded[0] != "hello" || decoded[1] != "world") {
21415 std::cout << " [FAIL] : decode failed" << std::endl;
21416 throw std::runtime_error("pd_test_str_encode_decode: decode failed");
21417 }
21418 std::cout << " -> tests passed" << std::endl;
21419}
21420
21421// ============================================================================
21422// Test str().partition() / rpartition() (plan_04c)
21423// ============================================================================
21424
21425void pd_test_str_partition() {
21426 std::cout << "========= Series.str().partition/rpartition() ===========";
21427
21428 pandas::Series<std::string> s({"hello-world", "foo-bar", "xyz"});
21429 auto result = s.str().partition("-");
21430 if (result[0][0] != "hello" || result[0][1] != "-" || result[0][2] != "world") {
21431 std::cout << " [FAIL] : partition element 0 failed" << std::endl;
21432 throw std::runtime_error("pd_test_str_partition: partition element 0 failed");
removeprefix (pd_test_3_all.cpp:21148)
21138 auto decimal_result = s.str().isdecimal();
21139 if (decimal_result[0] != true || decimal_result[1] != false ||
21140 decimal_result[2] != false || decimal_result[3] != false) {
21141 std::cout << " [FAIL] : isdecimal() failed" << std::endl;
21142 throw std::runtime_error("pd_test_str_isnumeric_isdecimal: isdecimal() failed");
21143 }
21144 std::cout << " -> tests passed" << std::endl;
21145}
21146
21147// ============================================================================
21148// Test str().removeprefix() and str().removesuffix() (plan_04a)
21149// ============================================================================
21150
21151void pd_test_str_removeprefix_removesuffix() {
21152 std::cout << "========= Series.str().removeprefix/removesuffix() ======";
21153
21154 pandas::Series<std::string> s({"prefix_foo", "prefix_bar", "other"});
21155 auto prefix_result = s.str().removeprefix("prefix_");
21156 if (prefix_result[0] != "foo" || prefix_result[1] != "bar" || prefix_result[2] != "other") {
21157 std::cout << " [FAIL] : removeprefix() failed" << std::endl;
21158 throw std::runtime_error("pd_test_str_removeprefix_removesuffix: removeprefix() failed");
removesuffix (pd_test_3_all.cpp:21148)
21138 auto decimal_result = s.str().isdecimal();
21139 if (decimal_result[0] != true || decimal_result[1] != false ||
21140 decimal_result[2] != false || decimal_result[3] != false) {
21141 std::cout << " [FAIL] : isdecimal() failed" << std::endl;
21142 throw std::runtime_error("pd_test_str_isnumeric_isdecimal: isdecimal() failed");
21143 }
21144 std::cout << " -> tests passed" << std::endl;
21145}
21146
21147// ============================================================================
21148// Test str().removeprefix() and str().removesuffix() (plan_04a)
21149// ============================================================================
21150
21151void pd_test_str_removeprefix_removesuffix() {
21152 std::cout << "========= Series.str().removeprefix/removesuffix() ======";
21153
21154 pandas::Series<std::string> s({"prefix_foo", "prefix_bar", "other"});
21155 auto prefix_result = s.str().removeprefix("prefix_");
21156 if (prefix_result[0] != "foo" || prefix_result[1] != "bar" || prefix_result[2] != "other") {
21157 std::cout << " [FAIL] : removeprefix() failed" << std::endl;
21158 throw std::runtime_error("pd_test_str_removeprefix_removesuffix: removeprefix() failed");
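repeat (pd_test_3_all.cpp:2166) [note: this excerpt exercises CategoricalArray::repeat(); a Series.str().repeat(3) test is shown in the istitle excerpt, at pd_test_3_all.cpp:21099]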
2156 auto viewed = arr.view();
2157 if (viewed.size() != 3 || !viewed.equals(arr)) {
2158 throw std::runtime_error("view failed");
2159 }
2160
2161 std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165 std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167 std::vector<std::optional<std::string>> values = {"a", "b"};
2168 pandas::CategoricalArray arr(values);
2169
2170 auto result = arr.repeat(3);
2171 if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172 *result[3] != "b" || *result[5] != "b") {
2173 throw std::runtime_error("repeat scalar failed");
2174 }
rfind (pd_test_3_all.cpp:21075)
21065 pandas::Series<std::string> s({"FOO", "Bar", "HELLO"});
21066 auto result = s.str().casefold();
21067 if (result[0] != "foo" || result[1] != "bar" || result[2] != "hello") {
21068 std::cout << " [FAIL] : casefold() failed" << std::endl;
21069 throw std::runtime_error("pd_test_str_casefold: casefold() failed");
21070 }
21071 std::cout << " -> tests passed" << std::endl;
21072}
21073
21074// ============================================================================
21075// Test str().rfind() (plan_04a)
21076// ============================================================================
21077
21078void pd_test_str_rfind() {
21079 std::cout << "========= Series.str().rfind() ==========================";
21080
21081 pandas::Series<std::string> s({"foobarfoo", "barfoo", "hello"});
21082 auto result = s.str().rfind("foo");
21083 if (result[0] != 6 || result[1] != 3 || result[2] != -1) {
21084 std::cout << " [FAIL] : rfind() failed, got: "
21085 << result[0] << ", " << result[1] << ", " << result[2] << std::endl;
rindex (pd_test_3_all.cpp:21449)
21439 pandas::Series<std::string> s2({"hello-world-test", "foo-bar"});
21440 auto result2 = s2.str().rpartition("-");
21441 if (result2[0][0] != "hello-world" || result2[0][1] != "-" || result2[0][2] != "test") {
21442 std::cout << " [FAIL] : rpartition element 0 failed" << std::endl;
21443 throw std::runtime_error("pd_test_str_partition: rpartition element 0 failed");
21444 }
21445 std::cout << " -> tests passed" << std::endl;
21446}
21447
21448// ============================================================================
21449// Test str().index() / rindex() (plan_04c)
21450// ============================================================================
21451
21452void pd_test_str_index_rindex() {
21453 std::cout << "========= Series.str().index/rindex() ===================";
21454
21455 pandas::Series<std::string> s({"foobar", "barfoo"});
21456 auto result = s.str().index("oo");
21457 if (result[0] != 1 || result[1] != 4) {
21458 std::cout << " [FAIL] : index('oo') failed" << std::endl;
21459 throw std::runtime_error("pd_test_str_index_rindex: index('oo') failed");
rjust (pd_test_3_all.cpp:21005)
20995 auto alnum_result = s.str().isalnum();
20996 if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997 std::cout << " [FAIL] : isalnum() failed" << std::endl;
20998 throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999 }
21000
21001 std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009 std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011 pandas::Series<std::string> s({"1", "22", "333"});
21012
21013 auto zfill_result = s.str().zfill(5);
21014 if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015 std::cout << " [FAIL] : zfill() failed" << std::endl;
rpartition (pd_test_3_all.cpp:21422)
21412 }
21413 auto decoded = s.str().decode("utf-8");
21414 if (decoded[0] != "hello" || decoded[1] != "world") {
21415 std::cout << " [FAIL] : decode failed" << std::endl;
21416 throw std::runtime_error("pd_test_str_encode_decode: decode failed");
21417 }
21418 std::cout << " -> tests passed" << std::endl;
21419}
21420
21421// ============================================================================
21422// Test str().partition() / rpartition() (plan_04c)
21423// ============================================================================
21424
21425void pd_test_str_partition() {
21426 std::cout << "========= Series.str().partition/rpartition() ===========";
21427
21428 pandas::Series<std::string> s({"hello-world", "foo-bar", "xyz"});
21429 auto result = s.str().partition("-");
21430 if (result[0][0] != "hello" || result[0][1] != "-" || result[0][2] != "world") {
21431 std::cout << " [FAIL] : partition element 0 failed" << std::endl;
21432 throw std::runtime_error("pd_test_str_partition: partition element 0 failed");
rsplit (pd_test_3_all.cpp:21171)
21161 pandas::Series<std::string> s2({"foo_suffix", "bar_suffix", "other"});
21162 auto suffix_result = s2.str().removesuffix("_suffix");
21163 if (suffix_result[0] != "foo" || suffix_result[1] != "bar" || suffix_result[2] != "other") {
21164 std::cout << " [FAIL] : removesuffix() failed" << std::endl;
21165 throw std::runtime_error("pd_test_str_removeprefix_removesuffix: removesuffix() failed");
21166 }
21167 std::cout << " -> tests passed" << std::endl;
21168}
21169
21170// ============================================================================
21171// Test str().rsplit() (plan_04a)
21172// ============================================================================
21173
21174void pd_test_str_rsplit() {
21175 std::cout << "========= Series.str().rsplit() =========================";
21176
21177 pandas::Series<std::string> s({"a,b,c", "x,y"});
21178 auto result = s.str().rsplit(",");
21179 if (result[0].size() != 3 || result[0][0] != "a" || result[0][1] != "b" || result[0][2] != "c") {
21180 std::cout << " [FAIL] : rsplit() unlimited failed" << std::endl;
21181 throw std::runtime_error("pd_test_str_rsplit: rsplit() unlimited failed");
rstrip (pd_test_3_all.cpp:20885)
20875 auto lens = s.str().len();
20876 if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877 std::cout << " [FAIL] : len() failed" << std::endl;
20878 throw std::runtime_error("pd_test_str_len: len() failed");
20879 }
20880
20881 std::cout << " -> tests passed" << std::endl;
20882}
20883
20884// ============================================================================
20885// Test str().strip(), str().lstrip(), str().rstrip()
20886// ============================================================================
20887
20888void pd_test_str_strip() {
20889 std::cout << "========= Series.str().strip() ==========================";
20890
20891 pandas::Series<std::string> s({" hello ", " world", "test "});
20892
20893 auto strip_result = s.str().strip();
20894 if (strip_result[0] != "hello" || strip_result[1] != "world" || strip_result[2] != "test") {
20895 std::cout << " [FAIL] : strip() failed" << std::endl;
slice (pd_test_1_all.cpp:17546) [note: this excerpt exercises PeriodIndex::slice(), not a Series.str() method]
17536// ============================================================================
17537// Slicing / Indexing Tests
17538// ============================================================================
17539
17540void pd_test_period_index_slice() {
17541 std::cout << "========= slice method ================================";
17542
17543 std::vector<int64_t> ordinals = {0, 1, 2, 3, 4};
17544 pandas::PeriodIndex idx(ordinals, "D");
17545
17546 pandas::PeriodIndex sliced = idx.slice(1, 4);
17547
17548 bool passed = (sliced.size() == 3 &&
17549 sliced[0].has_value() && *sliced[0] == 1);
17550 if (!passed) {
17551 std::cout << " [FAIL] : in pd_test_period_index_slice()" << std::endl;
17552 throw std::runtime_error("pd_test_period_index_slice failed");
17553 }
17554
17555 std::cout << " -> tests passed" << std::endl;
17556}
slice_replace (pd_test_3_all.cpp:21485)
21475 threw = true;
21476 }
21477 if (!threw) {
21478 std::cout << " [FAIL] : index should throw on not found" << std::endl;
21479 throw std::runtime_error("pd_test_str_index_rindex: index should throw on not found");
21480 }
21481 std::cout << " -> tests passed" << std::endl;
21482}
21483
21484// ============================================================================
21485// Test str().slice_replace() (plan_04c)
21486// ============================================================================
21487
21488void pd_test_str_slice_replace() {
21489 std::cout << "========= Series.str().slice_replace() ==================";
21490
21491 pandas::Series<std::string> s({"hello", "world", "foo"});
21492 auto result = s.str().slice_replace(0, 2, "XX");
21493 if (result[0] != "XXllo" || result[1] != "XXrld" || result[2] != "XXo") {
21494 std::cout << " [FAIL] : slice_replace(0, 2, 'XX') failed" << std::endl;
21495 throw std::runtime_error("pd_test_str_slice_replace: slice_replace failed");
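split (pd_test_4_all.cpp:3961) [note: "split" appears here only in a comment quoting the Python source; the C++ test shown states that it skips the split/explode and hand-builds the result]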
3951// =============================================================================
3952// Standalone-only helpers (dropped when pasted into pd_test_repr_mismatch.cpp).
3953// =============================================================================
3954
3955// =============================================================================
3956// Case 1 — explode.split_comma
3957//
3958// Source: pandasPython_tests/test_pandas_reshaping_pivot_compare_full.py L511
3959// pd_df2 = pd.DataFrame([{"var1":"a,b,c","var2":1},
3960// {"var1":"d,e,f","var2":2}])
3961// .assign(var1=lambda d: d.var1.str.split(","))
3962// .explode("var1").reset_index(drop=True)
3963//
3964// Strategy C: we skip the split/explode and hand-build the 6-row result
3965// (var1 object/string, var2 int64, default RangeIndex(0..5)).
3966// =============================================================================
3967void explode_split_comma() {
3968 pandas::DataFrame df;
3969 df.add_column<std::string>("var1", {"a", "b", "c", "d", "e", "f"});
3970 df.add_column<int64_t>("var2", {1, 1, 1, 2, 2, 2});
3971 apply_default_display(df);
split_expand (pd_test_3_all.cpp:28443)
28433 auto r = s.str().isdigit_with_nan();
28434 fail += sao_check(!r.has_nan, "no nan");
28435 fail += sao_check(r.values[0] && !r.values[1] && r.values[2], "values");
28436 if (fail == 0) std::cout << " OK" << std::endl;
28437}
28438
28439void pd_test_str_split_expand() {
28440 std::cout << " -- pd_test_str_split_expand --" << std::endl;
28441 int fail = 0;
28442 pandas::Series<std::string> s({"a,b,c", "d,e,f"}, "x");
28443 auto r = s.str().split_expand(",", -1);
28444 fail += sao_check(r.num_cols == 3, "3 cols");
28445 fail += sao_check(r.num_rows == 2, "2 rows");
28446 fail += sao_check(r.columns[0][0] == "a" && r.columns[1][0] == "b" && r.columns[2][0] == "c", "row0");
28447 fail += sao_check(r.columns[0][1] == "d" && r.columns[1][1] == "e" && r.columns[2][1] == "f", "row1");
28448 if (fail == 0) std::cout << " OK" << std::endl;
28449}
28450
28451void pd_test_str_split_expand_nan() {
28452 std::cout << " -- pd_test_str_split_expand_nan --" << std::endl;
28453 int fail = 0;
startswith (pd_test_3_all.cpp:20933)
20923 auto result = s.str().contains("an", true, false); // case_sensitive=true, regex=false
20924 if (result[0] != false || result[1] != true || result[2] != false) {
20925 std::cout << " [FAIL] : contains() failed" << std::endl;
20926 throw std::runtime_error("pd_test_str_contains: contains() failed");
20927 }
20928
20929 std::cout << " -> tests passed" << std::endl;
20930}
20931
20932// ============================================================================
20933// Test str().startswith() and str().endswith()
20934// ============================================================================
20935
20936void pd_test_str_startswith_endswith() {
20937 std::cout << "========= Series.str().startswith/endswith() ============";
20938
20939 pandas::Series<std::string> s({"hello", "world", "help"});
20940
20941 auto starts_result = s.str().startswith("hel");
20942 if (starts_result[0] != true || starts_result[1] != false || starts_result[2] != true) {
20943 std::cout << " [FAIL] : startswith() failed" << std::endl;
strip (pd_test_3_all.cpp:20885)
20875 auto lens = s.str().len();
20876 if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877 std::cout << " [FAIL] : len() failed" << std::endl;
20878 throw std::runtime_error("pd_test_str_len: len() failed");
20879 }
20880
20881 std::cout << " -> tests passed" << std::endl;
20882}
20883
20884// ============================================================================
20885// Test str().strip(), str().lstrip(), str().rstrip()
20886// ============================================================================
20887
20888void pd_test_str_strip() {
20889 std::cout << "========= Series.str().strip() ==========================";
20890
20891 pandas::Series<std::string> s({" hello ", " world", "test "});
20892
20893 auto strip_result = s.str().strip();
20894 if (strip_result[0] != "hello" || strip_result[1] != "world" || strip_result[2] != "test") {
20895 std::cout << " [FAIL] : strip() failed" << std::endl;
title (pd_test_3_all.cpp:20843)
20833 auto upper_result = s.str().upper();
20834 if (upper_result[0] != "HELLO" || upper_result[1] != "WORLD" || upper_result[2] != "TEST") {
20835 std::cout << " [FAIL] : upper() failed" << std::endl;
20836 throw std::runtime_error("pd_test_str_lower_upper: upper() failed");
20837 }
20838
20839 std::cout << " -> tests passed" << std::endl;
20840}
20841
20842// ============================================================================
20843// Test str().capitalize() and str().title()
20844// ============================================================================
20845
20846void pd_test_str_capitalize_title() {
20847 std::cout << "========= Series.str().capitalize/title() ===============";
20848
20849 pandas::Series<std::string> s({"hello world", "HELLO WORLD", "hELLO wORLD"});
20850
20851 auto cap_result = s.str().capitalize();
20852 if (cap_result[0] != "Hello world" || cap_result[1] != "Hello world" || cap_result[2] != "Hello world") {
20853 std::cout << " [FAIL] : capitalize() failed" << std::endl;
translate (pd_test_3_all.cpp:21352)
21342 pandas::Series<std::string> s({"abc", "xy", "123"});
21343 auto result = s.str().join("-");
21344 if (result[0] != "a-b-c" || result[1] != "x-y" || result[2] != "1-2-3") {
21345 std::cout << " [FAIL] : join('-') failed" << std::endl;
21346 throw std::runtime_error("pd_test_str_join: join('-') failed");
21347 }
21348 std::cout << " -> tests passed" << std::endl;
21349}
21350
21351// ============================================================================
21352// Test str().translate() (plan_04c)
21353// ============================================================================
21354
21355void pd_test_str_translate() {
21356 std::cout << "========= Series.str().translate() ======================";
21357
21358 pandas::Series<std::string> s({"abc", "def", "xyz"});
21359 auto result = s.str().translate("abc", "XYZ");
21360 if (result[0] != "XYZ" || result[1] != "def" || result[2] != "xyz") {
21361 std::cout << " [FAIL] : translate failed" << std::endl;
21362 throw std::runtime_error("pd_test_str_translate: translate failed");
upper (pd_test_3_all.cpp:20819)
20809#include <string>
20810
20811#include "../pandas/pd_series.h"
20812
20813// CRITICAL: No using namespace directives
20814
20815namespace dataframe_tests {
20816namespace dataframe_tests_string_accessor {
20817
20818// ============================================================================
20819// Test str().lower() and str().upper()
20820// ============================================================================
20821
20822void pd_test_str_lower_upper() {
20823 std::cout << "========= Series.str().lower/upper() ===================";
20824
20825 pandas::Series<std::string> s({"Hello", "WORLD", "TeSt"});
20826
20827 auto lower_result = s.str().lower();
20828 if (lower_result[0] != "hello" || lower_result[1] != "world" || lower_result[2] != "test") {
20829 std::cout << " [FAIL] : lower() failed" << std::endl;
wrap (pd_test_3_all.cpp:21368)
21358 pandas::Series<std::string> s({"abc", "def", "xyz"});
21359 auto result = s.str().translate("abc", "XYZ");
21360 if (result[0] != "XYZ" || result[1] != "def" || result[2] != "xyz") {
21361 std::cout << " [FAIL] : translate failed" << std::endl;
21362 throw std::runtime_error("pd_test_str_translate: translate failed");
21363 }
21364 std::cout << " -> tests passed" << std::endl;
21365}
21366
21367// ============================================================================
21368// Test str().wrap() (plan_04c)
21369// ============================================================================
21370
21371void pd_test_str_wrap() {
21372 std::cout << "========= Series.str().wrap() ===========================";
21373
21374 pandas::Series<std::string> s({"hello world foo"});
21375 auto result = s.str().wrap(10);
21376 // Should wrap at word boundary
21377 if (result[0].find('\n') == std::string::npos) {
21378 std::cout << " [FAIL] : wrap should contain newline" << std::endl;
zfill (pd_test_3_all.cpp:21005)
20995 auto alnum_result = s.str().isalnum();
20996 if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997 std::cout << " [FAIL] : isalnum() failed" << std::endl;
20998 throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999 }
21000
21001 std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009 std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011 pandas::Series<std::string> s({"1", "22", "333"});
21012
21013 auto zfill_result = s.str().zfill(5);
21014 if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015 std::cout << " [FAIL] : zfill() failed" << std::endl;