StringMethods

class pandas::StringMethods

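String accessor class of the pandas C++ library. It provides element-wise string operations over a string Series; in the test-suite excerpts below it is obtained by calling `str()` on a `pandas::Series<std::string>`.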

Example

#include <pandas/pandas.h>
using namespace pandas;

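// Obtain a StringMethods accessor from a string Series via str()
pandas::Series<std::string> s({"a", "bb", "ccc", ""});
auto lens = s.str().len();  // element-wise lengths: 1, 2, 3, 0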

Indexing / Selection

Signature

Return Type

Location

Example

std::vector<std::string> get(int64_t pos) const

std::vector<std::string>

pd_string_accessor.h:730

View

get_dummies(const std::string& sep = "|") const

pd_string_accessor.h:1303

View

DummiesResult get_dummies_as_multiindex(const std::string& sep = "|") const

DummiesResult

pd_string_accessor.h:1298

View

Data Manipulation

Signature

Return Type

Location

Example

std::vector<std::string> replace(const std::string& pat, const std::string& repl, bool regex_mode = true, bool case_sensitive = true, int flags = 0) const

std::vector<std::string>

pd_string_accessor.h:381

View

Missing Data

Signature

Return Type

Location

Example

std::vector<std::string> pad(size_t width, const std::string& side = "left", char fillchar = ' ') const

std::vector<std::string>

pd_string_accessor.h:690

View

Statistics

Signature

Return Type

Location

Example

std::vector<int64_t> count(const std::string& pat) const

std::vector<int64_t>

pd_string_accessor.h:464

View

CountResult count_with_nan(const std::string& pat) const

CountResult

pd_string_accessor.h:483

View

Comparison

Signature

Return Type

Location

Example

std::vector<int64_t> len() const

std::vector<int64_t>

pd_string_accessor.h:265

View

Combining

Signature

Return Type

Location

Example

std::vector<std::string> join(const std::string& sep) const

std::vector<std::string>

pd_string_accessor.h:1106

View

Iteration

Signature

Return Type

Location

Example

std::vector<bool> endswith(const std::string& pat) const

std::vector<bool>

pd_string_accessor.h:366

View

Other Methods

Signature

Return Type

Location

Example

static DataFrame build_string_dataframe( const std::vector<std::vector<std::string>>& columns_data, const std::vector<std::string>& column_names)

static DataFrame

pd_string_accessor.h:150

std::vector<std::string> capitalize() const

std::vector<std::string>

pd_string_accessor.h:198

View

std::vector<std::string> casefold() const

std::vector<std::string>

pd_string_accessor.h:751

View

std::string cat(const std::string& sep = "", const std::string& na_rep = "") const

std::string

pd_string_accessor.h:911

View

std::vector<std::string> center(size_t width, char fillchar = ' ') const

std::vector<std::string>

pd_string_accessor.h:717

View

std::vector<bool> contains(const std::string& pat, bool case_sensitive = true, bool regex_mode = true) const

std::vector<bool>

pd_string_accessor.h:322

View

std::vector<std::string> decode(const std::string& /*encoding*/) const

std::vector<std::string>

pd_string_accessor.h:1194

View

std::vector<std::string> encode(const std::string& /*encoding*/) const

std::vector<std::string>

pd_string_accessor.h:1184

View

std::vector<std::vector<std::string>> extract(const std::string& pat) const

std::vector<std::vector<std::string>>

pd_string_accessor.h:1031

View

std::vector<std::vector<std::vector<std::string>>> extractall(const std::string& pat) const

std::vector<std::vector<std::vector<std::string>>>

pd_string_accessor.h:1078

View

ExtractAllResult extractall_with_index(const std::string& pat) const

ExtractAllResult

pd_string_accessor.h:1074

View

std::vector<int64_t> find(const std::string& sub, int64_t start = 0, int64_t end = -1) const

std::vector<int64_t>

pd_string_accessor.h:445

View

std::vector<std::optional<std::vector<std::string>>> findall( const std::string& pat) const

std::vector<std::optional<std::vector<std::string>>>

pd_string_accessor.h:1002

View

std::vector<bool> fullmatch(const std::string& pat, bool case_sensitive = true) const

std::vector<bool>

pd_string_accessor.h:982

View

std::vector<int64_t> index(const std::string& sub, int64_t start = 0, int64_t end = -1) const

std::vector<int64_t>

pd_string_accessor.h:1236

View

std::vector<bool> isalnum() const

std::vector<bool>

pd_string_accessor.h:593

View

std::vector<bool> isalpha() const

std::vector<bool>

pd_string_accessor.h:559

View

std::vector<bool> isdecimal() const

std::vector<bool>

pd_string_accessor.h:827

View

std::vector<bool> isdigit() const

std::vector<bool>

pd_string_accessor.h:576

View

BoolWithNan isdigit_with_nan() const

BoolWithNan

pd_string_accessor.h:511

View

std::vector<bool> islower() const

std::vector<bool>

pd_string_accessor.h:627

std::vector<bool> isnumeric() const

std::vector<bool>

pd_string_accessor.h:822

View

std::vector<bool> isspace() const

std::vector<bool>

pd_string_accessor.h:610

std::vector<bool> istitle() const

std::vector<bool>

pd_string_accessor.h:790

View

std::vector<bool> isupper() const

std::vector<bool>

pd_string_accessor.h:648

std::vector<std::string> ljust(size_t width, char fillchar = ' ') const

std::vector<std::string>

pd_string_accessor.h:721

View

std::vector<std::string> lower() const

std::vector<std::string>

pd_string_accessor.h:166

View

std::vector<std::string> lstrip(const std::string& chars = " \t\n\r") const

std::vector<std::string>

pd_string_accessor.h:291

View

std::vector<bool> match(const std::string& pat, bool case_sensitive = true) const

std::vector<bool>

pd_string_accessor.h:926

View

std::vector<std::optional<bool>> match_with_na( const std::string& pat, bool case_sensitive = true, std::optional<bool> na_value = std::nullopt) const

std::vector<std::optional<bool>>

pd_string_accessor.h:948

std::vector<std::string> normalize(const std::string& /*form*/) const

std::vector<std::string>

pd_string_accessor.h:1174

View

const ParentType& parent() const

const ParentType&

pd_string_accessor.h:158

explicit StringMethods(const ParentType& parent) : parent_(parent)

(constructor — no return type)

pd_string_accessor.h:155

std::optional<std::string> parent_name() const

std::optional<std::string>

pd_string_accessor.h:161

parse_named_groups(const std::string& pat)

pd_string_accessor.h:81

std::vector<std::vector<std::string>> partition(const std::string& sep) const

std::vector<std::vector<std::string>>

pd_string_accessor.h:1204

View

std::regex re(pat, case_sensitive ? std::regex::ECMAScript : std::regex::icase)

std::regex

pd_string_accessor.h:329

std::regex re(pat, rx_flags)

std::regex

pd_string_accessor.h:395

std::regex re(pat)

std::regex

pd_string_accessor.h:468

std::regex re(pat, flags)

std::regex

pd_string_accessor.h:932

std::regex re(pat, flags)

std::regex

pd_string_accessor.h:957

std::regex re(pat, flags)

std::regex

pd_string_accessor.h:988

std::regex re(pat)

std::regex

pd_string_accessor.h:1007

std::regex re(pat)

std::regex

pd_string_accessor.h:1035

std::regex re(pat)

std::regex

pd_string_accessor.h:1082

std::vector<std::string> removeprefix(const std::string& prefix) const

std::vector<std::string>

pd_string_accessor.h:832

View

std::vector<std::string> removesuffix(const std::string& suffix) const

std::vector<std::string>

pd_string_accessor.h:847

View

std::vector<std::string> repeat(int64_t repeats) const

std::vector<std::string>

pd_string_accessor.h:775

View

std::vector<int64_t> rfind(const std::string& sub, int64_t start = 0, int64_t end = -1) const

std::vector<int64_t>

pd_string_accessor.h:756

View

std::vector<int64_t> rindex(const std::string& sub, int64_t start = 0, int64_t end = -1) const

std::vector<int64_t>

pd_string_accessor.h:1254

View

std::vector<std::string> rjust(size_t width, char fillchar = ' ') const

std::vector<std::string>

pd_string_accessor.h:725

View

std::vector<std::vector<std::string>> rpartition(const std::string& sep) const

std::vector<std::vector<std::string>>

pd_string_accessor.h:1220

View

std::vector<std::vector<std::string>> rsplit(const std::string& pat = " ", int n = -1) const

std::vector<std::vector<std::string>>

pd_string_accessor.h:863

View

std::vector<std::string> rstrip(const std::string& chars = " \t\n\r") const

std::vector<std::string>

pd_string_accessor.h:306

View

std::vector<std::string> slice(int64_t start = 0, int64_t stop = -1, int64_t step = 1) const

std::vector<std::string>

pd_string_accessor.h:670

View

std::vector<std::string> slice_replace(int64_t start = 0, int64_t stop = -1, const std::string& repl = "") const

std::vector<std::string>

pd_string_accessor.h:1272

View

std::vector<std::vector<std::string>> split(const std::string& pat = " ", int n = -1) const

std::vector<std::vector<std::string>>

pd_string_accessor.h:413

View

SplitExpandResult split_expand(const std::string& pat, int n = -1) const

SplitExpandResult

pd_string_accessor.h:535

View

std::vector<bool> startswith(const std::string& pat) const

std::vector<bool>

pd_string_accessor.h:352

View

std::vector<std::string> strip(const std::string& chars = " \t\n\r") const

std::vector<std::string>

pd_string_accessor.h:275

View

std::vector<std::string> swapcase() const

std::vector<std::string>

pd_string_accessor.h:243

std::vector<std::string> title() const

std::vector<std::string>

pd_string_accessor.h:218

View

std::vector<std::string> translate(const std::string& from_chars, const std::string& to_chars) const

std::vector<std::string>

pd_string_accessor.h:1122

View

std::vector<std::string> upper() const

std::vector<std::string>

pd_string_accessor.h:182

View

std::vector<std::string> wrap(size_t width) const

std::vector<std::string>

pd_string_accessor.h:1143

View

std::vector<std::string> zfill(size_t width) const

std::vector<std::string>

pd_string_accessor.h:713

View

Code Examples

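The following examples are extracted from the test suite. Each excerpt appears to be selected by method name, so several of them exercise a same-named function on a different type (for example `DataFrame::replace`, `Index::join`, or `std::find`) rather than the `StringMethods` method itself; check the receiver of the call before relying on an excerpt.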

get (pd_test_1_all.cpp:10290)
10280void pd_test_extension_index_get_loc_unique() {
10281    std::cout << "========= get_loc (unique) =========================";
10282
10283    pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284    pandas::CategoricalIndex idx(arr);
10285
10286    auto loc_apple = idx.get_loc("apple");
10287    auto loc_banana = idx.get_loc("banana");
10288    auto loc_cherry = idx.get_loc("cherry");
10289
10290    bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291                   std::get<size_t>(loc_banana) == 1 &&
10292                   std::get<size_t>(loc_cherry) == 2);
10293    if (!passed) {
10294        std::cout << "  [FAIL] : in pd_test_extension_index_get_loc_unique() : get_loc check failed" << std::endl;
10295        throw std::runtime_error("pd_test_extension_index_get_loc_unique failed");
10296    }
10297
10298    std::cout << " -> tests passed" << std::endl;
10299}
get_dummies (pd_test_3_all.cpp:13545)
13535    }
13536
13537    std::cout << " -> tests passed" << std::endl;
13538}
13539
13540// ============================================================================
13541// Get Dummies / From Dummies Tests
13542// ============================================================================
13543
13544void pd_test_top_level_get_dummies() {
13545    std::cout << "========= get_dummies() ===============================";
13546
13547    std::vector<std::string> data = {"A", "B", "A", "C", "B", "A"};
13548    pandas::Series<std::string> s(data, "category");
13549
13550    pandas::DataFrame result = pandas::get_dummies(s);
13551
13552    // Should have columns for A, B, C
13553    if (result.ncols() != 3) {
13554        std::cout << "  [FAIL] : in pd_test_top_level_get_dummies() : expected 3 columns" << std::endl;
13555        throw std::runtime_error("pd_test_top_level_get_dummies failed: wrong column count");
get_dummies_as_multiindex (pd_test_5_all.cpp:123697)
123687    return rows;
123688}
123689
123690static std::string run_oracle_row(const OracleRow& r) {
123691    pandas::Series<std::string> s(r.input);
123692    if (r.op == "extractall") {
123693        auto res = s.str().extractall_with_index(r.arg);
123694        return format_extractall(res);
123695    }
123696    if (r.op == "get_dummies") {
123697        auto res = s.str().get_dummies_as_multiindex(r.arg);
123698        return format_get_dummies(res);
123699    }
123700    throw std::runtime_error("unknown op: " + r.op);
123701}
123702
123703static void run_oracle_subset(int sub_case, int begin_id, int end_id,
123704                              int& local_fail) {
123705    std::cout << "-- case_" << (13 + sub_case) << "_oracle_rows_"
123706              << begin_id << "_to_" << (end_id - 1) << "\n";
123707    bool ok = false;
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
pad (pd_test_3_all.cpp:1771)
1761    if (result_single.nrows() != 3 || result_single.ncols() != 1) {
1762        std::cout << "  [FAIL] : in pd_test_3_all_dataframe_unstack() : single col shape mismatch" << std::endl;
1763        throw std::runtime_error("pd_test_3_all_dataframe_unstack failed: single col shape");
1764    }
1765
1766    std::cout << " -> tests passed" << std::endl;
1767}
1768
1769void pd_test_3_all_fbbuilder_pad() {
1770    std::cout << "========= FBBuilder.pad() (internal) =================";
1771
1772    // Note: FBBuilder.pad() is an internal method for FlatBuffer serialization
1773    // It's not the pandas DataFrame.pad() method (which is ffill alias)
1774    // This test verifies the to_feather() serialization works, which uses FBBuilder.pad()
1775
1776    std::map<std::string, std::vector<double>> data = {
1777        {"A", {1.0, 2.0, 3.0}},
1778        {"B", {4.0, 5.0, 6.0}}
1779    };
1780    pandas::DataFrame df(data);
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
count_with_nan (pd_test_3_all.cpp:28394)
28384static int sao_check(bool cond, const char* msg) {
28385    if (!cond) { std::cout << "    FAIL: " << msg << std::endl; return 1; }
28386    return 0;
28387}
28388
28389void pd_test_str_count_with_nan() {
28390    std::cout << "  -- pd_test_str_count_with_nan --" << std::endl;
28391    int fail = 0;
28392    pandas::Series<std::string> s({"aa", "NaN", "abab", "None"}, "x");
28393    auto r = s.str().count_with_nan("a");
28394    fail += sao_check(r.values.size() == 4, "size");
28395    fail += sao_check(r.has_nan, "has_nan true");
28396    fail += sao_check(r.is_nan[1] && r.is_nan[3], "nan positions");
28397    fail += sao_check(!r.is_nan[0] && !r.is_nan[2], "non-nan positions");
28398    fail += sao_check(r.values[0] == 2, "count aa");
28399    fail += sao_check(r.values[2] == 2, "count abab");
28400    if (fail == 0) std::cout << "    OK" << std::endl;
28401}
28402
28403void pd_test_str_count_no_nan() {
len (pd_test_3_all.cpp:20867)
20857    auto title_result = s.str().title();
20858    if (title_result[0] != "Hello World" || title_result[1] != "Hello World" || title_result[2] != "Hello World") {
20859        std::cout << "  [FAIL] : title() failed" << std::endl;
20860        throw std::runtime_error("pd_test_str_capitalize_title: title() failed");
20861    }
20862
20863    std::cout << " -> tests passed" << std::endl;
20864}
20865
20866// ============================================================================
20867// Test str().len()
20868// ============================================================================
20869
20870void pd_test_str_len() {
20871    std::cout << "========= Series.str().len() ============================";
20872
20873    pandas::Series<std::string> s({"a", "bb", "ccc", ""});
20874
20875    auto lens = s.str().len();
20876    if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877        std::cout << "  [FAIL] : len() failed" << std::endl;
join (pd_test_1_all.cpp:12353)
12343            std::cout << " -> tests passed" << std::endl;
12344        }
12345
12346        void pd_test_index_join() {
12347            std::cout << "========= join ========================================";
12348
12349            pandas::Index<numpy::int64> idx1{1, 2, 3};
12350            pandas::Index<numpy::int64> idx2{2, 3, 4};
12351
12352            auto [inner_joined, left_idx, right_idx] = idx1.join(idx2, "inner");
12353            bool passed = (inner_joined.size() == 2);  // {2, 3}
12354
12355            auto [outer_joined, ol_idx, or_idx] = idx1.join(idx2, "outer");
12356            passed = passed && (outer_joined.size() == 4);  // {1, 2, 3, 4}
12357
12358            if (!passed) {
12359                std::cout << "  [FAIL] : in pd_test_index_join() : join failed" << std::endl;
12360                throw std::runtime_error("pd_test_index_join failed");
12361            }
endswith (pd_test_3_all.cpp:20933)
20923    auto result = s.str().contains("an", true, false);  // case_sensitive=true, regex=false
20924    if (result[0] != false || result[1] != true || result[2] != false) {
20925        std::cout << "  [FAIL] : contains() failed" << std::endl;
20926        throw std::runtime_error("pd_test_str_contains: contains() failed");
20927    }
20928
20929    std::cout << " -> tests passed" << std::endl;
20930}
20931
20932// ============================================================================
20933// Test str().startswith() and str().endswith()
20934// ============================================================================
20935
20936void pd_test_str_startswith_endswith() {
20937    std::cout << "========= Series.str().startswith/endswith() ============";
20938
20939    pandas::Series<std::string> s({"hello", "world", "help"});
20940
20941    auto starts_result = s.str().startswith("hel");
20942    if (starts_result[0] != true || starts_result[1] != false || starts_result[2] != true) {
20943        std::cout << "  [FAIL] : startswith() failed" << std::endl;
capitalize (pd_test_3_all.cpp:20843)
20833    auto upper_result = s.str().upper();
20834    if (upper_result[0] != "HELLO" || upper_result[1] != "WORLD" || upper_result[2] != "TEST") {
20835        std::cout << "  [FAIL] : upper() failed" << std::endl;
20836        throw std::runtime_error("pd_test_str_lower_upper: upper() failed");
20837    }
20838
20839    std::cout << " -> tests passed" << std::endl;
20840}
20841
20842// ============================================================================
20843// Test str().capitalize() and str().title()
20844// ============================================================================
20845
20846void pd_test_str_capitalize_title() {
20847    std::cout << "========= Series.str().capitalize/title() ===============";
20848
20849    pandas::Series<std::string> s({"hello world", "HELLO WORLD", "hELLO wORLD"});
20850
20851    auto cap_result = s.str().capitalize();
20852    if (cap_result[0] != "Hello world" || cap_result[1] != "Hello world" || cap_result[2] != "Hello world") {
20853        std::cout << "  [FAIL] : capitalize() failed" << std::endl;
casefold (pd_test_3_all.cpp:21059)
21049    auto result = s.str().cat("-");
21050    if (result != "a-b-c") {
21051        std::cout << "  [FAIL] : cat() failed, got: " << result << std::endl;
21052        throw std::runtime_error("pd_test_str_cat: cat() failed");
21053    }
21054
21055    std::cout << " -> tests passed" << std::endl;
21056}
21057
21058// ============================================================================
21059// Test str().casefold() (plan_04a)
21060// ============================================================================
21061
21062void pd_test_str_casefold() {
21063    std::cout << "========= Series.str().casefold() =======================";
21064
21065    pandas::Series<std::string> s({"FOO", "Bar", "HELLO"});
21066    auto result = s.str().casefold();
21067    if (result[0] != "foo" || result[1] != "bar" || result[2] != "hello") {
21068        std::cout << "  [FAIL] : casefold() failed" << std::endl;
21069        throw std::runtime_error("pd_test_str_casefold: casefold() failed");
cat (pd_test_3_all.cpp:16259)
16249    }
16250
16251    std::cout << " -> tests passed" << std::endl;
16252}
16253
16254void pd_test_categorical_fillna_params() {
16255    std::cout << "========= CategoricalArray fillna params =============";
16256
16257    // Create CategoricalArray using vector constructor with optional values
16258    std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "a"};
16259    pandas::CategoricalArray cat(values);
16260
16261    // Test fillna with method and limit parameters (should compile and work)
16262    auto result = cat.fillna("b", "", std::nullopt, true);
16263
16264    bool passed = (result.size() == 4);
16265    if (!passed) {
16266        std::cout << "  [FAIL] : in pd_test_categorical_fillna_params() : fillna failed" << std::endl;
16267        throw std::runtime_error("pd_test_categorical_fillna_params failed");
16268    }
center (pd_test_3_all.cpp:21005)
20995    auto alnum_result = s.str().isalnum();
20996    if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997        std::cout << "  [FAIL] : isalnum() failed" << std::endl;
20998        throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999    }
21000
21001    std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009    std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011    pandas::Series<std::string> s({"1", "22", "333"});
21012
21013    auto zfill_result = s.str().zfill(5);
21014    if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015        std::cout << "  [FAIL] : zfill() failed" << std::endl;
contains (pd_test_1_all.cpp:2200)
2190// Test: contains method
2191// ============================================================================
2192void test_contains() {
2193    std::cout << "========= IntervalArray: contains ======================= ";
2194
2195    std::vector<numpy::float64> breaks = {0.0, 1.0, 2.0, 3.0};
2196
2197    // Right-closed intervals: (0, 1], (1, 2], (2, 3]
2198    auto arr_right = pandas::IntervalArrayFloat64::from_breaks(breaks, pandas::IntervalClosed::Right);
2199
2200    // Test contains(1.0) - should be in interval 0 but not 1 (since 1 is exclusive on left of interval 1)
2201    auto contains_1 = arr_right.contains(1.0);
2202    // (0, 1] contains 1: yes, (1, 2] contains 1: no (open on left), (2, 3] contains 1: no
2203    if (contains_1[0].value_or(false) != true ||
2204        contains_1[1].value_or(true) != false ||
2205        contains_1[2].value_or(true) != false) {
2206        std::cout << "[FAIL] : in test_contains() : right-closed contains 1.0" << std::endl;
2207        return;
2208    }
2209
2210    // Left-closed intervals: [0, 1), [1, 2), [2, 3)
decode (pd_test_3_all.cpp:21401)
21391    pandas::Series<std::string> s({"hello", "world"});
21392    auto result = s.str().normalize("NFC");
21393    if (result[0] != "hello" || result[1] != "world") {
21394        std::cout << "  [FAIL] : normalize failed" << std::endl;
21395        throw std::runtime_error("pd_test_str_normalize: normalize failed");
21396    }
21397    std::cout << " -> tests passed" << std::endl;
21398}
21399
21400// ============================================================================
21401// Test str().encode() / decode() (plan_04c)
21402// ============================================================================
21403
21404void pd_test_str_encode_decode() {
21405    std::cout << "========= Series.str().encode/decode() ==================";
21406
21407    pandas::Series<std::string> s({"hello", "world"});
21408    auto encoded = s.str().encode("utf-8");
21409    if (encoded[0] != "hello" || encoded[1] != "world") {
21410        std::cout << "  [FAIL] : encode failed" << std::endl;
21411        throw std::runtime_error("pd_test_str_encode_decode: encode failed");
encode (pd_test_3_all.cpp:21401)
21391    pandas::Series<std::string> s({"hello", "world"});
21392    auto result = s.str().normalize("NFC");
21393    if (result[0] != "hello" || result[1] != "world") {
21394        std::cout << "  [FAIL] : normalize failed" << std::endl;
21395        throw std::runtime_error("pd_test_str_normalize: normalize failed");
21396    }
21397    std::cout << " -> tests passed" << std::endl;
21398}
21399
21400// ============================================================================
21401// Test str().encode() / decode() (plan_04c)
21402// ============================================================================
21403
21404void pd_test_str_encode_decode() {
21405    std::cout << "========= Series.str().encode/decode() ==================";
21406
21407    pandas::Series<std::string> s({"hello", "world"});
21408    auto encoded = s.str().encode("utf-8");
21409    if (encoded[0] != "hello" || encoded[1] != "world") {
21410        std::cout << "  [FAIL] : encode failed" << std::endl;
21411        throw std::runtime_error("pd_test_str_encode_decode: encode failed");
extract (pd_test_3_all.cpp:21283)
21273        throw std::runtime_error("pd_test_str_findall: findall element 1 failed");
21274    }
21275    if (result[2].value().size() != 0) {
21276        std::cout << "  [FAIL] : findall element 2 should be empty" << std::endl;
21277        throw std::runtime_error("pd_test_str_findall: findall element 2 failed");
21278    }
21279    std::cout << " -> tests passed" << std::endl;
21280}
21281
21282// ============================================================================
21283// Test str().extract() (plan_04b)
21284// ============================================================================
21285
21286void pd_test_str_extract() {
21287    std::cout << "========= Series.str().extract() ========================";
21288
21289    pandas::Series<std::string> s({"a1", "b2", "c3"});
21290    auto result = s.str().extract("([a-z])([0-9])");
21291    if (result[0].size() != 2 || result[0][0] != "a" || result[0][1] != "1") {
21292        std::cout << "  [FAIL] : extract element 0 failed" << std::endl;
21293        throw std::runtime_error("pd_test_str_extract: extract element 0 failed");
extractall (pd_test_3_all.cpp:21310)
21300    pandas::Series<std::string> s2({"xyz"});
21301    auto result2 = s2.str().extract("([0-9])");
21302    if (result2[0].size() != 1 || result2[0][0] != "") {
21303        std::cout << "  [FAIL] : extract no-match failed" << std::endl;
21304        throw std::runtime_error("pd_test_str_extract: extract no-match failed");
21305    }
21306    std::cout << " -> tests passed" << std::endl;
21307}
21308
21309// ============================================================================
21310// Test str().extractall() (plan_04b)
21311// ============================================================================
21312
21313void pd_test_str_extractall() {
21314    std::cout << "========= Series.str().extractall() =====================";
21315
21316    pandas::Series<std::string> s({"a1b2", "c3", "xyz"});
21317    auto result = s.str().extractall("([a-z])([0-9])");
21318    if (result[0].size() != 2 ||
21319        result[0][0][0] != "a" || result[0][0][1] != "1" ||
21320        result[0][1][0] != "b" || result[0][1][1] != "2") {
extractall_with_index (pd_test_5_all.cpp:123693)
123683        r.expected = cells[4];
123684        r.note     = cells[5];
123685        rows.push_back(std::move(r));
123686    }
123687    return rows;
123688}
123689
123690static std::string run_oracle_row(const OracleRow& r) {
123691    pandas::Series<std::string> s(r.input);
123692    if (r.op == "extractall") {
123693        auto res = s.str().extractall_with_index(r.arg);
123694        return format_extractall(res);
123695    }
123696    if (r.op == "get_dummies") {
123697        auto res = s.str().get_dummies_as_multiindex(r.arg);
123698        return format_get_dummies(res);
123699    }
123700    throw std::runtime_error("unknown op: " + r.op);
123701}
123702
123703static void run_oracle_subset(int sub_case, int begin_id, int end_id,
find (pd_test_1_all.cpp:5400)
5390void pd_test_categorical_index_categories_property() {
5391    std::cout << "========= categories property =========================";
5392
5393    pandas::CategoricalArray arr({"red", "green", "blue", "red"});
5394    pandas::CategoricalIndex idx(arr);
5395
5396    const std::vector<std::string>& cats = idx.categories();
5397
5398    bool passed = (cats.size() == 3 &&
5399                   std::find(cats.begin(), cats.end(), "red") != cats.end() &&
5400                   std::find(cats.begin(), cats.end(), "green") != cats.end() &&
5401                   std::find(cats.begin(), cats.end(), "blue") != cats.end());
5402    if (!passed) {
5403        std::cout << "  [FAIL] : in pd_test_categorical_index_categories_property()" << std::endl;
5404        throw std::runtime_error("pd_test_categorical_index_categories_property failed");
5405    }
5406
5407    std::cout << " -> tests passed" << std::endl;
5408}
findall (pd_test_3_all.cpp:21259)
21249    auto result2 = s.str().fullmatch("foo.*");
21250    if (result2[0] != true || result2[1] != true || result2[2] != false || result2[3] != true) {
21251        std::cout << "  [FAIL] : fullmatch('foo.*') failed" << std::endl;
21252        throw std::runtime_error("pd_test_str_fullmatch: fullmatch('foo.*') failed");
21253    }
21254    std::cout << " -> tests passed" << std::endl;
21255}
21256
21257// ============================================================================
21258// Test str().findall() (plan_04b)
21259// ============================================================================
21260
21261void pd_test_str_findall() {
21262    std::cout << "========= Series.str().findall() ========================";
21263
21264    pandas::Series<std::string> s({"a1b2c3", "x4y5", "no digits"});
21265    auto result = s.str().findall("[0-9]");
21266    if (result[0].value().size() != 3 || result[0].value()[0] != "1" || result[0].value()[1] != "2" || result[0].value()[2] != "3") {
21267        std::cout << "  [FAIL] : findall element 0 failed" << std::endl;
21268        throw std::runtime_error("pd_test_str_findall: findall element 0 failed");
fullmatch (pd_test_3_all.cpp:21237)
21227    auto result2 = s.str().match("FOO", false);
21228    if (result2[0] != true || result2[1] != false || result2[2] != true || result2[3] != false) {
21229        std::cout << "  [FAIL] : match('FOO', case=false) failed" << std::endl;
21230        throw std::runtime_error("pd_test_str_match: match case insensitive failed");
21231    }
21232    std::cout << " -> tests passed" << std::endl;
21233}
21234
21235// ============================================================================
21236// Test str().fullmatch() (plan_04b)
21237// ============================================================================
21238
21239void pd_test_str_fullmatch() {
21240    std::cout << "========= Series.str().fullmatch() ======================";
21241
21242    pandas::Series<std::string> s({"foo", "foobar", "bar", "foo1"});
21243    auto result = s.str().fullmatch("foo");
21244    if (result[0] != true || result[1] != false || result[2] != false || result[3] != false) {
21245        std::cout << "  [FAIL] : fullmatch('foo') failed" << std::endl;
21246        throw std::runtime_error("pd_test_str_fullmatch: fullmatch('foo') failed");
index (pd_test_1_all.cpp:6680) (note: this excerpt matched on DataFrame index() after set_axis; it does not call str().index())
6670        void pd_test_dataframe_index_ops() {
6671            std::cout << "========= index operations =================";
6672
6673            // Test set_axis (rows)
6674            {
6675                std::map<std::string, std::vector<int>> data;
6676                data["A"] = {1, 2, 3};
6677                pandas::DataFrame df(data);
6678
6679                auto renamed = df.set_axis({"x", "y", "z"}, 0);
6680                std::string idx0 = renamed.index().get_value_str(0);
6681                if (idx0 != "x") {
6682                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6683                    throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
6684                }
6685            }
6686
6687            // Test set_axis (columns)
6688            {
6689                std::map<std::string, std::vector<int>> data;
6690                data["A"] = {1, 2};
isalnum (pd_test_3_all.cpp:20975)
20965    auto result = s.str().replace("hello", "hi", false);  // regex=false
20966    if (result[0] != "hi" || result[1] != "world" || result[2] != "hi world") {
20967        std::cout << "  [FAIL] : replace() failed" << std::endl;
20968        throw std::runtime_error("pd_test_str_replace: replace() failed");
20969    }
20970
20971    std::cout << " -> tests passed" << std::endl;
20972}
20973
20974// ============================================================================
20975// Test str().isalpha(), str().isdigit(), str().isalnum()
20976// ============================================================================
20977
20978void pd_test_str_is_methods() {
20979    std::cout << "========= Series.str().isalpha/isdigit/isalnum() ========";
20980
20981    pandas::Series<std::string> s({"abc", "123", "abc123", ""});
20982
20983    auto alpha_result = s.str().isalpha();
20984    if (alpha_result[0] != true || alpha_result[1] != false || alpha_result[2] != false || alpha_result[3] != false) {
20985        std::cout << "  [FAIL] : isalpha() failed" << std::endl;
isalpha (pd_test_3_all.cpp:20975)
20965    auto result = s.str().replace("hello", "hi", false);  // regex=false
20966    if (result[0] != "hi" || result[1] != "world" || result[2] != "hi world") {
20967        std::cout << "  [FAIL] : replace() failed" << std::endl;
20968        throw std::runtime_error("pd_test_str_replace: replace() failed");
20969    }
20970
20971    std::cout << " -> tests passed" << std::endl;
20972}
20973
20974// ============================================================================
20975// Test str().isalpha(), str().isdigit(), str().isalnum()
20976// ============================================================================
20977
20978void pd_test_str_is_methods() {
20979    std::cout << "========= Series.str().isalpha/isdigit/isalnum() ========";
20980
20981    pandas::Series<std::string> s({"abc", "123", "abc123", ""});
20982
20983    auto alpha_result = s.str().isalpha();
20984    if (alpha_result[0] != true || alpha_result[1] != false || alpha_result[2] != false || alpha_result[3] != false) {
20985        std::cout << "  [FAIL] : isalpha() failed" << std::endl;
isdecimal (pd_test_3_all.cpp:21124)
21114    pandas::Series<std::string> s({"Hello World", "hello world", "HELLO", "Hello"});
21115    auto result = s.str().istitle();
21116    if (result[0] != true || result[1] != false || result[2] != false || result[3] != true) {
21117        std::cout << "  [FAIL] : istitle() failed" << std::endl;
21118        throw std::runtime_error("pd_test_str_istitle: istitle() failed");
21119    }
21120    std::cout << " -> tests passed" << std::endl;
21121}
21122
21123// ============================================================================
21124// Test str().isnumeric() and str().isdecimal() (plan_04a)
21125// ============================================================================
21126
21127void pd_test_str_isnumeric_isdecimal() {
21128    std::cout << "========= Series.str().isnumeric/isdecimal() ============";
21129
21130    pandas::Series<std::string> s({"123", "abc", "12.3", ""});
21131    auto numeric_result = s.str().isnumeric();
21132    if (numeric_result[0] != true || numeric_result[1] != false ||
21133        numeric_result[2] != false || numeric_result[3] != false) {
21134        std::cout << "  [FAIL] : isnumeric() failed" << std::endl;
isdigit (pd_test_3_all.cpp:20975)
20965    auto result = s.str().replace("hello", "hi", false);  // regex=false
20966    if (result[0] != "hi" || result[1] != "world" || result[2] != "hi world") {
20967        std::cout << "  [FAIL] : replace() failed" << std::endl;
20968        throw std::runtime_error("pd_test_str_replace: replace() failed");
20969    }
20970
20971    std::cout << " -> tests passed" << std::endl;
20972}
20973
20974// ============================================================================
20975// Test str().isalpha(), str().isdigit(), str().isalnum()
20976// ============================================================================
20977
20978void pd_test_str_is_methods() {
20979    std::cout << "========= Series.str().isalpha/isdigit/isalnum() ========";
20980
20981    pandas::Series<std::string> s({"abc", "123", "abc123", ""});
20982
20983    auto alpha_result = s.str().isalpha();
20984    if (alpha_result[0] != true || alpha_result[1] != false || alpha_result[2] != false || alpha_result[3] != false) {
20985        std::cout << "  [FAIL] : isalpha() failed" << std::endl;
isdigit_with_nan (pd_test_3_all.cpp:28418)
28408    auto r = s.str().count_with_nan("a");
28409    fail += sao_check(!r.has_nan, "has_nan false");
28410    fail += sao_check(r.values[0] == 1 && r.values[1] == 0 && r.values[2] == 2, "values");
28411    if (fail == 0) std::cout << "    OK" << std::endl;
28412}
28413
28414void pd_test_str_isdigit_with_nan() {
28415    std::cout << "  -- pd_test_str_isdigit_with_nan --" << std::endl;
28416    int fail = 0;
28417    pandas::Series<std::string> s({"123", "NaN", "abc", "None", "45"}, "x");
28418    auto r = s.str().isdigit_with_nan();
28419    fail += sao_check(r.values.size() == 5, "size");
28420    fail += sao_check(r.has_nan, "has_nan");
28421    fail += sao_check(r.values[0] == true, "123 digit");
28422    fail += sao_check(r.is_nan[1], "NaN pos 1");
28423    fail += sao_check(r.values[2] == false, "abc not digit");
28424    fail += sao_check(r.is_nan[3], "None pos 3");
28425    fail += sao_check(r.values[4] == true, "45 digit");
28426    if (fail == 0) std::cout << "    OK" << std::endl;
28427}
isnumeric (pd_test_3_all.cpp:21124)
21114    pandas::Series<std::string> s({"Hello World", "hello world", "HELLO", "Hello"});
21115    auto result = s.str().istitle();
21116    if (result[0] != true || result[1] != false || result[2] != false || result[3] != true) {
21117        std::cout << "  [FAIL] : istitle() failed" << std::endl;
21118        throw std::runtime_error("pd_test_str_istitle: istitle() failed");
21119    }
21120    std::cout << " -> tests passed" << std::endl;
21121}
21122
21123// ============================================================================
21124// Test str().isnumeric() and str().isdecimal() (plan_04a)
21125// ============================================================================
21126
21127void pd_test_str_isnumeric_isdecimal() {
21128    std::cout << "========= Series.str().isnumeric/isdecimal() ============";
21129
21130    pandas::Series<std::string> s({"123", "abc", "12.3", ""});
21131    auto numeric_result = s.str().isnumeric();
21132    if (numeric_result[0] != true || numeric_result[1] != false ||
21133        numeric_result[2] != false || numeric_result[3] != false) {
21134        std::cout << "  [FAIL] : isnumeric() failed" << std::endl;
istitle (pd_test_3_all.cpp:21108)
21098    pandas::Series<std::string> s({"a", "bc", "xyz"});
21099    auto result = s.str().repeat(3);
21100    if (result[0] != "aaa" || result[1] != "bcbcbc" || result[2] != "xyzxyzxyz") {
21101        std::cout << "  [FAIL] : repeat() failed" << std::endl;
21102        throw std::runtime_error("pd_test_str_repeat_method: repeat() failed");
21103    }
21104    std::cout << " -> tests passed" << std::endl;
21105}
21106
21107// ============================================================================
21108// Test str().istitle() (plan_04a)
21109// ============================================================================
21110
21111void pd_test_str_istitle() {
21112    std::cout << "========= Series.str().istitle() ========================";
21113
21114    pandas::Series<std::string> s({"Hello World", "hello world", "HELLO", "Hello"});
21115    auto result = s.str().istitle();
21116    if (result[0] != true || result[1] != false || result[2] != false || result[3] != true) {
21117        std::cout << "  [FAIL] : istitle() failed" << std::endl;
21118        throw std::runtime_error("pd_test_str_istitle: istitle() failed");
ljust (pd_test_3_all.cpp:21005)
20995    auto alnum_result = s.str().isalnum();
20996    if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997        std::cout << "  [FAIL] : isalnum() failed" << std::endl;
20998        throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999    }
21000
21001    std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009    std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011    pandas::Series<std::string> s({"1", "22", "333"});
21012
21013    auto zfill_result = s.str().zfill(5);
21014    if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015        std::cout << "  [FAIL] : zfill() failed" << std::endl;
lower (pd_test_3_all.cpp:20819)
20809#include <string>
20810
20811#include "../pandas/pd_series.h"
20812
20813// CRITICAL: No using namespace directives
20814
20815namespace dataframe_tests {
20816namespace dataframe_tests_string_accessor {
20817
20818// ============================================================================
20819// Test str().lower() and str().upper()
20820// ============================================================================
20821
20822void pd_test_str_lower_upper() {
20823    std::cout << "========= Series.str().lower/upper() ===================";
20824
20825    pandas::Series<std::string> s({"Hello", "WORLD", "TeSt"});
20826
20827    auto lower_result = s.str().lower();
20828    if (lower_result[0] != "hello" || lower_result[1] != "world" || lower_result[2] != "test") {
20829        std::cout << "  [FAIL] : lower() failed" << std::endl;
lstrip (pd_test_3_all.cpp:20885)
20875    auto lens = s.str().len();
20876    if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877        std::cout << "  [FAIL] : len() failed" << std::endl;
20878        throw std::runtime_error("pd_test_str_len: len() failed");
20879    }
20880
20881    std::cout << " -> tests passed" << std::endl;
20882}
20883
20884// ============================================================================
20885// Test str().strip(), str().lstrip(), str().rstrip()
20886// ============================================================================
20887
20888void pd_test_str_strip() {
20889    std::cout << "========= Series.str().strip() ==========================";
20890
20891    pandas::Series<std::string> s({"  hello  ", "  world", "test  "});
20892
20893    auto strip_result = s.str().strip();
20894    if (strip_result[0] != "hello" || strip_result[1] != "world" || strip_result[2] != "test") {
20895        std::cout << "  [FAIL] : strip() failed" << std::endl;
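match (pd_test_2_all.cpp:1467) (note: this excerpt is a DataFrame between_time test where "match" appears only in comments; it does not call str().match())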
1457        void pd_test_between_time_overnight() {
1458            std::cout << "========= DataFrame between_time: overnight range ======";
1459
1460            // Test overnight range (e.g., 23:00 to 01:00)
1461            std::map<std::string, std::vector<double>> data = {
1462                {"A", {1.0, 2.0, 3.0, 4.0, 5.0}}
1463            };
1464            pandas::DataFrame df(data);
1465
1466            std::vector<std::string> datetime_index = {
1467                "2018-04-09 00:30:00",  // Should match (before 01:00)
1468                "2018-04-09 12:00:00",  // Should NOT match
1469                "2018-04-09 22:00:00",  // Should NOT match
1470                "2018-04-09 23:30:00",  // Should match (after 23:00)
1471                "2018-04-10 00:00:00"   // Should match (at midnight, before 01:00)
1472            };
1473            df.set_index(std::make_unique<pandas::Index<std::string>>(datetime_index));
1474
1475            // Overnight range: 23:00 to 01:00
1476            auto result = df.between_time("23:00:00", "01:00:00");
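normalize (pd_test_1_all.cpp:8723) (note: this excerpt calls DatetimeMixinIndex::normalize(); it does not call a str() accessor method)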
8713void pd_test_datetime_mixin_normalize() {
8714    std::cout << "========= normalize ===================================";
8715
8716    // Create datetime with time component
8717    std::vector<std::optional<numpy::datetime64>> values = {
8718        numpy::datetime64(86400000000000LL + 3600000000000LL, numpy::DateTimeUnit::Nanosecond)  // 1 day + 1 hour
8719    };
8720    pandas::DatetimeArray arr(values);
8721    pandas::DatetimeMixinIndex idx(arr);
8722
8723    pandas::DatetimeMixinIndex normalized = idx.normalize();
8724
8725    bool passed = (normalized.size() == 1);
8726    if (!passed) {
8727        std::cout << "  [FAIL] : in pd_test_datetime_mixin_normalize()" << std::endl;
8728        throw std::runtime_error("pd_test_datetime_mixin_normalize failed");
8729    }
8730
8731    std::cout << " -> tests passed" << std::endl;
8732}
partition (pd_test_3_all.cpp:21422)
21412    }
21413    auto decoded = s.str().decode("utf-8");
21414    if (decoded[0] != "hello" || decoded[1] != "world") {
21415        std::cout << "  [FAIL] : decode failed" << std::endl;
21416        throw std::runtime_error("pd_test_str_encode_decode: decode failed");
21417    }
21418    std::cout << " -> tests passed" << std::endl;
21419}
21420
21421// ============================================================================
21422// Test str().partition() / rpartition() (plan_04c)
21423// ============================================================================
21424
21425void pd_test_str_partition() {
21426    std::cout << "========= Series.str().partition/rpartition() ===========";
21427
21428    pandas::Series<std::string> s({"hello-world", "foo-bar", "xyz"});
21429    auto result = s.str().partition("-");
21430    if (result[0][0] != "hello" || result[0][1] != "-" || result[0][2] != "world") {
21431        std::cout << "  [FAIL] : partition element 0 failed" << std::endl;
21432        throw std::runtime_error("pd_test_str_partition: partition element 0 failed");
removeprefix (pd_test_3_all.cpp:21148)
21138    auto decimal_result = s.str().isdecimal();
21139    if (decimal_result[0] != true || decimal_result[1] != false ||
21140        decimal_result[2] != false || decimal_result[3] != false) {
21141        std::cout << "  [FAIL] : isdecimal() failed" << std::endl;
21142        throw std::runtime_error("pd_test_str_isnumeric_isdecimal: isdecimal() failed");
21143    }
21144    std::cout << " -> tests passed" << std::endl;
21145}
21146
21147// ============================================================================
21148// Test str().removeprefix() and str().removesuffix() (plan_04a)
21149// ============================================================================
21150
21151void pd_test_str_removeprefix_removesuffix() {
21152    std::cout << "========= Series.str().removeprefix/removesuffix() ======";
21153
21154    pandas::Series<std::string> s({"prefix_foo", "prefix_bar", "other"});
21155    auto prefix_result = s.str().removeprefix("prefix_");
21156    if (prefix_result[0] != "foo" || prefix_result[1] != "bar" || prefix_result[2] != "other") {
21157        std::cout << "  [FAIL] : removeprefix() failed" << std::endl;
21158        throw std::runtime_error("pd_test_str_removeprefix_removesuffix: removeprefix() failed");
removesuffix (pd_test_3_all.cpp:21148)
21138    auto decimal_result = s.str().isdecimal();
21139    if (decimal_result[0] != true || decimal_result[1] != false ||
21140        decimal_result[2] != false || decimal_result[3] != false) {
21141        std::cout << "  [FAIL] : isdecimal() failed" << std::endl;
21142        throw std::runtime_error("pd_test_str_isnumeric_isdecimal: isdecimal() failed");
21143    }
21144    std::cout << " -> tests passed" << std::endl;
21145}
21146
21147// ============================================================================
21148// Test str().removeprefix() and str().removesuffix() (plan_04a)
21149// ============================================================================
21150
21151void pd_test_str_removeprefix_removesuffix() {
21152    std::cout << "========= Series.str().removeprefix/removesuffix() ======";
21153
21154    pandas::Series<std::string> s({"prefix_foo", "prefix_bar", "other"});
21155    auto prefix_result = s.str().removeprefix("prefix_");
21156    if (prefix_result[0] != "foo" || prefix_result[1] != "bar" || prefix_result[2] != "other") {
21157        std::cout << "  [FAIL] : removeprefix() failed" << std::endl;
21158        throw std::runtime_error("pd_test_str_removeprefix_removesuffix: removeprefix() failed");
repeat (pd_test_3_all.cpp:2166) (note: this excerpt calls CategoricalArray::repeat(); it does not call str().repeat())
2156    auto viewed = arr.view();
2157    if (viewed.size() != 3 || !viewed.equals(arr)) {
2158        throw std::runtime_error("view failed");
2159    }
2160
2161    std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165    std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167    std::vector<std::optional<std::string>> values = {"a", "b"};
2168    pandas::CategoricalArray arr(values);
2169
2170    auto result = arr.repeat(3);
2171    if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172        *result[3] != "b" || *result[5] != "b") {
2173        throw std::runtime_error("repeat scalar failed");
2174    }
rfind (pd_test_3_all.cpp:21075)
21065    pandas::Series<std::string> s({"FOO", "Bar", "HELLO"});
21066    auto result = s.str().casefold();
21067    if (result[0] != "foo" || result[1] != "bar" || result[2] != "hello") {
21068        std::cout << "  [FAIL] : casefold() failed" << std::endl;
21069        throw std::runtime_error("pd_test_str_casefold: casefold() failed");
21070    }
21071    std::cout << " -> tests passed" << std::endl;
21072}
21073
21074// ============================================================================
21075// Test str().rfind() (plan_04a)
21076// ============================================================================
21077
21078void pd_test_str_rfind() {
21079    std::cout << "========= Series.str().rfind() ==========================";
21080
21081    pandas::Series<std::string> s({"foobarfoo", "barfoo", "hello"});
21082    auto result = s.str().rfind("foo");
21083    if (result[0] != 6 || result[1] != 3 || result[2] != -1) {
21084        std::cout << "  [FAIL] : rfind() failed, got: "
21085                  << result[0] << ", " << result[1] << ", " << result[2] << std::endl;
rindex (pd_test_3_all.cpp:21449)
21439    pandas::Series<std::string> s2({"hello-world-test", "foo-bar"});
21440    auto result2 = s2.str().rpartition("-");
21441    if (result2[0][0] != "hello-world" || result2[0][1] != "-" || result2[0][2] != "test") {
21442        std::cout << "  [FAIL] : rpartition element 0 failed" << std::endl;
21443        throw std::runtime_error("pd_test_str_partition: rpartition element 0 failed");
21444    }
21445    std::cout << " -> tests passed" << std::endl;
21446}
21447
21448// ============================================================================
21449// Test str().index() / rindex() (plan_04c)
21450// ============================================================================
21451
21452void pd_test_str_index_rindex() {
21453    std::cout << "========= Series.str().index/rindex() ===================";
21454
21455    pandas::Series<std::string> s({"foobar", "barfoo"});
21456    auto result = s.str().index("oo");
21457    if (result[0] != 1 || result[1] != 4) {
21458        std::cout << "  [FAIL] : index('oo') failed" << std::endl;
21459        throw std::runtime_error("pd_test_str_index_rindex: index('oo') failed");
rjust (pd_test_3_all.cpp:21005)
20995    auto alnum_result = s.str().isalnum();
20996    if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997        std::cout << "  [FAIL] : isalnum() failed" << std::endl;
20998        throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999    }
21000
21001    std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009    std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011    pandas::Series<std::string> s({"1", "22", "333"});
21012
21013    auto zfill_result = s.str().zfill(5);
21014    if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015        std::cout << "  [FAIL] : zfill() failed" << std::endl;
rpartition (pd_test_3_all.cpp:21422)
21412    }
21413    auto decoded = s.str().decode("utf-8");
21414    if (decoded[0] != "hello" || decoded[1] != "world") {
21415        std::cout << "  [FAIL] : decode failed" << std::endl;
21416        throw std::runtime_error("pd_test_str_encode_decode: decode failed");
21417    }
21418    std::cout << " -> tests passed" << std::endl;
21419}
21420
21421// ============================================================================
21422// Test str().partition() / rpartition() (plan_04c)
21423// ============================================================================
21424
21425void pd_test_str_partition() {
21426    std::cout << "========= Series.str().partition/rpartition() ===========";
21427
21428    pandas::Series<std::string> s({"hello-world", "foo-bar", "xyz"});
21429    auto result = s.str().partition("-");
21430    if (result[0][0] != "hello" || result[0][1] != "-" || result[0][2] != "world") {
21431        std::cout << "  [FAIL] : partition element 0 failed" << std::endl;
21432        throw std::runtime_error("pd_test_str_partition: partition element 0 failed");
rsplit (pd_test_3_all.cpp:21171)
21161    pandas::Series<std::string> s2({"foo_suffix", "bar_suffix", "other"});
21162    auto suffix_result = s2.str().removesuffix("_suffix");
21163    if (suffix_result[0] != "foo" || suffix_result[1] != "bar" || suffix_result[2] != "other") {
21164        std::cout << "  [FAIL] : removesuffix() failed" << std::endl;
21165        throw std::runtime_error("pd_test_str_removeprefix_removesuffix: removesuffix() failed");
21166    }
21167    std::cout << " -> tests passed" << std::endl;
21168}
21169
21170// ============================================================================
21171// Test str().rsplit() (plan_04a)
21172// ============================================================================
21173
21174void pd_test_str_rsplit() {
21175    std::cout << "========= Series.str().rsplit() =========================";
21176
21177    pandas::Series<std::string> s({"a,b,c", "x,y"});
21178    auto result = s.str().rsplit(",");
21179    if (result[0].size() != 3 || result[0][0] != "a" || result[0][1] != "b" || result[0][2] != "c") {
21180        std::cout << "  [FAIL] : rsplit() unlimited failed" << std::endl;
21181        throw std::runtime_error("pd_test_str_rsplit: rsplit() unlimited failed");
rstrip (pd_test_3_all.cpp:20885)
20875    auto lens = s.str().len();
20876    if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877        std::cout << "  [FAIL] : len() failed" << std::endl;
20878        throw std::runtime_error("pd_test_str_len: len() failed");
20879    }
20880
20881    std::cout << " -> tests passed" << std::endl;
20882}
20883
20884// ============================================================================
20885// Test str().strip(), str().lstrip(), str().rstrip()
20886// ============================================================================
20887
20888void pd_test_str_strip() {
20889    std::cout << "========= Series.str().strip() ==========================";
20890
20891    pandas::Series<std::string> s({"  hello  ", "  world", "test  "});
20892
20893    auto strip_result = s.str().strip();
20894    if (strip_result[0] != "hello" || strip_result[1] != "world" || strip_result[2] != "test") {
20895        std::cout << "  [FAIL] : strip() failed" << std::endl;
slice (pd_test_1_all.cpp:17546) (note: this excerpt calls PeriodIndex::slice(); it does not call a str() accessor method)
17536// ============================================================================
17537// Slicing / Indexing Tests
17538// ============================================================================
17539
17540void pd_test_period_index_slice() {
17541    std::cout << "========= slice method ================================";
17542
17543    std::vector<int64_t> ordinals = {0, 1, 2, 3, 4};
17544    pandas::PeriodIndex idx(ordinals, "D");
17545
17546    pandas::PeriodIndex sliced = idx.slice(1, 4);
17547
17548    bool passed = (sliced.size() == 3 &&
17549                   sliced[0].has_value() && *sliced[0] == 1);
17550    if (!passed) {
17551        std::cout << "  [FAIL] : in pd_test_period_index_slice()" << std::endl;
17552        throw std::runtime_error("pd_test_period_index_slice failed");
17553    }
17554
17555    std::cout << " -> tests passed" << std::endl;
17556}
slice_replace (pd_test_3_all.cpp:21485)
21475        threw = true;
21476    }
21477    if (!threw) {
21478        std::cout << "  [FAIL] : index should throw on not found" << std::endl;
21479        throw std::runtime_error("pd_test_str_index_rindex: index should throw on not found");
21480    }
21481    std::cout << " -> tests passed" << std::endl;
21482}
21483
21484// ============================================================================
21485// Test str().slice_replace() (plan_04c)
21486// ============================================================================
21487
21488void pd_test_str_slice_replace() {
21489    std::cout << "========= Series.str().slice_replace() ==================";
21490
21491    pandas::Series<std::string> s({"hello", "world", "foo"});
21492    auto result = s.str().slice_replace(0, 2, "XX");
21493    if (result[0] != "XXllo" || result[1] != "XXrld" || result[2] != "XXo") {
21494        std::cout << "  [FAIL] : slice_replace(0, 2, 'XX') failed" << std::endl;
21495        throw std::runtime_error("pd_test_str_slice_replace: slice_replace failed");
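split (pd_test_4_all.cpp:3961) (note: this excerpt mentions str.split only in a Python comment; the C++ code hand-builds the result and does not call str().split())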
3951// =============================================================================
3952// Standalone-only helpers (dropped when pasted into pd_test_repr_mismatch.cpp).
3953// =============================================================================
3954
3955// =============================================================================
3956// Case 1 — explode.split_comma
3957//
3958// Source: pandasPython_tests/test_pandas_reshaping_pivot_compare_full.py L511
3959//   pd_df2 = pd.DataFrame([{"var1":"a,b,c","var2":1},
3960//                          {"var1":"d,e,f","var2":2}])
3961//              .assign(var1=lambda d: d.var1.str.split(","))
3962//              .explode("var1").reset_index(drop=True)
3963//
3964// Strategy C: we skip the split/explode and hand-build the 6-row result
3965// (var1 object/string, var2 int64, default RangeIndex(0..5)).
3966// =============================================================================
3967void explode_split_comma() {
3968    pandas::DataFrame df;
3969    df.add_column<std::string>("var1", {"a", "b", "c", "d", "e", "f"});
3970    df.add_column<int64_t>("var2", {1, 1, 1, 2, 2, 2});
3971    apply_default_display(df);
split_expand (pd_test_3_all.cpp:28443)
28433    auto r = s.str().isdigit_with_nan();
28434    fail += sao_check(!r.has_nan, "no nan");
28435    fail += sao_check(r.values[0] && !r.values[1] && r.values[2], "values");
28436    if (fail == 0) std::cout << "    OK" << std::endl;
28437}
28438
28439void pd_test_str_split_expand() {
28440    std::cout << "  -- pd_test_str_split_expand --" << std::endl;
28441    int fail = 0;
28442    pandas::Series<std::string> s({"a,b,c", "d,e,f"}, "x");
28443    auto r = s.str().split_expand(",", -1);
28444    fail += sao_check(r.num_cols == 3, "3 cols");
28445    fail += sao_check(r.num_rows == 2, "2 rows");
28446    fail += sao_check(r.columns[0][0] == "a" && r.columns[1][0] == "b" && r.columns[2][0] == "c", "row0");
28447    fail += sao_check(r.columns[0][1] == "d" && r.columns[1][1] == "e" && r.columns[2][1] == "f", "row1");
28448    if (fail == 0) std::cout << "    OK" << std::endl;
28449}
28450
28451void pd_test_str_split_expand_nan() {
28452    std::cout << "  -- pd_test_str_split_expand_nan --" << std::endl;
28453    int fail = 0;
startswith (pd_test_3_all.cpp:20933)
20923    auto result = s.str().contains("an", true, false);  // case_sensitive=true, regex=false
20924    if (result[0] != false || result[1] != true || result[2] != false) {
20925        std::cout << "  [FAIL] : contains() failed" << std::endl;
20926        throw std::runtime_error("pd_test_str_contains: contains() failed");
20927    }
20928
20929    std::cout << " -> tests passed" << std::endl;
20930}
20931
20932// ============================================================================
20933// Test str().startswith() and str().endswith()
20934// ============================================================================
20935
20936void pd_test_str_startswith_endswith() {
20937    std::cout << "========= Series.str().startswith/endswith() ============";
20938
20939    pandas::Series<std::string> s({"hello", "world", "help"});
20940
20941    auto starts_result = s.str().startswith("hel");
20942    if (starts_result[0] != true || starts_result[1] != false || starts_result[2] != true) {
20943        std::cout << "  [FAIL] : startswith() failed" << std::endl;
strip (pd_test_3_all.cpp:20885)
20875    auto lens = s.str().len();
20876    if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877        std::cout << "  [FAIL] : len() failed" << std::endl;
20878        throw std::runtime_error("pd_test_str_len: len() failed");
20879    }
20880
20881    std::cout << " -> tests passed" << std::endl;
20882}
20883
20884// ============================================================================
20885// Test str().strip(), str().lstrip(), str().rstrip()
20886// ============================================================================
20887
20888void pd_test_str_strip() {
20889    std::cout << "========= Series.str().strip() ==========================";
20890
20891    pandas::Series<std::string> s({"  hello  ", "  world", "test  "});
20892
20893    auto strip_result = s.str().strip();
20894    if (strip_result[0] != "hello" || strip_result[1] != "world" || strip_result[2] != "test") {
20895        std::cout << "  [FAIL] : strip() failed" << std::endl;
title (pd_test_3_all.cpp:20843)
20833    auto upper_result = s.str().upper();
20834    if (upper_result[0] != "HELLO" || upper_result[1] != "WORLD" || upper_result[2] != "TEST") {
20835        std::cout << "  [FAIL] : upper() failed" << std::endl;
20836        throw std::runtime_error("pd_test_str_lower_upper: upper() failed");
20837    }
20838
20839    std::cout << " -> tests passed" << std::endl;
20840}
20841
20842// ============================================================================
20843// Test str().capitalize() and str().title()
20844// ============================================================================
20845
20846void pd_test_str_capitalize_title() {
20847    std::cout << "========= Series.str().capitalize/title() ===============";
20848
20849    pandas::Series<std::string> s({"hello world", "HELLO WORLD", "hELLO wORLD"});
20850
20851    auto cap_result = s.str().capitalize();
20852    if (cap_result[0] != "Hello world" || cap_result[1] != "Hello world" || cap_result[2] != "Hello world") {
20853        std::cout << "  [FAIL] : capitalize() failed" << std::endl;
translate (pd_test_3_all.cpp:21352)
21342    pandas::Series<std::string> s({"abc", "xy", "123"});
21343    auto result = s.str().join("-");
21344    if (result[0] != "a-b-c" || result[1] != "x-y" || result[2] != "1-2-3") {
21345        std::cout << "  [FAIL] : join('-') failed" << std::endl;
21346        throw std::runtime_error("pd_test_str_join: join('-') failed");
21347    }
21348    std::cout << " -> tests passed" << std::endl;
21349}
21350
21351// ============================================================================
21352// Test str().translate() (plan_04c)
21353// ============================================================================
21354
21355void pd_test_str_translate() {
21356    std::cout << "========= Series.str().translate() ======================";
21357
21358    pandas::Series<std::string> s({"abc", "def", "xyz"});
21359    auto result = s.str().translate("abc", "XYZ");
21360    if (result[0] != "XYZ" || result[1] != "def" || result[2] != "xyz") {
21361        std::cout << "  [FAIL] : translate failed" << std::endl;
21362        throw std::runtime_error("pd_test_str_translate: translate failed");
upper (pd_test_3_all.cpp:20819)
20809#include <string>
20810
20811#include "../pandas/pd_series.h"
20812
20813// CRITICAL: No using namespace directives
20814
20815namespace dataframe_tests {
20816namespace dataframe_tests_string_accessor {
20817
20818// ============================================================================
20819// Test str().lower() and str().upper()
20820// ============================================================================
20821
20822void pd_test_str_lower_upper() {
20823    std::cout << "========= Series.str().lower/upper() ===================";
20824
20825    pandas::Series<std::string> s({"Hello", "WORLD", "TeSt"});
20826
20827    auto lower_result = s.str().lower();
20828    if (lower_result[0] != "hello" || lower_result[1] != "world" || lower_result[2] != "test") {
20829        std::cout << "  [FAIL] : lower() failed" << std::endl;
wrap (pd_test_3_all.cpp:21368)
21358    pandas::Series<std::string> s({"abc", "def", "xyz"});
21359    auto result = s.str().translate("abc", "XYZ");
21360    if (result[0] != "XYZ" || result[1] != "def" || result[2] != "xyz") {
21361        std::cout << "  [FAIL] : translate failed" << std::endl;
21362        throw std::runtime_error("pd_test_str_translate: translate failed");
21363    }
21364    std::cout << " -> tests passed" << std::endl;
21365}
21366
21367// ============================================================================
21368// Test str().wrap() (plan_04c)
21369// ============================================================================
21370
21371void pd_test_str_wrap() {
21372    std::cout << "========= Series.str().wrap() ===========================";
21373
21374    pandas::Series<std::string> s({"hello world foo"});
21375    auto result = s.str().wrap(10);
21376    // Should wrap at word boundary
21377    if (result[0].find('\n') == std::string::npos) {
21378        std::cout << "  [FAIL] : wrap should contain newline" << std::endl;
zfill (pd_test_3_all.cpp:21005)
20995    auto alnum_result = s.str().isalnum();
20996    if (alnum_result[0] != true || alnum_result[1] != true || alnum_result[2] != true || alnum_result[3] != false) {
20997        std::cout << "  [FAIL] : isalnum() failed" << std::endl;
20998        throw std::runtime_error("pd_test_str_is_methods: isalnum() failed");
20999    }
21000
21001    std::cout << " -> tests passed" << std::endl;
21002}
21003
21004// ============================================================================
21005// Test str().zfill(), str().center(), str().ljust(), str().rjust()
21006// ============================================================================
21007
21008void pd_test_str_padding() {
21009    std::cout << "========= Series.str().zfill/center/ljust/rjust() =======";
21010
21011    pandas::Series<std::string> s({"1", "22", "333"});
21012
21013    auto zfill_result = s.str().zfill(5);
21014    if (zfill_result[0] != "00001" || zfill_result[1] != "00022" || zfill_result[2] != "00333") {
21015        std::cout << "  [FAIL] : zfill() failed" << std::endl;