SeriesGroupBy#

class pandas::SeriesGroupBy#

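Groups the values of a Series<T> by the keys of a second Series<GroupT> and exposes split-apply-combine operations on the resulting groups: per-group statistics (sum, mean, min, max, ...), aggregation (agg, apply, transform), cumulative operations, and group selection (first, last, nth, get_group).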

Example#

#include <pandas/pandas.h>
using namespace pandas;

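// Group the values of one Series by the keys in another.
// (Both documented constructors require a source series and a `by` series;
// the test suite obtains a SeriesGroupBy through Series::groupby.)
Series<double> values({1.0, 2.0, 3.0, 4.0});
Series<std::string> by({"a", "a", "b", "b"});
auto gb = values.groupby(by);

// Aggregate each group.
auto sums = gb.sum();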

Constructors#

Signature

Location

Example

SeriesGroupBy(const Series<T>& series, const Series<GroupT>& by, bool sort = true)

pd_series_groupby.h:78

SeriesGroupBy(std::shared_ptr<Series<T>> owned, const Series<GroupT>& by, bool sort = true)

pd_series_groupby.h:89

Construction#

Signature

Return Type

Location

Example

Series<T> create_result_series(const std::vector<T>& values) const

Series<T>

pd_series_groupby.h:1157

Series<double> create_result_series_double(const std::vector<double>& values) const

Series<double>

pd_series_groupby.h:1174

Series<int64_t> create_result_series_int64(const std::vector<int64_t>& values) const

Series<int64_t>

pd_series_groupby.h:1193

Indexing / Selection#

Signature

Return Type

Location

Example

Series<T> first() const

Series<T>

pd_series_groupby.h:380

View

Series<T> get_group(const GroupT& key) const

Series<T>

pd_series_groupby.h:1063

View

std::optional<std::string> get_index_name() const

std::optional<std::string>

pd_series_groupby.h:128

View

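[extraction artifact: fragment of a call expression, not a member signature; likely inside apply_result_index] cat_values, cats, false, get_index_name())

n/a (extraction artifact; original text: "cat_values, cats, false,")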

pd_series_groupby.h:1108

View

std::optional<std::string> get_series_name() const

std::optional<std::string>

pd_series_groupby.h:131

pandas::Result idxmin_with_dtype() const

pandas::Result

pd_series_groupby.h:728

View

Series<T> last() const

Series<T>

pd_series_groupby.h:472

View

Data Manipulation#

Signature

Return Type

Location

Example

void set_index_name(const std::string& name)

void

pd_series_groupby.h:120

View

Statistics#

Signature

Return Type

Location

Example

Series<int64_t> count() const

Series<int64_t>

pd_series_groupby.h:276

View

Series<double> cummax() const

Series<double>

pd_series_groupby.h:852

View

Series<double> cummin() const

Series<double>

pd_series_groupby.h:877

View

Series<double> cumprod() const

Series<double>

pd_series_groupby.h:827

View

Series<double> cumsum() const

Series<double>

pd_series_groupby.h:802

View

Series<T> max() const

Series<T>

pd_series_groupby.h:342

View

Series<double> mean() const

Series<double>

pd_series_groupby.h:240

View

Series<double> median() const

Series<double>

pd_series_groupby.h:589

View

Series<T> min() const

Series<T>

pd_series_groupby.h:304

View

Series<int64_t> nunique(bool dropna = true) const

Series<int64_t>

pd_series_groupby.h:955

View

Series<double> std_(int ddof = 1) const

Series<double>

pd_series_groupby.h:510

View

auto sum() const

auto

pd_series_groupby.h:180

View

Series<int64_t> sum_int64_bool_() const

Series<int64_t>

pd_series_groupby.h:220

View

Series<double> var(int ddof = 1) const

Series<double>

pd_series_groupby.h:550

View

Aggregation#

Signature

Return Type

Location

Example

Series<double> agg(const std::string& func) const

Series<double>

pd_series_groupby.h:634

View

DataFrame agg(const std::vector<std::string>& funcs) const

DataFrame

pd_series_groupby.h:708

View

pandas::Result agg_with_dtype(const std::string& how) const

pandas::Result

pd_series_groupby.h:716

View

pandas::Result agg_with_dtype_list(const std::vector<std::string>& funcs) const

pandas::Result

pd_series_groupby.h:722

View

auto apply(Func&& func) const -> Series<decltype(func(std::declval<Series<T>>()))>

auto

pd_series_groupby.h:737

View

void apply_result_index(Series<ResultT>& result) const

void

pd_series_groupby.h:1098

View

Series<T> transform(Func&& func) const

Series<T>

pd_series_groupby.h:767

View

Arithmetic#

Signature

Return Type

Location

Example

const std::vector<std::string>& multiindex_names() const

const std::vector<std::string>&

pd_series_groupby.h:137

View

Comparison#

Signature

Return Type

Location

Example

[extraction artifact: appears to be a local variable declaration, not a member function] std::vector<std::vector<std::string>> level_arrays(nlevels)

std::vector<std::vector<std::string>>

pd_series_groupby.h:1114

Time Series#

Signature

Return Type

Location

Example

Series<double> diff(int periods = 1) const

Series<double>

pd_series_groupby.h:903

View

Series<double> shift(int periods = 1) const

Series<double>

pd_series_groupby.h:935

View

Other Methods#

Signature

Return Type

Location

Example

void build_groups()

void

pd_series_groupby.h:1136

const std::vector<std::string>& categorical_categories() const

const std::vector<std::string>&

pd_series_groupby.h:143

View

Series<int64_t> cumcount(bool ascending = true) const

Series<int64_t>

pd_series_groupby.h:991

const std::map<GroupT, std::vector<size_t>>& group_indices() const

const std::map<GroupT, std::vector<size_t>>&

pd_series_groupby.h:107

const std::vector<GroupT>& group_keys_order() const

const std::vector<GroupT>&

pd_series_groupby.h:112

View

const std::string& grouper_dtype() const

const std::string&

pd_series_groupby.h:149

View

std::vector<GroupT> groups() const

std::vector<GroupT>

pd_series_groupby.h:1046

View

const std::map<GroupT, std::vector<size_t>>& indices() const

const std::map<GroupT, std::vector<size_t>>&

pd_series_groupby.h:1054

View

Series<int64_t> ngroup(bool ascending = true) const

Series<int64_t>

pd_series_groupby.h:1015

size_t ngroups() const

size_t

pd_series_groupby.h:1038

View

Series<T> nth(int n, const std::string& dropna_mode = "") const

Series<T>

pd_series_groupby.h:419

View

const Series<T>& series() const

const Series<T>&

pd_series_groupby.h:117

View

bool series_name_is_int() const

bool

pd_series_groupby.h:125

void set_all_categories(const std::vector<std::string>& categories)

void

pd_series_groupby.h:153

void set_categorical_categories(const std::vector<std::string>& cats)

void

pd_series_groupby.h:140

View

void set_grouper_dtype(const std::string& dtype)

void

pd_series_groupby.h:146

void set_multiindex_names(const std::vector<std::string>& names)

void

pd_series_groupby.h:134

View

void set_series_name(const std::string& name)

void

pd_series_groupby.h:123

void set_series_name_is_int(bool flag)

void

pd_series_groupby.h:124

Series<int64_t> size() const

Series<int64_t>

pd_series_groupby.h:1082

View

std::string source_dtype() const

std::string

pd_series_groupby.h:95

std::string source_dtype_full() const

std::string

pd_series_groupby.h:102

Code Examples#

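The following examples are extracted from the test suite and matched by method name. Each heading gives the method name and the test-file location of the match. Note that several excerpts exercise a same-named method on a different class (for example DataFrame, BooleanArray, CategoricalArray, or rolling/expanding windows) rather than on SeriesGroupBy itself.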

first (pd_test_1_all.cpp:11616)
11607        void pd_test_groupby_first_last() {
11608            std::cout << "========= GroupBy first/last ====================";
11609
11610            std::map<std::string, std::vector<double>> data = {
11611                {"category", {1.0, 1.0, 2.0, 2.0}},
11612                {"value", {10.0, 20.0, 30.0, 40.0}}
11613            };
11614            pandas::DataFrame df(data);
11615
11616            auto first_result = df.groupby("category").first();
11617            auto last_result = df.groupby("category").last();
11618
11619            // First for group 1: 10, group 2: 30
11620            // Last for group 1: 20, group 2: 40
11621            double first1 = std::stod(first_result["value"].get_value_str(0));
11622            double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626            if (!passed) {
get_group (pd_test_2_all.cpp:20487)
20477        ++g_fail;
20478    }
20479}
20480
20481static bool approx_eq(double a, double b, double tol = 1e-9) {
20482    if (std::isnan(a) && std::isnan(b)) return true;
20483    return std::abs(a - b) < tol;
20484}
20485
20486// =====================================================================
20487// Test: get_group() with exclude_cols removes groupby columns
20488// =====================================================================
20489
20490void pd_test_groupby_apply_get_group_exclude() {
20491    std::cout << "  -- pd_test_groupby_apply_get_group_exclude --" << std::endl;
20492
20493    pandas::DataFrame df;
20494    df.add_column("key", std::vector<std::string>{"a", "a", "b", "b"});
20495    df.add_column("val1", std::vector<double>{1.0, 2.0, 3.0, 4.0});
20496    df.add_column("val2", std::vector<double>{10.0, 20.0, 30.0, 40.0});
get_index_name (pd_test_3_all.cpp:23398)
23388    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23389    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23390    s.set_multiindex(mi);
23391
23392    auto gb = s.groupby_by_level(static_cast<size_t>(0), true);
23393    if (gb.group_keys_order().size() != 2)
23394        throw std::runtime_error("expected 2 groups");
23395    auto sums = gb.sum();
23396    if (sums[0] != 30.0 || sums[1] != 70.0)
23397        throw std::runtime_error("sum mismatch");
23398    if (!gb.get_index_name().has_value() || *gb.get_index_name() != "first")
23399        throw std::runtime_error("index name mismatch");
23400
23401    std::cout << " -> tests passed" << std::endl;
23402}
23403
23404void pd_test_groupby_level_multi() {
23405    std::cout << "========= groupby_by_level(multi) =====================";
23406
23407    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0});
23408    std::vector<std::vector<std::string>> level_values = {
get_index_name (pd_test_3_all.cpp:23398)
23388    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23389    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23390    s.set_multiindex(mi);
23391
23392    auto gb = s.groupby_by_level(static_cast<size_t>(0), true);
23393    if (gb.group_keys_order().size() != 2)
23394        throw std::runtime_error("expected 2 groups");
23395    auto sums = gb.sum();
23396    if (sums[0] != 30.0 || sums[1] != 70.0)
23397        throw std::runtime_error("sum mismatch");
23398    if (!gb.get_index_name().has_value() || *gb.get_index_name() != "first")
23399        throw std::runtime_error("index name mismatch");
23400
23401    std::cout << " -> tests passed" << std::endl;
23402}
23403
23404void pd_test_groupby_level_multi() {
23405    std::cout << "========= groupby_by_level(multi) =====================";
23406
23407    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0});
23408    std::vector<std::vector<std::string>> level_values = {
idxmin_with_dtype (pd_test_5_all.cpp:95397)
95387void case_701_dfgb_idxmin_rangeindex(int& local_fail) {
95388    std::cout << "-- case_701_dfgb_idxmin_rangeindex\n";
95389    // Default RangeIndex (int64). Result columns must keep int64 dtype.
95390    pandas::DataFrame df;
95391    df.add_column<double>("v", std::vector<double>{3.0, 1.0, 2.0, 0.5});
95392    df.add_column<int64_t>("key", std::vector<int64_t>{0, 0, 1, 1});
95393    auto gb = df.groupby("key");
95394    pandas::DataFrame out;
95395    std::string err;
95396    try { out = gb.idxmin_with_dtype(); }
95397    catch (const std::exception& e) { err = e.what(); }
95398    catch (...) { err = "<unknown>"; }
95399    pandas_tests::check(err.empty(),
95400        "C_26_case_701_dfgb_idxmin_rangeindex()_no_throw", local_fail);
95401    if (!err.empty()) { std::cout << "  err: " << err << "\n"; return; }
95402    std::string got = df_col_dtype(out, "v");
95403    bool ok = (got == "int64");
95404    pandas_tests::check(ok,
95405        "C_26_case_701_dfgb_idxmin_rangeindex()_dtype", local_fail);
95406    if (!ok) std::cout << "  got=[" << got << "] expected=[int64]\n";
last (pd_test_1_all.cpp:11617)
11607        void pd_test_groupby_first_last() {
11608            std::cout << "========= GroupBy first/last ====================";
11609
11610            std::map<std::string, std::vector<double>> data = {
11611                {"category", {1.0, 1.0, 2.0, 2.0}},
11612                {"value", {10.0, 20.0, 30.0, 40.0}}
11613            };
11614            pandas::DataFrame df(data);
11615
11616            auto first_result = df.groupby("category").first();
11617            auto last_result = df.groupby("category").last();
11618
11619            // First for group 1: 10, group 2: 30
11620            // Last for group 1: 20, group 2: 40
11621            double first1 = std::stod(first_result["value"].get_value_str(0));
11622            double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626            if (!passed) {
11627                std::cout << "  [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
set_index_name (pd_test_2_all.cpp:20842)
20832void test_sgb_apply_result_index_categorical() {
20833    std::cout << "  -- test_sgb_apply_result_index_categorical --" << std::endl;
20834
20835    std::vector<numpy::float64> values = {5.0, 10.0};
20836    pandas::Series<std::string> by({"A", "B"});
20837    pandas::Series<numpy::float64> data(values);
20838
20839    auto sgb = data.groupby(by);
20840    sgb.set_categorical_categories({"A", "B", "C"});
20841    sgb.set_index_name("cat_key");
20842
20843    pandas::Series<numpy::float64> result(values);
20844    std::vector<std::string> idx_labels = {"A", "B"};
20845    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20846
20847    sgb.apply_result_index(result);
20848
20849    // Should have CategoricalIndex (dtype_name() returns "category")
20850    check(result.index().dtype_name() == "category", "is_categorical_index");
20851}
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
cummax (pd_test_1_all.cpp:5152)
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
5145            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146            if (!passed) {
5147                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149            }
5150
5151            // cummax: [1, 2, 3, 4]
5152            auto cmax = df.cummax();
5153            val = cmax["A"].get_value_str(2);
5154            passed = std::abs(std::stod(val) - 3.0) < 0.001;
5155            if (!passed) {
5156                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummax failed" << std::endl;
5157                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummax failed");
5158            }
5159
5160            std::cout << " -> tests passed" << std::endl;
5161        }
cummin (pd_test_1_all.cpp:5143)
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
5136            passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137            if (!passed) {
5138                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140            }
5141
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
5145            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146            if (!passed) {
5147                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149            }
5150
5151            // cummax: [1, 2, 3, 4]
5152            auto cmax = df.cummax();
5153            val = cmax["A"].get_value_str(2);
cumprod (pd_test_1_all.cpp:5134)
5124            // cumsum: [1, 3, 6, 10]
5125            auto cs = df.cumsum();
5126            std::string val = cs["A"].get_value_str(2);
5127            bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128            if (!passed) {
5129                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131            }
5132
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
5136            passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137            if (!passed) {
5138                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140            }
5141
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
cumsum (pd_test_1_all.cpp:5125)
5115        }
5116
5117        void pd_test_arithmetic_dataframe_cumulative() {
5118            std::cout << "========= DataFrame cumulative ==================";
5119
5120            std::map<std::string, std::vector<double>> data;
5121            data["A"] = {1.0, 2.0, 3.0, 4.0};
5122            pandas::DataFrame df(data);
5123
5124            // cumsum: [1, 3, 6, 10]
5125            auto cs = df.cumsum();
5126            std::string val = cs["A"].get_value_str(2);
5127            bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128            if (!passed) {
5129                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131            }
5132
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
mean (pd_test_1_all.cpp:282)
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901            }
20902
20903            std::cout << " -> tests passed" << std::endl;
20904        }
20905
20906        void pd_test_expanding_median() {
20907            std::cout << "========= Expanding median ======================";
20908
20909            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910            auto result = s.expanding().median();
20911
20912            // Expanding median: 1, 1.5, 2, 2.5, 3
20913            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914                          std::abs(result[1] - 1.5) < 0.001 &&
20915                          std::abs(result[2] - 2.0) < 0.001 &&
20916                          std::abs(result[3] - 2.5) < 0.001 &&
20917                          std::abs(result[4] - 3.0) < 0.001;
20918            if (!passed) {
20919                std::cout << "  [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920                throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
nunique (pd_test_1_all.cpp:10604)
10594    std::cout << " -> tests passed" << std::endl;
10595}
10596
10597void pd_test_extension_index_nunique() {
10598    std::cout << "========= nunique =========================";
10599
10600    pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10601    pandas::CategoricalIndex idx(arr);
10602
10603    bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10604    if (!passed) {
10605        std::cout << "  [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10606        throw std::runtime_error("pd_test_extension_index_nunique failed");
10607    }
10608
10609    std::cout << " -> tests passed" << std::endl;
10610}
10611
10612void pd_test_extension_index_factorize() {
10613    std::cout << "========= factorize =========================";
std_ (pd_test_1_all.cpp:20752)
20742                throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743            }
20744
20745            std::cout << " -> tests passed" << std::endl;
20746        }
20747
20748        void pd_test_rolling_std() {
20749            std::cout << "========= Rolling std ===========================";
20750
20751            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752            auto result = s.rolling(3).std_();
20753
20754            // std([1,2,3]) = 1.0 (ddof=1)
20755            // std([2,3,4]) = 1.0
20756            // std([3,4,5]) = 1.0
20757            bool passed = std::abs(result[2] - 1.0) < 0.001;
20758            if (!passed) {
20759                std::cout << "  [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
sum_int64_bool_ (pd_test_5_all.cpp:55457)
55447    check_col_dtype("caseG3", df, 0, "float64", local_fail);
55448}
55449
55450static void f_seriesgroupby_agg_list_dtype_12_b3d2f7_caseH1_bool_sum(int& local_fail) {
55451    std::cout << "-- caseH1_bool_sum\n";
55452    pandas::Series<bool> v({true, false, true, true});
55453    pandas::Series<std::string> by({"a", "a", "b", "b"});
55454    auto sgb = v.groupby(by);
55455    pandas::DataFrame df = sgb.agg(std::vector<std::string>{"sum"});
55456    check_ncols("caseH1", df, 1, local_fail);
55457    // Plan 21: pandas widens bool sum to int64 — fixed via sum_int64_bool_().
55458    check_col_dtype("caseH1", df, 0, "int64", local_fail);
55459}
55460
55461static void f_seriesgroupby_agg_list_dtype_12_b3d2f7_caseH2_bool_first(int& local_fail) {
55462    std::cout << "-- caseH2_bool_first\n";
55463    pandas::Series<bool> v({true, false, true, true});
55464    pandas::Series<std::string> by({"a", "a", "b", "b"});
55465    auto sgb = v.groupby(by);
55466    pandas::DataFrame df = sgb.agg(std::vector<std::string>{"first"});
55467    check_ncols("caseH2", df, 1, local_fail);
var (pd_test_1_all.cpp:20890)
20880                throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881            }
20882
20883            std::cout << " -> tests passed" << std::endl;
20884        }
20885
20886        void pd_test_expanding_var() {
20887            std::cout << "========= Expanding var =========================";
20888
20889            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890            auto result = s.expanding().var();
20891
20892            // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893            bool passed = std::isnan(result[0]) &&
20894                          std::abs(result[1] - 0.5) < 0.001 &&
20895                          std::abs(result[2] - 1.0) < 0.001 &&
20896                          std::abs(result[3] - 1.6667) < 0.001 &&
20897                          std::abs(result[4] - 2.5) < 0.001;
20898            if (!passed) {
20899                std::cout << "  [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg_with_dtype (pd_test_5_all.cpp:94652)
94642static void run_dfgb_case(const std::string& fn,
94643                          const std::string& col,
94644                          const std::string& expected_dtype,
94645                          const std::string& label,
94646                          int& local_fail) {
94647    pandas::DataFrame df = make_mixed_df();
94648    auto gb = df.groupby("key");
94649    pandas::DataFrame out;
94650    std::string err;
94651    try {
94652        out = gb.agg_with_dtype(fn);
94653    } catch (const std::exception& e) {
94654        err = e.what();
94655    } catch (...) {
94656        err = "<unknown>";
94657    }
94658    pandas_tests::check(err.empty(),
94659        label + "_no_throw",
94660        local_fail);
94661    if (!err.empty()) {
94662        std::cout << "  err: " << err << "\n";
agg_with_dtype_list (pd_test_5_all.cpp:94682)
94672static void run_dfgb_list_case(const std::vector<std::string>& fns,
94673                               const std::string& src_col,
94674                               const std::vector<std::string>& expected,
94675                               const std::string& label,
94676                               int& local_fail) {
94677    pandas::DataFrame df = make_mixed_df();
94678    auto gb = df.groupby("key");
94679    pandas::DataFrame out;
94680    std::string err;
94681    try {
94682        out = gb.agg_with_dtype_list(fns);
94683    } catch (const std::exception& e) {
94684        err = e.what();
94685    } catch (...) {
94686        err = "<unknown>";
94687    }
94688    pandas_tests::check(err.empty(),
94689        label + "_no_throw",
94690        local_fail);
94691    if (!err.empty()) {
94692        std::cout << "  err: " << err << "\n";
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
apply_result_index (pd_test_2_all.cpp:20781)
20771    pandas::Series<std::string> by(keys);
20772    pandas::Series<numpy::float64> data(values);
20773
20774    auto sgb = data.groupby(by);
20775    sgb.set_multiindex_names({"level0", "level1"});
20776
20777    // Create a "result" series with composite index
20778    pandas::Series<numpy::float64> result(values);
20779    result.set_index(std::make_unique<pandas::Index<std::string>>(keys));
20780
20781    sgb.apply_result_index(result);
20782
20783    // Should now have a MultiIndex
20784    check(result.has_multiindex(), "has_multiindex");
20785    check(result.multiindex().nlevels() == 2, "nlevels_2");
20786}
20787
20788void test_sgb_apply_result_index_3level() {
20789    std::cout << "  -- test_sgb_apply_result_index_3level --" << std::endl;
20790
20791    using std::string;
transform (pd_test_1_all.cpp:11071)
11062            std::cout << " -> tests passed" << std::endl;
11063        }
11064
11065        void pd_test_func_apply_series_transform() {
11066            std::cout << "========= Series transform ============================";
11067
11068            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070            // Transform must return same shape
11071            auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073            bool passed = true;
11074            if (result.size() != s.size()) {
11075                passed = false;
11076                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078            }
11079
11080            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081            for (size_t i = 0; i < result.size(); ++i) {
multiindex_names (pd_test_3_all.cpp:23419)
23409        {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23410    };
23411    std::vector<std::optional<std::string>> level_names = {"L0", "L1"};
23412    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23413    s.set_multiindex(mi);
23414
23415    std::vector<size_t> levels = {0, 1};
23416    auto gb = s.groupby_by_level(levels, true);
23417    if (gb.group_keys_order().size() != 4)
23418        throw std::runtime_error("expected 4 composite groups");
23419    if (gb.multiindex_names().size() != 2 || gb.multiindex_names()[0] != "L0" || gb.multiindex_names()[1] != "L1")
23420        throw std::runtime_error("multiindex names mismatch");
23421
23422    std::cout << " -> tests passed" << std::endl;
23423}
23424
23425void pd_test_groupby_by_index() {
23426    std::cout << "========= groupby_by_index() ==========================";
23427
23428    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0});
23429    s.set_index(pandas::Index<std::string>({"a", "b", "a"}));
diff (pd_test_1_all.cpp:5171)
5161        }
5162
5163        void pd_test_arithmetic_dataframe_diff_shift() {
5164            std::cout << "========= DataFrame diff/shift ==================";
5165
5166            std::map<std::string, std::vector<double>> data;
5167            data["A"] = {1.0, 3.0, 6.0, 10.0};
5168            pandas::DataFrame df(data);
5169
5170            // diff: [NaN, 2, 3, 4]
5171            auto d = df.diff();
5172            std::string val = d["A"].get_value_str(1);
5173            bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174            if (!passed) {
5175                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177            }
5178
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
shift (pd_test_1_all.cpp:5188)
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
5182            if (!passed) {
5183                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5184                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5185            }
5186
5187            // shift: [NaN, 1, 3, 6]
5188            auto s = df.shift();
5189            val = s["A"].get_value_str(1);
5190            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5191            if (!passed) {
5192                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5193                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5194            }
5195
5196            std::cout << " -> tests passed" << std::endl;
5197        }
categorical_categories (pd_test_3_all.cpp:23513)
23503    pandas::CategoricalArray cat({"a", "b", "a"}, {"a", "b", "c"});
23504
23505    auto gb_obs = s.groupby_by_categorical(cat, true, true);
23506    if (gb_obs.group_keys_order().size() != 2)
23507        throw std::runtime_error("expected 2 observed groups");
23508
23509    auto gb_all = s.groupby_by_categorical(cat, true, false);
23510    if (gb_all.group_keys_order().size() != 3)
23511        throw std::runtime_error("expected 3 groups with observed=false");
23512
23513    if (gb_obs.categorical_categories().size() != 3)
23514        throw std::runtime_error("categorical_categories not set");
23515
23516    std::cout << " -> tests passed" << std::endl;
23517}
23518
23519void pd_test_groupby_by_labels() {
23520    std::cout << "========= groupby_by_labels() =========================";
23521
23522    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0});
23523    std::vector<std::string> labels = {"X", "Y", "X", "Y"};
group_keys_order (pd_test_3_all.cpp:23393)
23384    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23385    std::vector<std::vector<std::string>> level_values = {
23386        {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23387    };
23388    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23389    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23390    s.set_multiindex(mi);
23391
23392    auto gb = s.groupby_by_level(static_cast<size_t>(0), true);
23393    if (gb.group_keys_order().size() != 2)
23394        throw std::runtime_error("expected 2 groups");
23395    auto sums = gb.sum();
23396    if (sums[0] != 30.0 || sums[1] != 70.0)
23397        throw std::runtime_error("sum mismatch");
23398    if (!gb.get_index_name().has_value() || *gb.get_index_name() != "first")
23399        throw std::runtime_error("index name mismatch");
23400
23401    std::cout << " -> tests passed" << std::endl;
23402}
grouper_dtype (pd_test_3_all.cpp:23493)
23483    std::cout << "========= groupby_by_numeric() ========================";
23484
23485    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23486    pandas::Series<numpy::float64> by_s({1.0, 2.0, 1.0, 2.0});
23487    auto gb = s.groupby_by_numeric(by_s, true);
23488    if (gb.group_keys_order().size() != 2)
23489        throw std::runtime_error("expected 2 groups");
23490    auto sums = gb.sum();
23491    if (sums[0] != 40.0 || sums[1] != 60.0)
23492        throw std::runtime_error("sum mismatch");
23493    if (gb.grouper_dtype() != "float64")
23494        throw std::runtime_error("grouper_dtype mismatch");
23495
23496    std::cout << " -> tests passed" << std::endl;
23497}
23498
23499void pd_test_groupby_by_categorical() {
23500    std::cout << "========= groupby_by_categorical() ====================";
23501
23502    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0});
23503    pandas::CategoricalArray cat({"a", "b", "a"}, {"a", "b", "c"});
groups (pd_test_2_all.cpp:20864)
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
20860
20861    // Two groups: A=[1,2,3], B=[10,20]
20862    std::vector<numpy::float64> vals = {1.0, 10.0, 2.0, 20.0, 3.0};
20863    pandas::Series<numpy::float64> data(vals);
20864    pandas::Series<std::string> groups({"A", "B", "A", "B", "A"});
20865
20866    auto sgb = data.groupby(groups);
20867    pandas::SeriesGroupByExpandingWindow ew(sgb, 1);
20868    auto result = ew.sum();
20869
20870    check(result.size() == 5, "size_5");
20871    // A group: expanding sum = 1, 3, 6
20872    // B group: expanding sum = 10, 30
20873    // Original order: [A:1, B:10, A:3, B:30, A:6]
20874    check(approx_eq(result[0], 1.0), "A_exp_sum_0");
indices (pd_test_1_all.cpp:14921)
14911                passed = passed && r2_tup1[0] == "b" && r2_tup1[1] == "x";
14912                passed = passed && r2_tup2[0] == "c" && r2_tup2[1] == "x";
14913            }
14914
14915            // Test empty vector (no deletion)
14916            std::cout << "  Test 3: Empty delete_(std::vector<size_t>{})..." << std::endl;
14917            auto result3 = mi.delete_(std::vector<size_t>{});
14918            std::cout << "    Result size: " << result3.size() << " (expected " << mi.size() << ")" << std::endl;
14919            passed = passed && result3.size() == mi.size();
14920
14921            // Test duplicate indices (should be deduplicated)
14922            std::cout << "  Test 4: Duplicate delete_({1, 1, 2})..." << std::endl;
14923            auto result4 = mi.delete_({1, 1, 2});
14924            std::cout << "    Result size: " << result4.size() << " (expected 3)" << std::endl;
14925            passed = passed && result4.size() == 3;
14926
14927            // Test deleting all elements
14928            std::cout << "  Test 5: Delete all delete_({0,1,2,3,4})..." << std::endl;
14929            auto result5 = mi.delete_({0, 1, 2, 3, 4});
14930            std::cout << "    Result size: " << result5.size() << " (expected 0)" << std::endl;
14931            passed = passed && result5.size() == 0;
ngroups (pd_test_1_all.cpp:11497)
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
11505
11506        void pd_test_groupby_multiple_columns() {
11507            std::cout << "========= GroupBy multiple columns ==============";
nth (pd_test_3_all.cpp:27491)
27481    check(result_cumsum["B"].get_value_double(1) == 2.0, "row 1 (bar) cumsum B = 2");
27482    check(result_cumsum["B"].get_value_double(3) == 6.0, "row 3 (bar) cumsum B = 6");
27483}
27484
27485void pd_test_gb_nth_basic() {
27486    std::cout << "  -- pd_test_gb_nth_basic --" << std::endl;
27487
27488    auto df = make_test_df();
27489    auto gb = df.groupby("A");
27490
27491    auto result = gb.nth(0);
27492    check(result.nrows() == 2, "nth(0) returns 2 rows (one per group)");
27493
27494    auto result_last = gb.nth(-1);
27495    check(result_last.nrows() == 2, "nth(-1) returns 2 rows");
27496
27497    auto result_multi = gb.nth(std::vector<int>{0, -1});
27498    check(result_multi.nrows() == 4, "nth([0,-1]) returns 4 rows");
27499}
27500
27501void pd_test_gb_nth_slice() {
series (pd_test_2_all.cpp:2307)
2297            std::vector<std::string> index = {"a", "b", "c", "d", "e"};
2298
2299            std::map<std::string, std::vector<numpy::float64>> data1;
2300            data1["col1"] = {1.0, 2.0, 3.0, 4.0, 5.0};
2301            data1["col2"] = {2.0, 4.0, 6.0, 8.0, 10.0};  // Perfectly correlated with col1
2302
2303            pandas::DataFrame df1(data1, std::make_unique<pandas::Index<std::string>>(index));
2304
2305            // Series with same index and values that correlate with df columns
2306            pandas::Series<numpy::float64> series({1.0, 2.0, 3.0, 4.0, 5.0});
2307            series.set_index(pandas::Index<std::string>(index));
2308
2309            pandas::Series<numpy::float64> result = df1.corrwith(series);
2310
2311            bool passed = true;
2312            // col1 should have correlation 1.0 with series
2313            if (!approx_equal(result[0], 1.0)) {
2314                std::cout << "\n  [FAIL] : Expected correlation 1.0 for col1, got " << result[0] << std::endl;
2315                passed = false;
2316            }
set_categorical_categories (pd_test_2_all.cpp:20841)
20831}
20832
20833void test_sgb_apply_result_index_categorical() {
20834    std::cout << "  -- test_sgb_apply_result_index_categorical --" << std::endl;
20835
20836    std::vector<numpy::float64> values = {5.0, 10.0};
20837    pandas::Series<std::string> by({"A", "B"});
20838    pandas::Series<numpy::float64> data(values);
20839
20840    auto sgb = data.groupby(by);
20841    sgb.set_categorical_categories({"A", "B", "C"});
20842    sgb.set_index_name("cat_key");
20843
20844    pandas::Series<numpy::float64> result(values);
20845    std::vector<std::string> idx_labels = {"A", "B"};
20846    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20847
20848    sgb.apply_result_index(result);
20849
20850    // Should have CategoricalIndex (dtype_name() returns "category")
20851    check(result.index().dtype_name() == "category", "is_categorical_index");
set_multiindex_names (pd_test_2_all.cpp:20775)
20765    // Simulate a 2-level groupby result with composite \x1f keys
20766    using std::string;
20767    string sep(1, '\x1f');
20768    std::vector<string> keys = {"A" + sep + "X", "A" + sep + "Y", "B" + sep + "X", "B" + sep + "Y"};
20769
20770    std::vector<numpy::float64> values = {1.0, 2.0, 3.0, 4.0};
20771    pandas::Series<std::string> by(keys);
20772    pandas::Series<numpy::float64> data(values);
20773
20774    auto sgb = data.groupby(by);
20775    sgb.set_multiindex_names({"level0", "level1"});
20776
20777    // Create a "result" series with composite index
20778    pandas::Series<numpy::float64> result(values);
20779    result.set_index(std::make_unique<pandas::Index<std::string>>(keys));
20780
20781    sgb.apply_result_index(result);
20782
20783    // Should now have a MultiIndex
20784    check(result.has_multiindex(), "has_multiindex");
20785    check(result.multiindex().nlevels() == 2, "nlevels_2");
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)