DataFrameResampler#

class pandas::DataFrameResampler#

Resampler class that groups the rows of a DataFrame into time-based bins at a given frequency and applies aggregations (e.g. sum, mean, ohlc) to each bin.

Example#

#include <pandas/pandas.h>
using namespace pandas;

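// Construct a DataFrameResampler from an existing DataFrame `df` and a frequency string
DataFrameResampler obj(df, "D");
// ... operations, e.g. obj.sum(), obj.mean(), obj.ohlc() ...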

Constructors#

Signature

Location

Example

DataFrameResampler(const DataFrame& df, const std::string& freq, const std::string& closed = "", const std::string& label = "", const std::string& origin = "epoch", int64_t offset_nanos = 0)

pd_resampler.h:308

Indexing / Selection#

Signature

Return Type

Location

Example

DataFrame first() const

DataFrame

pd_resampler.h:361

View

std::vector<std::string> get_numeric_columns() const

std::vector<std::string>

pd_resampler.h:424

int64_t get_period_key(int64_t epoch_ns) const

int64_t

pd_resampler.h:422

DataFrame last() const

DataFrame

pd_resampler.h:364

View

Missing Data#

Signature

Return Type

Location

Example

DataFrame bfill() const

DataFrame

pd_resampler.h:387

View

DataFrame ffill() const

DataFrame

pd_resampler.h:377

View

Statistics#

Signature

Return Type

Location

Example

DataFrame count() const

DataFrame

pd_resampler.h:335

View

DataFrame max() const

DataFrame

pd_resampler.h:332

View

DataFrame mean() const

DataFrame

pd_resampler.h:330

View

DataFrame median() const

DataFrame

pd_resampler.h:336

View

DataFrame min() const

DataFrame

pd_resampler.h:331

View

DataFrame std_(int ddof = 1) const

DataFrame

pd_resampler.h:333

View

DataFrame sum() const

DataFrame

pd_resampler.h:329

View

DataFrame var(int ddof = 1) const

DataFrame

pd_resampler.h:334

View

Aggregation#

Signature

Return Type

Location

Example

DataFrame agg(const std::string& func_name) const

DataFrame

pd_resampler.h:342

View

DataFrame agg(const std::vector<std::string>& funcs) const

DataFrame

pd_resampler.h:348

View

DataFrame agg(const std::vector<std::pair<std::string, std::vector<std::string>>>& col_funcs) const

DataFrame

pd_resampler.h:354

View

std::vector<double> aggregate_column(size_t col_idx, const std::string& func) const

std::vector<double>

pd_resampler.h:427

DataFrame transform(const std::string& func_name) const

DataFrame

pd_resampler.h:384

View

Other Methods#

Signature

Return Type

Location

Example

void build_groups()

void

pd_resampler.h:421

const std::string& closed() const

const std::string&

pd_resampler.h:406

View

static double compute_agg(const std::vector<double>& values, const std::string& func, int ddof = 1)

double

pd_resampler.h:430

View

const DataFrame& dataframe() const

const DataFrame&

pd_resampler.h:403

View

const std::string& freq() const

const std::string&

pd_resampler.h:400

View

const std::vector<int64_t>& group_keys() const

const std::vector<int64_t>&

pd_resampler.h:415

const std::unordered_map<int64_t, std::vector<size_t>>& groups() const

const std::unordered_map<int64_t, std::vector<size_t>>&

pd_resampler.h:412

View

const std::string& label() const

const std::string&

pd_resampler.h:409

View

size_t ngroups() const

size_t

pd_resampler.h:397

View

DataFrame ohlc() const

DataFrame

pd_resampler.h:370

View

int64_t period_key_to_timestamp(int64_t key) const

int64_t

pd_resampler.h:423

const std::vector<int64_t>& period_timestamps() const

const std::vector<int64_t>&

pd_resampler.h:418

DataFrame size() const

DataFrame

pd_resampler.h:394

View

Code Examples#

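The following examples are extracted from the test suite. They are matched by method name, so some of them exercise a same-named method on a different class (for example GroupBy, Series, or BooleanArray) rather than DataFrameResampler itself.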

first (pd_test_1_all.cpp:11616)
11606        void pd_test_groupby_first_last() {
11607            std::cout << "========= GroupBy first/last ====================";
11608
11609            std::map<std::string, std::vector<double>> data = {
11610                {"category", {1.0, 1.0, 2.0, 2.0}},
11611                {"value", {10.0, 20.0, 30.0, 40.0}}
11612            };
11613            pandas::DataFrame df(data);
11614
11615            auto first_result = df.groupby("category").first();
11616            auto last_result = df.groupby("category").last();
11617
11618            // First for group 1: 10, group 2: 30
11619            // Last for group 1: 20, group 2: 40
11620            double first1 = std::stod(first_result["value"].get_value_str(0));
11621            double first2 = std::stod(first_result["value"].get_value_str(1));
11622
11623            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11624                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11625            if (!passed) {
last (pd_test_1_all.cpp:11617)
11607        void pd_test_groupby_first_last() {
11608            std::cout << "========= GroupBy first/last ====================";
11609
11610            std::map<std::string, std::vector<double>> data = {
11611                {"category", {1.0, 1.0, 2.0, 2.0}},
11612                {"value", {10.0, 20.0, 30.0, 40.0}}
11613            };
11614            pandas::DataFrame df(data);
11615
11616            auto first_result = df.groupby("category").first();
11617            auto last_result = df.groupby("category").last();
11618
11619            // First for group 1: 10, group 2: 30
11620            // Last for group 1: 20, group 2: 40
11621            double first1 = std::stod(first_result["value"].get_value_str(0));
11622            double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626            if (!passed) {
11627                std::cout << "  [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
bfill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
mean (pd_test_1_all.cpp:282)
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901            }
20902
20903            std::cout << " -> tests passed" << std::endl;
20904        }
20905
20906        void pd_test_expanding_median() {
20907            std::cout << "========= Expanding median ======================";
20908
20909            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910            auto result = s.expanding().median();
20911
20912            // Expanding median: 1, 1.5, 2, 2.5, 3
20913            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914                          std::abs(result[1] - 1.5) < 0.001 &&
20915                          std::abs(result[2] - 2.0) < 0.001 &&
20916                          std::abs(result[3] - 2.5) < 0.001 &&
20917                          std::abs(result[4] - 3.0) < 0.001;
20918            if (!passed) {
20919                std::cout << "  [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920                throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
std_ (pd_test_1_all.cpp:20752)
20742                throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743            }
20744
20745            std::cout << " -> tests passed" << std::endl;
20746        }
20747
20748        void pd_test_rolling_std() {
20749            std::cout << "========= Rolling std ===========================";
20750
20751            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752            auto result = s.rolling(3).std_();
20753
20754            // std([1,2,3]) = 1.0 (ddof=1)
20755            // std([2,3,4]) = 1.0
20756            // std([3,4,5]) = 1.0
20757            bool passed = std::abs(result[2] - 1.0) < 0.001;
20758            if (!passed) {
20759                std::cout << "  [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
var (pd_test_1_all.cpp:20890)
20880                throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881            }
20882
20883            std::cout << " -> tests passed" << std::endl;
20884        }
20885
20886        void pd_test_expanding_var() {
20887            std::cout << "========= Expanding var =========================";
20888
20889            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890            auto result = s.expanding().var();
20891
20892            // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893            bool passed = std::isnan(result[0]) &&
20894                          std::abs(result[1] - 0.5) < 0.001 &&
20895                          std::abs(result[2] - 1.0) < 0.001 &&
20896                          std::abs(result[3] - 1.6667) < 0.001 &&
20897                          std::abs(result[4] - 2.5) < 0.001;
20898            if (!passed) {
20899                std::cout << "  [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
transform (pd_test_1_all.cpp:11071)
11061            std::cout << " -> tests passed" << std::endl;
11062        }
11063
11064        void pd_test_func_apply_series_transform() {
11065            std::cout << "========= Series transform ============================";
11066
11067            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11068
11069            // Transform must return same shape
11070            auto result = s.transform([](double x) { return x * 2 + 1; });
11071
11072            bool passed = true;
11073            if (result.size() != s.size()) {
11074                passed = false;
11075                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11076                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11077            }
11078
11079            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11080            for (size_t i = 0; i < result.size(); ++i) {
closed (pd_test_1_all.cpp:1903)
1893// ============================================================================
1894void test_constructors() {
1895    std::cout << "========= IntervalArray: constructors ======================= ";
1896
1897    // Default constructor
1898    pandas::IntervalArrayFloat64 empty;
1899    if (empty.size() != 0) {
1900        std::cout << "[FAIL] : in test_constructors() : default constructor size" << std::endl;
1901        return;
1902    }
1903    if (empty.closed() != pandas::IntervalClosed::Right) {
1904        std::cout << "[FAIL] : in test_constructors() : default closure" << std::endl;
1905        return;
1906    }
1907
1908    // Constructor from left/right arrays
1909    numpy::NDArray<numpy::float64> left(std::vector<size_t>{3});
1910    numpy::NDArray<numpy::float64> right(std::vector<size_t>{3});
1911    left.setElementAt({0}, 0.0);  right.setElementAt({0}, 1.0);
1912    left.setElementAt({1}, 1.0);  right.setElementAt({1}, 2.0);
1913    left.setElementAt({2}, 2.0);  right.setElementAt({2}, 3.0);
compute_agg (pd_test_5_all.cpp:112204)
112194    // Default signature is groupby(by, axis, level, as_index, sort, group_keys, observed, dropna).
112195    auto gb = df_in.groupby("k", 0, std::nullopt, /*as_index=*/true,
112196                            /*sort=*/true, /*group_keys=*/true,
112197                            /*observed=*/false, /*dropna=*/true);
112198    pandas::DataFrame df = gb.agg("sum");
112199    std::string actual = df.to_string();
112200
112201    // Pandas oracle (verified by analysis1 H3 logic + compute_agg empty=0.0):
112202    // - "a" observed, sum=10
112203    // - "b" observed, sum=20
112204    // - "c" unobserved -> compute_agg(empty, "sum") -> 0
112205    // Plan 12 (Logic-C int widening) has landed: aggregate_column now
112206    // preserves int64 for integer inputs, so the oracle is int64 with
112207    // integer literal display (no .0 suffix).
112208    std::string expected =
112209        "    v\n"
112210        "k    \n"
112211        "a  10\n"
112212        "b  20\n"
112213        "c   0";
112214    check_case("groupby_agg_dispatch_7c3a91_case_41",
dataframe (pd_test_2_all.cpp:11742)
11732                std::cout << "  [FAIL] : wrong dimensions" << std::endl;
11733                std::remove(temp_path.c_str());
11734                throw std::runtime_error("pd_test_to_hdf_mixed_types failed");
11735            }
11736
11737            std::remove(temp_path.c_str());
11738            std::cout << " -> tests passed" << std::endl;
11739        }
11740
11741        void pd_test_to_hdf_empty_dataframe() {
11742            std::cout << "========= to_hdf empty dataframe (real HDF5) ===================";
11743
11744            pandas::DataFrame df;
11745            std::string temp_path = "temp/test_hdf5_empty.h5";
11746            df.to_hdf(temp_path, "df", "w");
11747
11748            // Just verify file was created
11749            std::ifstream file(temp_path);
11750            if (!file.is_open()) {
11751                std::cout << "  [FAIL] : file not created" << std::endl;
11752                throw std::runtime_error("pd_test_to_hdf_empty_dataframe failed");
freq (pd_test_1_all.cpp:8233)
8223    std::cout << "========= freq property ===============================";
8224
8225    std::vector<std::optional<numpy::datetime64>> values = {
8226        numpy::datetime64(0LL, numpy::DateTimeUnit::Nanosecond),
8227        numpy::datetime64(86400000000000LL, numpy::DateTimeUnit::Nanosecond)  // 1 day
8228    };
8229    pandas::DatetimeArray arr(values);
8230    pandas::DatetimeMixinIndex idx(arr);
8231
8232    // Default freq is nullopt or inferred
8233    auto f = idx.freq();
8234    std::string fs = idx.freqstr();
8235
8236    bool passed = true;  // freq may or may not be set
8237    if (!passed) {
8238        std::cout << "  [FAIL] : in pd_test_datetime_mixin_freq()" << std::endl;
8239        throw std::runtime_error("pd_test_datetime_mixin_freq failed");
8240    }
8241
8242    std::cout << " -> tests passed" << std::endl;
8243}
groups (pd_test_2_all.cpp:20864)
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
20860
20861    // Two groups: A=[1,2,3], B=[10,20]
20862    std::vector<numpy::float64> vals = {1.0, 10.0, 2.0, 20.0, 3.0};
20863    pandas::Series<numpy::float64> data(vals);
20864    pandas::Series<std::string> groups({"A", "B", "A", "B", "A"});
20865
20866    auto sgb = data.groupby(groups);
20867    pandas::SeriesGroupByExpandingWindow ew(sgb, 1);
20868    auto result = ew.sum();
20869
20870    check(result.size() == 5, "size_5");
20871    // A group: expanding sum = 1, 3, 6
20872    // B group: expanding sum = 10, 30
20873    // Original order: [A:1, B:10, A:3, B:30, A:6]
20874    check(approx_eq(result[0], 1.0), "A_exp_sum_0");
label (pd_test_4_all.cpp:4935)
4925// Helper: compare and report
4926// ----------------------------------------------------------------------------
4927static void check_str(const std::string& label,
4928                      const std::string& expected,
4929                      const std::string& actual) {
4930    int _f = 0;
4931    pandas_tests::check_str_ws(label, expected, actual, _f);
4932    if (_f > 0) throw std::runtime_error(label + ": str mismatch");
4933}
4934
4935// Slugify a python compare-test label ("a.b.c" → "a_b_c") matching the
4936// scheme in scripts/gen_repr_mismatch_fixtures.py.
4937static std::string slugify_label(const std::string& label) {
4938    std::string out = label;
4939    for (char& ch : out) {
4940        if (ch == '.') ch = '_';
4941    }
4942    return out;
4943}
4944
4945// Load a captured pandas-generated expected output file. The file is written
ngroups (pd_test_1_all.cpp:11497)
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
11505
11506        void pd_test_groupby_multiple_columns() {
11507            std::cout << "========= GroupBy multiple columns ==============";
ohlc (pd_test_1_all.cpp:20388)
20378                "2020-01-01 11:00:00",
20379                "2020-01-01 12:00:00",
20380                "2020-01-02 09:00:00",
20381                "2020-01-02 10:00:00",
20382                "2020-01-02 11:00:00",
20383                "2020-01-02 12:00:00"
20384            };
20385            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20386
20387            auto resampler = df.resample("D");
20388            pandas::DataFrame result = resampler.ohlc();
20389
20390            // Should have open, high, low, close columns
20391            const auto& cols = result.columns();
20392            bool has_open = false, has_high = false, has_low = false, has_close = false;
20393            for (size_t i = 0; i < cols.size(); ++i) {
20394                std::string c = cols[i];
20395                if (c.find("open") != std::string::npos) has_open = true;
20396                if (c.find("high") != std::string::npos) has_high = true;
20397                if (c.find("low") != std::string::npos) has_low = true;
20398                if (c.find("close") != std::string::npos) has_close = true;
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)