SeriesResampler#

class pandas::SeriesResampler#

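Resampling class for a Series: groups values into time-based periods determined by a frequency string (`freq`) and exposes per-period statistics, aggregation, fill, and selection operations.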

Example#

#include <pandas/pandas.h>
using namespace pandas;

// Use SeriesResampler
SeriesResampler obj;
// ... operations ...

Constructors#

Signature

Location

Example

SeriesResampler(const Series<T>& series, const std::string& freq, const std::string& closed = "", const std::string& label = "", const std::string& origin = "epoch", int64_t offset_nanos = 0)

pd_resampler.h:456

Indexing / Selection#

Signature

Return Type

Location

Example

Series<T> first() const

Series<T>

pd_resampler.h:493

View

int64_t get_period_key(int64_t epoch_ns) const

int64_t

pd_resampler.h:525

Series<T> last() const

Series<T>

pd_resampler.h:494

View

Missing Data#

Signature

Return Type

Location

Example

Series<T> bfill() const

Series<T>

pd_resampler.h:498

View

Series<T> ffill(int limit = -1) const

Series<T>

pd_resampler.h:497

View

Statistics#

Signature

Return Type

Location

Example

Series<int64_t> count() const

Series<int64_t>

pd_resampler.h:482

View

Series<T> max() const

Series<T>

pd_resampler.h:475

View

Series<double> mean() const

Series<double>

pd_resampler.h:473

View

Series<double> median() const

Series<double>

pd_resampler.h:483

View

Series<T> min() const

Series<T>

pd_resampler.h:474

View

Series<double> prod() const

Series<double>

pd_resampler.h:484

View

Series<double> std_(int ddof = 1) const

Series<double>

pd_resampler.h:476

View

Series<double> sum() const

Series<double>

pd_resampler.h:472

View

Series<double> var(int ddof = 1) const

Series<double>

pd_resampler.h:477

View

Aggregation#

Signature

Return Type

Location

Example

Series<double> agg(const std::string& func) const

Series<double>

pd_resampler.h:487

View

DataFrame agg_dict(const std::vector<std::pair<std::string, std::vector<std::string>>>& col_funcs) const

DataFrame

pd_resampler.h:490

Series<T> apply(const std::function<pandas::ApplyCellResult(const Series<T>&)>& cb) const

Series<T>

pd_resampler.h:512

View

Comparison#

Signature

Return Type

Location

Example

Series<T> nearest() const

Series<T>

pd_resampler.h:500

Time Series#

Signature

Return Type

Location

Example

Series<T> asfreq() const

Series<T>

pd_resampler.h:499

View

Other Methods#

Signature

Return Type

Location

Example

void build_groups()

void

pd_resampler.h:524

const std::vector<int64_t>& group_keys() const

const std::vector<int64_t>&

pd_resampler.h:520

const std::unordered_map<int64_t, std::vector<size_t>>& groups() const

const std::unordered_map<int64_t, std::vector<size_t>>&

pd_resampler.h:519

View

size_t ngroups() const

size_t

pd_resampler.h:516

View

int64_t period_key_to_timestamp(int64_t key) const

int64_t

pd_resampler.h:526

const Series<T>& series() const

const Series<T>&

pd_resampler.h:521

View

void set_result_datetime_index(Series<ResultT>& result) const

void

pd_resampler.h:529

Code Examples#

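The following examples are extracted from the test suite and are matched by method name only. Many of them exercise a same-named method on a different type (for example DataFrame GroupBy, BooleanArray, CategoricalArray, rolling/expanding windows, or PeriodArray) rather than SeriesResampler itself, so treat them as illustrations of the method name, not as SeriesResampler usage.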

first (pd_test_1_all.cpp:11616)
11606        void pd_test_groupby_first_last() {
11607            std::cout << "========= GroupBy first/last ====================";
11608
11609            std::map<std::string, std::vector<double>> data = {
11610                {"category", {1.0, 1.0, 2.0, 2.0}},
11611                {"value", {10.0, 20.0, 30.0, 40.0}}
11612            };
11613            pandas::DataFrame df(data);
11614
11615            auto first_result = df.groupby("category").first();
11616            auto last_result = df.groupby("category").last();
11617
11618            // First for group 1: 10, group 2: 30
11619            // Last for group 1: 20, group 2: 40
11620            double first1 = std::stod(first_result["value"].get_value_str(0));
11621            double first2 = std::stod(first_result["value"].get_value_str(1));
11622
11623            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11624                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11625            if (!passed) {
last (pd_test_1_all.cpp:11617)
11607        void pd_test_groupby_first_last() {
11608            std::cout << "========= GroupBy first/last ====================";
11609
11610            std::map<std::string, std::vector<double>> data = {
11611                {"category", {1.0, 1.0, 2.0, 2.0}},
11612                {"value", {10.0, 20.0, 30.0, 40.0}}
11613            };
11614            pandas::DataFrame df(data);
11615
11616            auto first_result = df.groupby("category").first();
11617            auto last_result = df.groupby("category").last();
11618
11619            // First for group 1: 10, group 2: 30
11620            // Last for group 1: 20, group 2: 40
11621            double first1 = std::stod(first_result["value"].get_value_str(0));
11622            double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626            if (!passed) {
11627                std::cout << "  [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
bfill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
mean (pd_test_1_all.cpp:282)
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901            }
20902
20903            std::cout << " -> tests passed" << std::endl;
20904        }
20905
20906        void pd_test_expanding_median() {
20907            std::cout << "========= Expanding median ======================";
20908
20909            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910            auto result = s.expanding().median();
20911
20912            // Expanding median: 1, 1.5, 2, 2.5, 3
20913            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914                          std::abs(result[1] - 1.5) < 0.001 &&
20915                          std::abs(result[2] - 2.0) < 0.001 &&
20916                          std::abs(result[3] - 2.5) < 0.001 &&
20917                          std::abs(result[4] - 3.0) < 0.001;
20918            if (!passed) {
20919                std::cout << "  [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920                throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
prod (pd_test_1_all.cpp:26082)
26072        std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073        return 0;
26074    }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
std_ (pd_test_1_all.cpp:20752)
20742                throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743            }
20744
20745            std::cout << " -> tests passed" << std::endl;
20746        }
20747
20748        void pd_test_rolling_std() {
20749            std::cout << "========= Rolling std ===========================";
20750
20751            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752            auto result = s.rolling(3).std_();
20753
20754            // std([1,2,3]) = 1.0 (ddof=1)
20755            // std([2,3,4]) = 1.0
20756            // std([3,4,5]) = 1.0
20757            bool passed = std::abs(result[2] - 1.0) < 0.001;
20758            if (!passed) {
20759                std::cout << "  [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
var (pd_test_1_all.cpp:20890)
20880                throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881            }
20882
20883            std::cout << " -> tests passed" << std::endl;
20884        }
20885
20886        void pd_test_expanding_var() {
20887            std::cout << "========= Expanding var =========================";
20888
20889            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890            auto result = s.expanding().var();
20891
20892            // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893            bool passed = std::isnan(result[0]) &&
20894                          std::abs(result[1] - 0.5) < 0.001 &&
20895                          std::abs(result[2] - 1.0) < 0.001 &&
20896                          std::abs(result[3] - 1.6667) < 0.001 &&
20897                          std::abs(result[4] - 2.5) < 0.001;
20898            if (!passed) {
20899                std::cout << "  [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
asfreq (pd_test_1_all.cpp:2869)
2859        std::cout << "========= PeriodArray: asfreq ======================= ";
2860
2861        // Monthly to quarterly
2862        pandas::PeriodArray arr_m(std::vector<std::string>{
2863            "2024-01",
2864            "2024-04",
2865            "2024-07",
2866            "NaT"
2867        }, "M");
2868
2869        auto arr_q = arr_m.asfreq("Q");
2870        if (arr_q.size() != 4) {
2871            std::cout << "  [FAIL] : asfreq size should be 4" << std::endl;
2872            throw std::runtime_error("pd_test_period_array_asfreq failed: size");
2873        }
2874        if (arr_q.freqstr() != "Q") {
2875            std::cout << "  [FAIL] : asfreq freqstr should be 'Q'" << std::endl;
2876            throw std::runtime_error("pd_test_period_array_asfreq failed: freqstr");
2877        }
2878
2879        // Check NaT is preserved
groups (pd_test_2_all.cpp:20864)
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859    std::cout << "  -- test_series_groupby_expanding_sum --" << std::endl;
20860
20861    // Two groups: A=[1,2,3], B=[10,20]
20862    std::vector<numpy::float64> vals = {1.0, 10.0, 2.0, 20.0, 3.0};
20863    pandas::Series<numpy::float64> data(vals);
20864    pandas::Series<std::string> groups({"A", "B", "A", "B", "A"});
20865
20866    auto sgb = data.groupby(groups);
20867    pandas::SeriesGroupByExpandingWindow ew(sgb, 1);
20868    auto result = ew.sum();
20869
20870    check(result.size() == 5, "size_5");
20871    // A group: expanding sum = 1, 3, 6
20872    // B group: expanding sum = 10, 30
20873    // Original order: [A:1, B:10, A:3, B:30, A:6]
20874    check(approx_eq(result[0], 1.0), "A_exp_sum_0");
ngroups (pd_test_1_all.cpp:11497)
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
11505
11506        void pd_test_groupby_multiple_columns() {
11507            std::cout << "========= GroupBy multiple columns ==============";
series (pd_test_2_all.cpp:2307)
2297            std::vector<std::string> index = {"a", "b", "c", "d", "e"};
2298
2299            std::map<std::string, std::vector<numpy::float64>> data1;
2300            data1["col1"] = {1.0, 2.0, 3.0, 4.0, 5.0};
2301            data1["col2"] = {2.0, 4.0, 6.0, 8.0, 10.0};  // Perfectly correlated with col1
2302
2303            pandas::DataFrame df1(data1, std::make_unique<pandas::Index<std::string>>(index));
2304
2305            // Series with same index and values that correlate with df columns
2306            pandas::Series<numpy::float64> series({1.0, 2.0, 3.0, 4.0, 5.0});
2307            series.set_index(pandas::Index<std::string>(index));
2308
2309            pandas::Series<numpy::float64> result = df1.corrwith(series);
2310
2311            bool passed = true;
2312            // col1 should have correlation 1.0 with series
2313            if (!approx_equal(result[0], 1.0)) {
2314                std::cout << "\n  [FAIL] : Expected correlation 1.0 for col1, got " << result[0] << std::endl;
2315                passed = false;
2316            }