SeriesResampler#
-
class pandas::SeriesResampler#
Resampling class for a Series: partitions the series into groups (bins) and computes per-group statistics and aggregations such as count, mean, sum, and agg.
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use SeriesResampler
SeriesResampler obj;
// ... operations ...
Constructors#
| Signature | Location | Example |
|---|---|---|
| | pd_resampler.h:456 | |
Indexing / Selection#
Missing Data#
Statistics#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | Series<int64_t> | pd_resampler.h:482 | |
| | Series<T> | pd_resampler.h:475 | |
| | Series<double> | pd_resampler.h:473 | |
| | Series<double> | pd_resampler.h:483 | |
| | Series<T> | pd_resampler.h:474 | |
| | Series<double> | pd_resampler.h:484 | |
| | Series<double> | pd_resampler.h:476 | |
| | Series<double> | pd_resampler.h:472 | |
| | Series<double> | pd_resampler.h:477 | |
Aggregation#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | Series<double> | pd_resampler.h:487 | |
| | DataFrame | pd_resampler.h:490 | |
| | Series<T> | pd_resampler.h:512 | |
Comparison#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | Series<T> | pd_resampler.h:500 | |
Time Series#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | Series<T> | pd_resampler.h:499 | |
Other Methods#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | void | pd_resampler.h:524 | |
| | const std::vector<int64_t>& | pd_resampler.h:520 | |
| | const std::unordered_map<int64_t, std::vector<size_t>>& | pd_resampler.h:519 | |
| | size_t | pd_resampler.h:516 | |
| | int64_t | pd_resampler.h:526 | |
| | const Series<T>& | pd_resampler.h:521 | |
| | void | pd_resampler.h:529 | |
Code Examples#
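The following examples are extracted from the test suite by matching method names. The excerpts below do not call SeriesResampler directly: they exercise identically named methods on other classes (for example DataFrame groupby, BooleanArray, CategoricalArray, PeriodArray, and rolling/expanding windows), and a few only contain the matched name in a comment or variable. Treat them as illustrations of the call shape rather than as SeriesResampler usage.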
first (pd_test_1_all.cpp:11616)
11607 void pd_test_groupby_first_last() {
11608 std::cout << "========= GroupBy first/last ====================";
11609
11610 std::map<std::string, std::vector<double>> data = {
11611 {"category", {1.0, 1.0, 2.0, 2.0}},
11612 {"value", {10.0, 20.0, 30.0, 40.0}}
11613 };
11614 pandas::DataFrame df(data);
11615
11616 auto first_result = df.groupby("category").first();
11617 auto last_result = df.groupby("category").last();
11618
11619 // First for group 1: 10, group 2: 30
11620 // Last for group 1: 20, group 2: 40
11621 double first1 = std::stod(first_result["value"].get_value_str(0));
11622 double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626 if (!passed) {
last (pd_test_1_all.cpp:11617)
11607 void pd_test_groupby_first_last() {
11608 std::cout << "========= GroupBy first/last ====================";
11609
11610 std::map<std::string, std::vector<double>> data = {
11611 {"category", {1.0, 1.0, 2.0, 2.0}},
11612 {"value", {10.0, 20.0, 30.0, 40.0}}
11613 };
11614 pandas::DataFrame df(data);
11615
11616 auto first_result = df.groupby("category").first();
11617 auto last_result = df.groupby("category").last();
11618
11619 // First for group 1: 10, group 2: 30
11620 // Last for group 1: 20, group 2: 40
11621 double first1 = std::stod(first_result["value"].get_value_str(0));
11622 double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626 if (!passed) {
11627 std::cout << " [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
bfill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
max (pd_test_1_all.cpp:771)
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
mean (pd_test_1_all.cpp:282)
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901 }
20902
20903 std::cout << " -> tests passed" << std::endl;
20904 }
20905
20906 void pd_test_expanding_median() {
20907 std::cout << "========= Expanding median ======================";
20908
20909 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910 auto result = s.expanding().median();
20911
20912 // Expanding median: 1, 1.5, 2, 2.5, 3
20913 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914 std::abs(result[1] - 1.5) < 0.001 &&
20915 std::abs(result[2] - 2.0) < 0.001 &&
20916 std::abs(result[3] - 2.5) < 0.001 &&
20917 std::abs(result[4] - 3.0) < 0.001;
20918 if (!passed) {
20919 std::cout << " [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920 throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754 }
755
756 void pd_test_categorical_array_ordered_operations() {
757 std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759 std::vector<std::string> cats = {"low", "medium", "high"};
760 std::vector<numpy::int32> codes = {0, 2, 1, 0, -1}; // low, high, medium, low, NA
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
prod (pd_test_1_all.cpp:26082)
26072 std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073 return 0;
26074 }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
std_ (pd_test_1_all.cpp:20752)
20742 throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743 }
20744
20745 std::cout << " -> tests passed" << std::endl;
20746 }
20747
20748 void pd_test_rolling_std() {
20749 std::cout << "========= Rolling std ===========================";
20750
20751 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752 auto result = s.rolling(3).std_();
20753
20754 // std([1,2,3]) = 1.0 (ddof=1)
20755 // std([2,3,4]) = 1.0
20756 // std([3,4,5]) = 1.0
20757 bool passed = std::abs(result[2] - 1.0) < 0.001;
20758 if (!passed) {
20759 std::cout << " [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
sum (pd_test_1_all.cpp:276)
266 }
267
268 // Test sum/mean
269 pandas::BooleanArray arr({
270 std::optional<bool>(true),
271 std::optional<bool>(false),
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
var (pd_test_1_all.cpp:20890)
20880 throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881 }
20882
20883 std::cout << " -> tests passed" << std::endl;
20884 }
20885
20886 void pd_test_expanding_var() {
20887 std::cout << "========= Expanding var =========================";
20888
20889 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890 auto result = s.expanding().var();
20891
20892 // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893 bool passed = std::isnan(result[0]) &&
20894 std::abs(result[1] - 0.5) < 0.001 &&
20895 std::abs(result[2] - 1.0) < 0.001 &&
20896 std::abs(result[3] - 1.6667) < 0.001 &&
20897 std::abs(result[4] - 2.5) < 0.001;
20898 if (!passed) {
20899 std::cout << " [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
asfreq (pd_test_1_all.cpp:2869)
2859 std::cout << "========= PeriodArray: asfreq ======================= ";
2860
2861 // Monthly to quarterly
2862 pandas::PeriodArray arr_m(std::vector<std::string>{
2863 "2024-01",
2864 "2024-04",
2865 "2024-07",
2866 "NaT"
2867 }, "M");
2868
2869 auto arr_q = arr_m.asfreq("Q");
2870 if (arr_q.size() != 4) {
2871 std::cout << " [FAIL] : asfreq size should be 4" << std::endl;
2872 throw std::runtime_error("pd_test_period_array_asfreq failed: size");
2873 }
2874 if (arr_q.freqstr() != "Q") {
2875 std::cout << " [FAIL] : asfreq freqstr should be 'Q'" << std::endl;
2876 throw std::runtime_error("pd_test_period_array_asfreq failed: freqstr");
2877 }
2878
2879 // Check NaT is preserved
groups (pd_test_2_all.cpp:20864)
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859 std::cout << " -- test_series_groupby_expanding_sum --" << std::endl;
20860
20861 // Two groups: A=[1,2,3], B=[10,20]
20862 std::vector<numpy::float64> vals = {1.0, 10.0, 2.0, 20.0, 3.0};
20863 pandas::Series<numpy::float64> data(vals);
20864 pandas::Series<std::string> groups({"A", "B", "A", "B", "A"});
20865
20866 auto sgb = data.groupby(groups);
20867 pandas::SeriesGroupByExpandingWindow ew(sgb, 1);
20868 auto result = ew.sum();
20869
20870 check(result.size() == 5, "size_5");
20871 // A group: expanding sum = 1, 3, 6
20872 // B group: expanding sum = 10, 30
20873 // Original order: [A:1, B:10, A:3, B:30, A:6]
20874 check(approx_eq(result[0], 1.0), "A_exp_sum_0");
ngroups (pd_test_1_all.cpp:11497)
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
11505
11506 void pd_test_groupby_multiple_columns() {
11507 std::cout << "========= GroupBy multiple columns ==============";
series (pd_test_2_all.cpp:2307)
2297 std::vector<std::string> index = {"a", "b", "c", "d", "e"};
2298
2299 std::map<std::string, std::vector<numpy::float64>> data1;
2300 data1["col1"] = {1.0, 2.0, 3.0, 4.0, 5.0};
2301 data1["col2"] = {2.0, 4.0, 6.0, 8.0, 10.0}; // Perfectly correlated with col1
2302
2303 pandas::DataFrame df1(data1, std::make_unique<pandas::Index<std::string>>(index));
2304
2305 // Series with same index and values that correlate with df columns
2306 pandas::Series<numpy::float64> series({1.0, 2.0, 3.0, 4.0, 5.0});
2307 series.set_index(pandas::Index<std::string>(index));
2308
2309 pandas::Series<numpy::float64> result = df1.corrwith(series);
2310
2311 bool passed = true;
2312 // col1 should have correlation 1.0 with series
2313 if (!approx_equal(result[0], 1.0)) {
2314 std::cout << "\n [FAIL] : Expected correlation 1.0 for col1, got " << result[0] << std::endl;
2315 passed = false;
2316 }