Expanding#
-
class pandas::Expanding#
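Window-operation class for expanding calculations: each output element is computed over all values from the start of the series up to and including the current position (for example, the expanding median of {1, 2, 3, 4, 5} is {1, 1.5, 2, 2.5, 3}).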
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use Expanding
Expanding obj;
// ... operations ...
Constructors#
| Signature | Location | Example |
|---|---|---|
| | pd_expanding.h:45 | |
Indexing / Selection#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `double` | pd_expanding.h:536 | |
Statistics#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `Series<double>` | pd_expanding.h:101 | |
| | `Series<double>` | pd_expanding.h:392 | |
| | `Series<double>` | pd_expanding.h:145 | |
| | `Series<double>` | pd_expanding.h:78 | |
| | `Series<double>` | pd_expanding.h:225 | |
| | `Series<double>` | pd_expanding.h:122 | |
| | `Series<double>` | pd_expanding.h:441 | |
| | `Series<double>` | pd_expanding.h:479 | |
| | `Series<double>` | pd_expanding.h:349 | |
| | `Series<double>` | pd_expanding.h:168 | |
| | `Series<double>` | pd_expanding.h:53 | |
| | `Series<double>` | pd_expanding.h:197 | |
Aggregation#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `Series<double>` | pd_expanding.h:259 | |
Sorting#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `Series<double>` | pd_expanding.h:503 | |
Other Methods#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `Series<double>` | pd_expanding.h:325 | |
| | `Series<double>` | pd_expanding.h:289 | |
| | | pd_expanding.h:74 | |
| | `void` | pd_expanding.h:547 | |
Code Examples#
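The following examples are extracted from the test suite. The excerpts appear to be selected by method name, so several of them exercise a same-named method on a different class (for example `arr.max()` on a `CategoricalArray`, `s.quantile(0.5)` on a `Series<double>`, `df.corr()` on a `DataFrame`, or `s.rolling(3).std_()` on a rolling window) rather than on `Expanding`. The `median` and `var` excerpts use `Expanding` directly, via `s.expanding()`.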
get_value (pd_test_2_all.cpp:17379)
17369 namespace dataframe_tests_transform {
17370
17371 // Helper to check if two doubles are approximately equal
17372 bool approx_equal(double a, double b, double tol = 1e-9) {
17373 if (std::isnan(a) && std::isnan(b)) return true;
17374 if (std::isnan(a) || std::isnan(b)) return false;
17375 return std::abs(a - b) < tol;
17376 }
17377
17378 // Helper to get double value from DataFrame
17379 double get_value(const pandas::DataFrame& df, size_t row, size_t col) {
17380 return df.iloc<double>(row, col);
17381 }
17382
17383 void pd_test_transform_callable() {
17384 std::cout << "========= transform with callable =====================";
17385
17386 std::map<std::string, std::vector<double>> data = {
17387 {"A", {1.0, 4.0, 9.0, 16.0}},
17388 {"B", {2.0, 3.0, 4.0, 5.0}}
17389 };
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
kurt (pd_test_1_all.cpp:4599)
4589 std::cout << "========= Series skew/kurt ======================";
4590
4591 pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592 auto skew_val = s.skew();
4593 bool passed = skew_val.has_value() && *skew_val > 0; // Should be right-skewed
4594 if (!passed) {
4595 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604 }
4605
4606 // Test kurtosis alias
4607 auto kurt_alias = s.kurtosis();
4608 passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609 if (!passed) {
max (pd_test_1_all.cpp:771)
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
mean (pd_test_1_all.cpp:282)
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901 }
20902
20903 std::cout << " -> tests passed" << std::endl;
20904 }
20905
20906 void pd_test_expanding_median() {
20907 std::cout << "========= Expanding median ======================";
20908
20909 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910 auto result = s.expanding().median();
20911
20912 // Expanding median: 1, 1.5, 2, 2.5, 3
20913 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914 std::abs(result[1] - 1.5) < 0.001 &&
20915 std::abs(result[2] - 2.0) < 0.001 &&
20916 std::abs(result[3] - 2.5) < 0.001 &&
20917 std::abs(result[4] - 3.0) < 0.001;
20918 if (!passed) {
20919 std::cout << " [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920 throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754 }
755
756 void pd_test_categorical_array_ordered_operations() {
757 std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759 std::vector<std::string> cats = {"low", "medium", "high"};
760 std::vector<numpy::int32> codes = {0, 2, 1, 0, -1}; // low, high, medium, low, NA
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
quantile (pd_test_1_all.cpp:4540)
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
4535
4536 void pd_test_aggregation_series_quantile() {
4537 std::cout << "========= Series quantile =======================";
4538
4539 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4540 auto q50 = s.quantile(0.5);
4541 bool passed = q50.has_value() && std::abs(*q50 - 3.0) < 0.001;
4542 if (!passed) {
4543 std::cout << " [FAIL] : in pd_test_aggregation_series_quantile() : quantile(0.5) should be 3.0" << std::endl;
4544 throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(0.5) should be 3.0");
4545 }
4546
4547 // Test q=0 and q=1
4548 auto q0 = s.quantile(0.0);
4549 passed = q0.has_value() && std::abs(*q0 - 1.0) < 0.001;
4550 if (!passed) {
sem (pd_test_1_all.cpp:4525)
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
skew (pd_test_1_all.cpp:4592)
4582 throw std::runtime_error("pd_test_aggregation_series_mode failed: multi-mode should return 2 values");
4583 }
4584
4585 std::cout << " -> tests passed" << std::endl;
4586 }
4587
4588 void pd_test_aggregation_series_skew_kurt() {
4589 std::cout << "========= Series skew/kurt ======================";
4590
4591 pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592 auto skew_val = s.skew();
4593 bool passed = skew_val.has_value() && *skew_val > 0; // Should be right-skewed
4594 if (!passed) {
4595 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
std_ (pd_test_1_all.cpp:20752)
20742 throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743 }
20744
20745 std::cout << " -> tests passed" << std::endl;
20746 }
20747
20748 void pd_test_rolling_std() {
20749 std::cout << "========= Rolling std ===========================";
20750
20751 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752 auto result = s.rolling(3).std_();
20753
20754 // std([1,2,3]) = 1.0 (ddof=1)
20755 // std([2,3,4]) = 1.0
20756 // std([3,4,5]) = 1.0
20757 bool passed = std::abs(result[2] - 1.0) < 0.001;
20758 if (!passed) {
20759 std::cout << " [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
sum (pd_test_1_all.cpp:276)
266 }
267
268 // Test sum/mean
269 pandas::BooleanArray arr({
270 std::optional<bool>(true),
271 std::optional<bool>(false),
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
var (pd_test_1_all.cpp:20890)
20880 throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881 }
20882
20883 std::cout << " -> tests passed" << std::endl;
20884 }
20885
20886 void pd_test_expanding_var() {
20887 std::cout << "========= Expanding var =========================";
20888
20889 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890 auto result = s.expanding().var();
20891
20892 // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893 bool passed = std::isnan(result[0]) &&
20894 std::abs(result[1] - 0.5) < 0.001 &&
20895 std::abs(result[2] - 1.0) < 0.001 &&
20896 std::abs(result[3] - 1.6667) < 0.001 &&
20897 std::abs(result[4] - 2.5) < 0.001;
20898 if (!passed) {
20899 std::cout << " [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
rank (pd_test_1_all.cpp:6451)
6441 // =====================================================================
6442 // Test: Rank
6443 // =====================================================================
6444 void pd_test_dataframe_rank() {
6445 std::cout << "========= rank =============================";
6446
6447 // Test Series rank with default method (average)
6448 {
6449 std::vector<double> data = {3.0, 1.0, 4.0, 1.0, 5.0};
6450 pandas::Series<double> s(data, "test");
6451 auto ranked = s.rank();
6452
6453 // Values: 3, 1, 4, 1, 5 -> Sorted: 1, 1, 3, 4, 5
6454 // Ranks (average): 1.5, 1.5, 3, 4, 5
6455 // Original positions: 3->3, 1->1.5, 4->4, 1->1.5, 5->5
6456 double r0 = std::stod(ranked.get_value_str(0)); // 3.0 -> rank 3
6457 double r1 = std::stod(ranked.get_value_str(1)); // 1.0 -> rank 1.5
6458
6459 if (std::abs(r0 - 3.0) > 1e-10) {
6460 std::cout << " [FAIL] : in pd_test_dataframe_rank() : value 3.0 should have rank 3, got " << r0 << std::endl;
6461 throw std::runtime_error("pd_test_dataframe_rank failed: value 3.0 rank");
corr (pd_test_1_all.cpp:4655)
4645 }
4646
4647 void pd_test_aggregation_dataframe_corr() {
4648 std::cout << "========= DataFrame corr ========================";
4649
4650 std::map<std::string, std::vector<double>> data;
4651 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652 data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0}; // Perfect correlation
4653 pandas::DataFrame df(data);
4654
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
cov (pd_test_1_all.cpp:4690)
4680 std::cout << " -> tests passed" << std::endl;
4681 }
4682
4683 void pd_test_aggregation_dataframe_cov() {
4684 std::cout << "========= DataFrame cov =========================";
4685
4686 std::map<std::string, std::vector<double>> data;
4687 data["A"] = {1.0, 2.0, 3.0};
4688 pandas::DataFrame df(data);
4689
4690 auto cov_df = df.cov();
4691
4692 // Check dimensions
4693 bool passed = cov_df.nrows() == 1 && cov_df.ncols() == 1;
4694 if (!passed) {
4695 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_cov() : cov should be 1x1" << std::endl;
4696 throw std::runtime_error("pd_test_aggregation_dataframe_cov failed: cov should be 1x1");
4697 }
4698
4699 // Var(A) = 1.0 with ddof=1
4700 std::string aa = cov_df["A"].get_value_str(0);