Expanding#

class pandas::Expanding#

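Expanding-window operation class: each statistic is computed over all observations from the start of the series up to and including the current position (for example, the expanding median of 1, 2, 3, 4, 5 is 1, 1.5, 2, 2.5, 3).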

Example#

#include <pandas/pandas.h>
using namespace pandas;

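// Expanding is built from a Series (see Constructors); obtain it via Series::expanding()
Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
auto result = s.expanding().median();  // 1, 1.5, 2, 2.5, 3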

Constructors#

Signature

Location

Example

Expanding(const Series<T>& series, size_t min_periods = 1)

pd_expanding.h:45

Indexing / Selection#

Signature

Return Type

Location

Example

double get_value(size_t i) const

double

pd_expanding.h:536

View

Statistics#

Signature

Return Type

Location

Example

Series<double> count() const

Series<double>

pd_expanding.h:101

View

Series<double> kurt() const

Series<double>

pd_expanding.h:392

View

Series<double> max() const

Series<double>

pd_expanding.h:145

View

Series<double> mean() const

Series<double>

pd_expanding.h:78

View

Series<double> median() const

Series<double>

pd_expanding.h:225

View

Series<double> min() const

Series<double>

pd_expanding.h:122

View

Series<double> quantile(double q) const

Series<double>

pd_expanding.h:441

View

Series<double> sem(int ddof = 1) const

Series<double>

pd_expanding.h:479

View

Series<double> skew() const

Series<double>

pd_expanding.h:349

View

Series<double> std_(int ddof = 1) const

Series<double>

pd_expanding.h:168

View

Series<double> sum() const

Series<double>

pd_expanding.h:53

View

Series<double> var(int ddof = 1) const

Series<double>

pd_expanding.h:197

View

Aggregation#

Signature

Return Type

Location

Example

Series<double> apply(Func&& func) const

Series<double>

pd_expanding.h:259

View

Sorting#

Signature

Return Type

Location

Example

Series<double> rank() const

Series<double>

pd_expanding.h:503

View

Other Methods#

Signature

Return Type

Location

Example

Series<double> corr(const Series<T>& other) const

Series<double>

pd_expanding.h:325

View

Series<double> cov(const Series<T>& other, int ddof = 1) const

Series<double>

pd_expanding.h:289

View

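propagate_source_index(res) (call site picked up by the extractor, not a declaration, so no return type is listed; the declaration is the entry at pd_expanding.h:547)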

pd_expanding.h:74

void propagate_source_index(Series<double>& result) const

void

pd_expanding.h:547

Code Examples#

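The following examples are extracted from the test suite. They are matched by method name only, so several of them show a same-named method on a different type (for example BooleanArray, CategoricalArray, Series, DataFrame or Rolling) rather than on Expanding; the median and var excerpts are the ones that call expanding() directly.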

get_value (pd_test_2_all.cpp:17379)
17369    namespace dataframe_tests_transform {
17370
17371        // Helper to check if two doubles are approximately equal
17372        bool approx_equal(double a, double b, double tol = 1e-9) {
17373            if (std::isnan(a) && std::isnan(b)) return true;
17374            if (std::isnan(a) || std::isnan(b)) return false;
17375            return std::abs(a - b) < tol;
17376        }
17377
17378        // Helper to get double value from DataFrame
17379        double get_value(const pandas::DataFrame& df, size_t row, size_t col) {
17380            return df.iloc<double>(row, col);
17381        }
17382
17383        void pd_test_transform_callable() {
17384            std::cout << "========= transform with callable =====================";
17385
17386            std::map<std::string, std::vector<double>> data = {
17387                {"A", {1.0, 4.0, 9.0, 16.0}},
17388                {"B", {2.0, 3.0, 4.0, 5.0}}
17389            };
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
kurt (pd_test_1_all.cpp:4599)
4589            std::cout << "========= Series skew/kurt ======================";
4590
4591            pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592            auto skew_val = s.skew();
4593            bool passed = skew_val.has_value() && *skew_val > 0;  // Should be right-skewed
4594            if (!passed) {
4595                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604            }
4605
4606            // Test kurtosis alias
4607            auto kurt_alias = s.kurtosis();
4608            passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609            if (!passed) {
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
mean (pd_test_1_all.cpp:282)
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901            }
20902
20903            std::cout << " -> tests passed" << std::endl;
20904        }
20905
20906        void pd_test_expanding_median() {
20907            std::cout << "========= Expanding median ======================";
20908
20909            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910            auto result = s.expanding().median();
20911
20912            // Expanding median: 1, 1.5, 2, 2.5, 3
20913            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914                          std::abs(result[1] - 1.5) < 0.001 &&
20915                          std::abs(result[2] - 2.0) < 0.001 &&
20916                          std::abs(result[3] - 2.5) < 0.001 &&
20917                          std::abs(result[4] - 3.0) < 0.001;
20918            if (!passed) {
20919                std::cout << "  [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920                throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
quantile (pd_test_1_all.cpp:4540)
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
4535
4536        void pd_test_aggregation_series_quantile() {
4537            std::cout << "========= Series quantile =======================";
4538
4539            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4540            auto q50 = s.quantile(0.5);
4541            bool passed = q50.has_value() && std::abs(*q50 - 3.0) < 0.001;
4542            if (!passed) {
4543                std::cout << "  [FAIL] : in pd_test_aggregation_series_quantile() : quantile(0.5) should be 3.0" << std::endl;
4544                throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(0.5) should be 3.0");
4545            }
4546
4547            // Test q=0 and q=1
4548            auto q0 = s.quantile(0.0);
4549            passed = q0.has_value() && std::abs(*q0 - 1.0) < 0.001;
4550            if (!passed) {
sem (pd_test_1_all.cpp:4525)
4515 #include "../pandas/pd_dataframe.h"
4516 #include "../pandas/pd_series.h"
4517
4518 namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
skew (pd_test_1_all.cpp:4592)
4582                throw std::runtime_error("pd_test_aggregation_series_mode failed: multi-mode should return 2 values");
4583            }
4584
4585            std::cout << " -> tests passed" << std::endl;
4586        }
4587
4588        void pd_test_aggregation_series_skew_kurt() {
4589            std::cout << "========= Series skew/kurt ======================";
4590
4591            pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592            auto skew_val = s.skew();
4593            bool passed = skew_val.has_value() && *skew_val > 0;  // Should be right-skewed
4594            if (!passed) {
4595                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
std_ (pd_test_1_all.cpp:20752)
20742                throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743            }
20744
20745            std::cout << " -> tests passed" << std::endl;
20746        }
20747
20748        void pd_test_rolling_std() {
20749            std::cout << "========= Rolling std ===========================";
20750
20751            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752            auto result = s.rolling(3).std_();
20753
20754            // std([1,2,3]) = 1.0 (ddof=1)
20755            // std([2,3,4]) = 1.0
20756            // std([3,4,5]) = 1.0
20757            bool passed = std::abs(result[2] - 1.0) < 0.001;
20758            if (!passed) {
20759                std::cout << "  [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
var (pd_test_1_all.cpp:20890)
20880                throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881            }
20882
20883            std::cout << " -> tests passed" << std::endl;
20884        }
20885
20886        void pd_test_expanding_var() {
20887            std::cout << "========= Expanding var =========================";
20888
20889            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890            auto result = s.expanding().var();
20891
20892            // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893            bool passed = std::isnan(result[0]) &&
20894                          std::abs(result[1] - 0.5) < 0.001 &&
20895                          std::abs(result[2] - 1.0) < 0.001 &&
20896                          std::abs(result[3] - 1.6667) < 0.001 &&
20897                          std::abs(result[4] - 2.5) < 0.001;
20898            if (!passed) {
20899                std::cout << "  [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
rank (pd_test_1_all.cpp:6451)
6441        // =====================================================================
6442        // Test: Rank
6443        // =====================================================================
6444        void pd_test_dataframe_rank() {
6445            std::cout << "========= rank =============================";
6446
6447            // Test Series rank with default method (average)
6448            {
6449                std::vector<double> data = {3.0, 1.0, 4.0, 1.0, 5.0};
6450                pandas::Series<double> s(data, "test");
6451                auto ranked = s.rank();
6452
6453                // Values: 3, 1, 4, 1, 5 -> Sorted: 1, 1, 3, 4, 5
6454                // Ranks (average): 1.5, 1.5, 3, 4, 5
6455                // Original positions: 3->3, 1->1.5, 4->4, 1->1.5, 5->5
6456                double r0 = std::stod(ranked.get_value_str(0));  // 3.0 -> rank 3
6457                double r1 = std::stod(ranked.get_value_str(1));  // 1.0 -> rank 1.5
6458
6459                if (std::abs(r0 - 3.0) > 1e-10) {
6460                    std::cout << "  [FAIL] : in pd_test_dataframe_rank() : value 3.0 should have rank 3, got " << r0 << std::endl;
6461                    throw std::runtime_error("pd_test_dataframe_rank failed: value 3.0 rank");
corr (pd_test_1_all.cpp:4655)
4645        }
4646
4647        void pd_test_aggregation_dataframe_corr() {
4648            std::cout << "========= DataFrame corr ========================";
4649
4650            std::map<std::string, std::vector<double>> data;
4651            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652            data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0};  // Perfect correlation
4653            pandas::DataFrame df(data);
4654
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
cov (pd_test_1_all.cpp:4690)
4680            std::cout << " -> tests passed" << std::endl;
4681        }
4682
4683        void pd_test_aggregation_dataframe_cov() {
4684            std::cout << "========= DataFrame cov =========================";
4685
4686            std::map<std::string, std::vector<double>> data;
4687            data["A"] = {1.0, 2.0, 3.0};
4688            pandas::DataFrame df(data);
4689
4690            auto cov_df = df.cov();
4691
4692            // Check dimensions
4693            bool passed = cov_df.nrows() == 1 && cov_df.ncols() == 1;
4694            if (!passed) {
4695                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_cov() : cov should be 1x1" << std::endl;
4696                throw std::runtime_error("pd_test_aggregation_dataframe_cov failed: cov should be 1x1");
4697            }
4698
4699            // Var(A) = 1.0 with ddof=1
4700            std::string aa = cov_df["A"].get_value_str(0);