DataFrameResampler#
-
class pandas::DataFrameResampler#
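Resampler class for time-based resampling of a DataFrame. An instance is obtained by calling resample on a DataFrame (e.g. auto resampler = df.resample("D");) and is then reduced with one of the methods listed below, such as ohlc(), which returns a DataFrame.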
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Use DataFrameResampler
DataFrameResampler obj;
// ... operations ...
Constructors#
Signature |
Location |
Example |
|---|---|---|
|
pd_resampler.h:308 |
Indexing / Selection#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_resampler.h:361 |
|
|
std::vector<std::string> |
pd_resampler.h:424 |
|
|
int64_t |
pd_resampler.h:422 |
|
|
DataFrame |
pd_resampler.h:364 |
Missing Data#
Statistics#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_resampler.h:335 |
|
|
DataFrame |
pd_resampler.h:332 |
|
|
DataFrame |
pd_resampler.h:330 |
|
|
DataFrame |
pd_resampler.h:336 |
|
|
DataFrame |
pd_resampler.h:331 |
|
|
DataFrame |
pd_resampler.h:333 |
|
|
DataFrame |
pd_resampler.h:329 |
|
|
DataFrame |
pd_resampler.h:334 |
Aggregation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_resampler.h:342 |
|
|
DataFrame |
pd_resampler.h:348 |
|
|
DataFrame |
pd_resampler.h:354 |
|
|
std::vector<double> |
pd_resampler.h:427 |
|
|
DataFrame |
pd_resampler.h:384 |
Other Methods#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
void |
pd_resampler.h:421 |
|
|
const std::string& |
pd_resampler.h:406 |
|
|
static double |
pd_resampler.h:430 |
|
|
const DataFrame& |
pd_resampler.h:403 |
|
|
const std::string& |
pd_resampler.h:400 |
|
|
const std::vector<int64_t>& |
pd_resampler.h:415 |
|
|
const std::unordered_map<int64_t, std::vector<size_t>>& |
pd_resampler.h:412 |
|
|
const std::string& |
pd_resampler.h:409 |
|
|
size_t |
pd_resampler.h:397 |
|
|
DataFrame |
pd_resampler.h:370 |
|
|
int64_t |
pd_resampler.h:423 |
|
|
const std::vector<int64_t>& |
pd_resampler.h:418 |
|
|
DataFrame |
pd_resampler.h:394 |
Code Examples#
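The following examples are extracted from the test suite. They are selected by method name only, so many of them exercise a same-named method on a different class (for example BooleanArray::count, CategoricalArray::min/max, Series::agg, or df.groupby(...).first()) rather than DataFrameResampler itself; the ohlc example shows actual resampler usage via df.resample("D").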
first (pd_test_1_all.cpp:11616)
11606 void pd_test_groupby_first_last() {
11607 std::cout << "========= GroupBy first/last ====================";
11608
11609 std::map<std::string, std::vector<double>> data = {
11610 {"category", {1.0, 1.0, 2.0, 2.0}},
11611 {"value", {10.0, 20.0, 30.0, 40.0}}
11612 };
11613 pandas::DataFrame df(data);
11614
11615 auto first_result = df.groupby("category").first();
11616 auto last_result = df.groupby("category").last();
11617
11618 // First for group 1: 10, group 2: 30
11619 // Last for group 1: 20, group 2: 40
11620 double first1 = std::stod(first_result["value"].get_value_str(0));
11621 double first2 = std::stod(first_result["value"].get_value_str(1));
11622
11623 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11624 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11625 if (!passed) {
last (pd_test_1_all.cpp:11617)
11607 void pd_test_groupby_first_last() {
11608 std::cout << "========= GroupBy first/last ====================";
11609
11610 std::map<std::string, std::vector<double>> data = {
11611 {"category", {1.0, 1.0, 2.0, 2.0}},
11612 {"value", {10.0, 20.0, 30.0, 40.0}}
11613 };
11614 pandas::DataFrame df(data);
11615
11616 auto first_result = df.groupby("category").first();
11617 auto last_result = df.groupby("category").last();
11618
11619 // First for group 1: 10, group 2: 30
11620 // Last for group 1: 20, group 2: 40
11621 double first1 = std::stod(first_result["value"].get_value_str(0));
11622 double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626 if (!passed) {
11627 std::cout << " [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
bfill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
max (pd_test_1_all.cpp:771)
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
mean (pd_test_1_all.cpp:282)
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901 }
20902
20903 std::cout << " -> tests passed" << std::endl;
20904 }
20905
20906 void pd_test_expanding_median() {
20907 std::cout << "========= Expanding median ======================";
20908
20909 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910 auto result = s.expanding().median();
20911
20912 // Expanding median: 1, 1.5, 2, 2.5, 3
20913 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914 std::abs(result[1] - 1.5) < 0.001 &&
20915 std::abs(result[2] - 2.0) < 0.001 &&
20916 std::abs(result[3] - 2.5) < 0.001 &&
20917 std::abs(result[4] - 3.0) < 0.001;
20918 if (!passed) {
20919 std::cout << " [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920 throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754 }
755
756 void pd_test_categorical_array_ordered_operations() {
757 std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759 std::vector<std::string> cats = {"low", "medium", "high"};
760 std::vector<numpy::int32> codes = {0, 2, 1, 0, -1}; // low, high, medium, low, NA
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
std_ (pd_test_1_all.cpp:20752)
20742 throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743 }
20744
20745 std::cout << " -> tests passed" << std::endl;
20746 }
20747
20748 void pd_test_rolling_std() {
20749 std::cout << "========= Rolling std ===========================";
20750
20751 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752 auto result = s.rolling(3).std_();
20753
20754 // std([1,2,3]) = 1.0 (ddof=1)
20755 // std([2,3,4]) = 1.0
20756 // std([3,4,5]) = 1.0
20757 bool passed = std::abs(result[2] - 1.0) < 0.001;
20758 if (!passed) {
20759 std::cout << " [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
sum (pd_test_1_all.cpp:276)
266 }
267
268 // Test sum/mean
269 pandas::BooleanArray arr({
270 std::optional<bool>(true),
271 std::optional<bool>(false),
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
var (pd_test_1_all.cpp:20890)
20880 throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881 }
20882
20883 std::cout << " -> tests passed" << std::endl;
20884 }
20885
20886 void pd_test_expanding_var() {
20887 std::cout << "========= Expanding var =========================";
20888
20889 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890 auto result = s.expanding().var();
20891
20892 // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893 bool passed = std::isnan(result[0]) &&
20894 std::abs(result[1] - 0.5) < 0.001 &&
20895 std::abs(result[2] - 1.0) < 0.001 &&
20896 std::abs(result[3] - 1.6667) < 0.001 &&
20897 std::abs(result[4] - 2.5) < 0.001;
20898 if (!passed) {
20899 std::cout << " [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
transform (pd_test_1_all.cpp:11071)
11061 std::cout << " -> tests passed" << std::endl;
11062 }
11063
11064 void pd_test_func_apply_series_transform() {
11065 std::cout << "========= Series transform ============================";
11066
11067 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11068
11069 // Transform must return same shape
11070 auto result = s.transform([](double x) { return x * 2 + 1; });
11071
11072 bool passed = true;
11073 if (result.size() != s.size()) {
11074 passed = false;
11075 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11076 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11077 }
11078
11079 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11080 for (size_t i = 0; i < result.size(); ++i) {
closed (pd_test_1_all.cpp:1903)
1893// ============================================================================
1894void test_constructors() {
1895 std::cout << "========= IntervalArray: constructors ======================= ";
1896
1897 // Default constructor
1898 pandas::IntervalArrayFloat64 empty;
1899 if (empty.size() != 0) {
1900 std::cout << "[FAIL] : in test_constructors() : default constructor size" << std::endl;
1901 return;
1902 }
1903 if (empty.closed() != pandas::IntervalClosed::Right) {
1904 std::cout << "[FAIL] : in test_constructors() : default closure" << std::endl;
1905 return;
1906 }
1907
1908 // Constructor from left/right arrays
1909 numpy::NDArray<numpy::float64> left(std::vector<size_t>{3});
1910 numpy::NDArray<numpy::float64> right(std::vector<size_t>{3});
1911 left.setElementAt({0}, 0.0); right.setElementAt({0}, 1.0);
1912 left.setElementAt({1}, 1.0); right.setElementAt({1}, 2.0);
1913 left.setElementAt({2}, 2.0); right.setElementAt({2}, 3.0);
compute_agg (pd_test_5_all.cpp:112204)
112194 // Default signature is groupby(by, axis, level, as_index, sort, group_keys, observed, dropna).
112195 auto gb = df_in.groupby("k", 0, std::nullopt, /*as_index=*/true,
112196 /*sort=*/true, /*group_keys=*/true,
112197 /*observed=*/false, /*dropna=*/true);
112198 pandas::DataFrame df = gb.agg("sum");
112199 std::string actual = df.to_string();
112200
112201 // Pandas oracle (verified by analysis1 H3 logic + compute_agg empty=0.0):
112202 // - "a" observed, sum=10
112203 // - "b" observed, sum=20
112204 // - "c" unobserved -> compute_agg(empty, "sum") -> 0
112205 // Plan 12 (Logic-C int widening) has landed: aggregate_column now
112206 // preserves int64 for integer inputs, so the oracle is int64 with
112207 // integer literal display (no .0 suffix).
112208 std::string expected =
112209 " v\n"
112210 "k \n"
112211 "a 10\n"
112212 "b 20\n"
112213 "c 0";
112214 check_case("groupby_agg_dispatch_7c3a91_case_41",
dataframe (pd_test_2_all.cpp:11742)
11732 std::cout << " [FAIL] : wrong dimensions" << std::endl;
11733 std::remove(temp_path.c_str());
11734 throw std::runtime_error("pd_test_to_hdf_mixed_types failed");
11735 }
11736
11737 std::remove(temp_path.c_str());
11738 std::cout << " -> tests passed" << std::endl;
11739 }
11740
11741 void pd_test_to_hdf_empty_dataframe() {
11742 std::cout << "========= to_hdf empty dataframe (real HDF5) ===================";
11743
11744 pandas::DataFrame df;
11745 std::string temp_path = "temp/test_hdf5_empty.h5";
11746 df.to_hdf(temp_path, "df", "w");
11747
11748 // Just verify file was created
11749 std::ifstream file(temp_path);
11750 if (!file.is_open()) {
11751 std::cout << " [FAIL] : file not created" << std::endl;
11752 throw std::runtime_error("pd_test_to_hdf_empty_dataframe failed");
freq (pd_test_1_all.cpp:8233)
8223 std::cout << "========= freq property ===============================";
8224
8225 std::vector<std::optional<numpy::datetime64>> values = {
8226 numpy::datetime64(0LL, numpy::DateTimeUnit::Nanosecond),
8227 numpy::datetime64(86400000000000LL, numpy::DateTimeUnit::Nanosecond) // 1 day
8228 };
8229 pandas::DatetimeArray arr(values);
8230 pandas::DatetimeMixinIndex idx(arr);
8231
8232 // Default freq is nullopt or inferred
8233 auto f = idx.freq();
8234 std::string fs = idx.freqstr();
8235
8236 bool passed = true; // freq may or may not be set
8237 if (!passed) {
8238 std::cout << " [FAIL] : in pd_test_datetime_mixin_freq()" << std::endl;
8239 throw std::runtime_error("pd_test_datetime_mixin_freq failed");
8240 }
8241
8242 std::cout << " -> tests passed" << std::endl;
8243}
groups (pd_test_2_all.cpp:20864)
20854// =====================================================================
20855// Per-group expanding tests
20856// =====================================================================
20857
20858void test_series_groupby_expanding_sum() {
20859 std::cout << " -- test_series_groupby_expanding_sum --" << std::endl;
20860
20861 // Two groups: A=[1,2,3], B=[10,20]
20862 std::vector<numpy::float64> vals = {1.0, 10.0, 2.0, 20.0, 3.0};
20863 pandas::Series<numpy::float64> data(vals);
20864 pandas::Series<std::string> groups({"A", "B", "A", "B", "A"});
20865
20866 auto sgb = data.groupby(groups);
20867 pandas::SeriesGroupByExpandingWindow ew(sgb, 1);
20868 auto result = ew.sum();
20869
20870 check(result.size() == 5, "size_5");
20871 // A group: expanding sum = 1, 3, 6
20872 // B group: expanding sum = 10, 30
20873 // Original order: [A:1, B:10, A:3, B:30, A:6]
20874 check(approx_eq(result[0], 1.0), "A_exp_sum_0");
label (pd_test_4_all.cpp:4935)
4925// Helper: compare and report
4926// ----------------------------------------------------------------------------
4927static void check_str(const std::string& label,
4928 const std::string& expected,
4929 const std::string& actual) {
4930 int _f = 0;
4931 pandas_tests::check_str_ws(label, expected, actual, _f);
4932 if (_f > 0) throw std::runtime_error(label + ": str mismatch");
4933}
4934
4935// Slugify a python compare-test label ("a.b.c" → "a_b_c") matching the
4936// scheme in scripts/gen_repr_mismatch_fixtures.py.
4937static std::string slugify_label(const std::string& label) {
4938 std::string out = label;
4939 for (char& ch : out) {
4940 if (ch == '.') ch = '_';
4941 }
4942 return out;
4943}
4944
4945// Load a captured pandas-generated expected output file. The file is written
ngroups (pd_test_1_all.cpp:11497)
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
11505
11506 void pd_test_groupby_multiple_columns() {
11507 std::cout << "========= GroupBy multiple columns ==============";
ohlc (pd_test_1_all.cpp:20388)
20378 "2020-01-01 11:00:00",
20379 "2020-01-01 12:00:00",
20380 "2020-01-02 09:00:00",
20381 "2020-01-02 10:00:00",
20382 "2020-01-02 11:00:00",
20383 "2020-01-02 12:00:00"
20384 };
20385 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20386
20387 auto resampler = df.resample("D");
20388 pandas::DataFrame result = resampler.ohlc();
20389
20390 // Should have open, high, low, close columns
20391 const auto& cols = result.columns();
20392 bool has_open = false, has_high = false, has_low = false, has_close = false;
20393 for (size_t i = 0; i < cols.size(); ++i) {
20394 std::string c = cols[i];
20395 if (c.find("open") != std::string::npos) has_open = true;
20396 if (c.find("high") != std::string::npos) has_high = true;
20397 if (c.find("low") != std::string::npos) has_low = true;
20398 if (c.find("close") != std::string::npos) has_close = true;
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)