DataFrame#
-
class pandas::DataFrame#
Core data container class in the pandas namespace.
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Create DataFrame
DataFrame df;
df["A"] = {1, 2, 3};
df["B"] = {4.0, 5.0, 6.0};
// Access data
auto shape = df.shape();
auto columns = df.columns();
Constructors#
| Signature | Location | Example |
|---|---|---|
|  | pd_dataframe.h:761 |  |
|  | pd_dataframe.h:819 |  |
|  | pd_dataframe.h:884 |  |
|  | pd_dataframe.h:939 |  |
|  | pd_dataframe.h:964 |  |
|  | pd_dataframe.h:1078 |  |
|  | pd_dataframe.h:1123 |  |
Construction#
| Signature | Return Type | Location | Example |
|---|---|---|---|
|  | std::unique_ptr<NDFrameBase> | pd_dataframe.h:27133 |  |
Indexing / Selection#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
T |
pd_dataframe.h:3997 |
|
|
DataFrame |
pd_dataframe.h:26966 |
|
|
const Attrs& |
pd_dataframe.h:1825 |
|
|
Attrs& |
pd_dataframe.h:1832 |
|
|
DataFrame |
pd_dataframe.h:27022 |
|
|
std::optional<std::string> |
pd_dataframe.h:13168 |
|
|
std::optional<ReturnType> |
pd_dataframe.h:13303 |
|
|
Series<numpy::bool_> |
pd_dataframe.h:3379 |
|
|
Series<numpy::int64> |
pd_dataframe.h:3458 |
|
|
Series<T> |
pd_dataframe.h:3149 |
|
|
Series<std::string> |
pd_dataframe.h:3422 |
|
|
const Series<numpy::uint64>* |
pd_dataframe.h:3510 |
|
|
Series<numpy::float64> |
pd_dataframe.h:3489 |
|
|
size_t |
pd_dataframe.h:3282 |
|
|
static std::pair<double, double> |
pd_dataframe.h:22474 |
|
|
std::vector<double> |
pd_dataframe.h:23099 |
|
|
const std::vector<std::vector<std::string>>& |
pd_dataframe.h:1537 |
|
|
const std::vector<bool>& |
pd_dataframe.h:1544 |
|
|
DataFrame |
pd_dataframe.h:7226 |
|
|
DataFrame |
pd_dataframe.h:3347 |
|
|
DataFrame |
pd_dataframe.h:3562 |
|
|
size_t |
pd_dataframe.h:15914 |
|
|
std::optional<const NDFrameBase*> |
pd_dataframe.h:6753 |
|
|
Series<T> |
pd_dataframe.h:3630 |
|
|
bool |
pd_dataframe.h:8435 |
|
|
DataFrame |
pd_dataframe.h:4016 |
|
|
T |
pd_dataframe.h:4005 |
|
|
Series<std::string> |
pd_dataframe.h:10501 |
|
|
Series<std::string> |
pd_dataframe.h:10587 |
|
|
DataFrame |
pd_dataframe.h:3794 |
|
|
T |
pd_dataframe.h:3802 |
|
|
DataFrame |
pd_dataframe.h:3983 |
|
|
DataFrame |
pd_dataframe.h:3969 |
|
|
DataFrame |
pd_dataframe.h:3820 |
|
|
DataFrame |
pd_dataframe.h:3830 |
|
|
DataFrame |
pd_dataframe.h:3865 |
|
|
DataFrame |
pd_dataframe.h:27029 |
|
|
std::optional<std::string> |
pd_dataframe.h:13188 |
|
|
T |
pd_dataframe.h:3709 |
|
|
DataFrame |
pd_dataframe.h:3718 |
|
|
LocProxy |
pd_dataframe.h:3730 |
|
|
DataFrame |
pd_dataframe.h:3737 |
|
|
DataFrame |
pd_dataframe.h:3781 |
|
|
DataFrame |
pd_dataframe.h:3759 |
|
|
DataFrame |
pd_dataframe.h:3767 |
|
|
DataFrame |
pd_dataframe.h:9354 |
|
|
DataFrame |
pd_dataframe.h:15038 |
|
|
DataFrame |
pd_dataframe.h:15051 |
|
|
DataFrame |
pd_dataframe.h:9486 |
|
|
std::string |
pd_dataframe.h:9539 |
|
|
static double |
pd_dataframe.h:9617 |
|
|
DataFrame |
pd_dataframe.h:4040 |
|
|
DataFrame |
pd_dataframe.h:4141 |
|
|
DataFrame |
pd_dataframe.h:4155 |
|
|
DataFrame |
pd_dataframe.h:4176 |
|
|
DataFrame |
pd_dataframe.h:4187 |
|
|
DataFrame |
pd_dataframe.h:4168 |
|
|
void |
pd_dataframe.h:8465 |
|
|
void |
pd_dataframe.h:8449 |
|
|
DataFrame |
pd_dataframe.h:4024 |
|
|
DataFrame |
pd_dataframe.h:4201 |
|
|
std::unique_ptr<NDFrameBase> |
pd_dataframe.h:27152 |
|
|
DataFrame |
pd_dataframe.h:8953 |
|
|
DataFrame |
pd_dataframe.h:8993 |
|
|
DataFrame |
pd_dataframe.h:9130 |
|
|
DataFrame |
pd_dataframe.h:9271 |
|
|
DataFrame |
pd_dataframe.h:4274 |
|
|
DataFrame |
pd_dataframe.h:4315 |
|
|
DataFrame |
pd_dataframe.h:4347 |
|
|
DataFrame |
pd_dataframe.h:4444 |
|
|
DataFrame |
pd_dataframe.h:4550 |
|
|
DataFrame |
pd_dataframe.h:4367 |
|
|
DataFrame |
pd_dataframe.h:4392 |
|
|
DataFrame |
pd_dataframe.h:4492 |
Data Manipulation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:10259 |
|
|
DataFrame |
pd_dataframe.h:10300 |
|
|
DataFrame |
pd_dataframe.h:10322 |
|
|
DataFrame |
pd_dataframe.h:7177 |
|
|
DataFrame |
pd_dataframe.h:10113 |
|
|
void |
pd_dataframe.h:7243 |
|
|
static std::string |
pd_dataframe.h:15555 |
|
|
static void |
pd_dataframe.h:16405 |
|
|
pd_dataframe.h:16470 |
||
|
pd_dataframe.h:16957 |
||
|
pd_dataframe.h:17001 |
||
|
DataFrame |
pd_dataframe.h:15296 |
|
|
DataFrame |
pd_dataframe.h:15313 |
|
|
DataFrame |
pd_dataframe.h:15335 |
|
|
DataFrame |
pd_dataframe.h:15365 |
|
|
DataFrame |
pd_dataframe.h:15499 |
|
|
DataFrame |
pd_dataframe.h:15388 |
|
|
DataFrame |
pd_dataframe.h:7518 |
|
|
DataFrame |
pd_dataframe.h:7525 |
|
|
void |
pd_dataframe.h:6806 |
|
|
pd_dataframe.h:6874 |
||
|
pd_dataframe.h:6877 |
||
|
pd_dataframe.h:6906 |
||
|
pd_dataframe.h:6942 |
||
|
pd_dataframe.h:6965 |
||
|
pd_dataframe.h:7006 |
||
|
pd_dataframe.h:7026 |
||
|
pd_dataframe.h:7039 |
||
|
DataFrame |
pd_dataframe.h:5422 |
|
|
DataFrame |
pd_dataframe.h:6475 |
|
|
DataFrame |
pd_dataframe.h:6486 |
|
|
DataFrame |
pd_dataframe.h:6503 |
|
|
DataFrame |
pd_dataframe.h:6513 |
|
|
DataFrame |
pd_dataframe.h:27072 |
|
|
DataFrame |
pd_dataframe.h:5480 |
|
|
DataFrame |
pd_dataframe.h:6769 |
|
|
std::unique_ptr<NDFrameBase> |
pd_dataframe.h:27156 |
|
|
DataFrame |
pd_dataframe.h:6531 |
|
|
DataFrame |
pd_dataframe.h:5336 |
|
|
DataFrame |
pd_dataframe.h:5385 |
|
|
DataFrame |
pd_dataframe.h:6604 |
|
|
DataFrame |
pd_dataframe.h:6642 |
|
|
DataFrame |
pd_dataframe.h:5278 |
|
|
Result |
pd_dataframe.h:5403 |
|
|
DataFrame |
pd_dataframe.h:15657 |
|
|
DataFrame |
pd_dataframe.h:15708 |
|
|
DataFrame |
pd_dataframe.h:15731 |
|
|
DataFrame |
pd_dataframe.h:15766 |
|
|
DataFrame |
pd_dataframe.h:15986 |
|
|
static std::string |
pd_dataframe.h:15590 |
|
|
DataFrame |
pd_dataframe.h:15932 |
|
|
DataFrame |
pd_dataframe.h:9027 |
|
|
DataFrame |
pd_dataframe.h:9050 |
|
|
DataFrame |
pd_dataframe.h:9075 |
|
|
DataFrame |
pd_dataframe.h:9095 |
|
|
DataFrame |
pd_dataframe.h:9100 |
|
|
void |
pd_dataframe.h:7092 |
|
|
void |
pd_dataframe.h:8423 |
|
|
DataFrame& |
pd_dataframe.h:5027 |
|
|
DataFrame |
pd_dataframe.h:6559 |
|
|
void |
pd_dataframe.h:4611 |
|
|
void |
pd_dataframe.h:4630 |
|
|
DataFrame |
pd_dataframe.h:4640 |
|
|
DataFrame |
pd_dataframe.h:4652 |
|
|
DataFrame |
pd_dataframe.h:4663 |
|
|
void |
pd_dataframe.h:4682 |
|
|
pd_dataframe.h:4684 |
||
|
void |
pd_dataframe.h:4690 |
|
|
void |
pd_dataframe.h:4693 |
|
|
void |
pd_dataframe.h:4713 |
|
|
pd_dataframe.h:4715 |
||
|
void |
pd_dataframe.h:4720 |
|
|
pd_dataframe.h:4721 |
||
|
void |
pd_dataframe.h:4726 |
|
|
pd_dataframe.h:4727 |
||
|
pd_dataframe.h:4740 |
||
|
pd_dataframe.h:4745 |
||
|
pd_dataframe.h:4752 |
||
|
pd_dataframe.h:4762 |
||
|
pd_dataframe.h:4766 |
||
|
pd_dataframe.h:4842 |
||
|
pd_dataframe.h:4890 |
||
|
DataFrame |
pd_dataframe.h:4901 |
|
|
DataFrame |
pd_dataframe.h:4948 |
|
|
DataFrame |
pd_dataframe.h:5007 |
|
|
void |
pd_dataframe.h:4884 |
|
|
void |
pd_dataframe.h:4738 |
|
|
void |
pd_dataframe.h:4743 |
|
|
void |
pd_dataframe.h:4748 |
|
|
void |
pd_dataframe.h:4758 |
|
|
void |
pd_dataframe.h:4765 |
|
|
pd_dataframe.h:4771 |
||
|
pd_dataframe.h:4774 |
||
|
pd_dataframe.h:4777 |
||
|
pd_dataframe.h:4780 |
||
|
pd_dataframe.h:4783 |
||
|
void |
pd_dataframe.h:4779 |
|
|
void |
pd_dataframe.h:4782 |
|
|
void |
pd_dataframe.h:4789 |
|
|
pd_dataframe.h:4798 |
||
|
DataFrame |
pd_dataframe.h:15815 |
|
|
DataFrame |
pd_dataframe.h:15863 |
|
|
DataFrame |
pd_dataframe.h:15886 |
|
|
void |
pd_dataframe.h:26357 |
Missing Data#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:13146 |
|
|
DataFrame |
pd_dataframe.h:7738 |
|
|
DataFrame |
pd_dataframe.h:7644 |
|
|
DataFrame |
pd_dataframe.h:8771 |
|
|
DataFrame |
pd_dataframe.h:8819 |
|
|
DataFrame |
pd_dataframe.h:8828 |
|
|
DataFrame |
pd_dataframe.h:8857 |
|
|
DataFrame |
pd_dataframe.h:8877 |
|
|
void |
pd_dataframe.h:8413 |
|
|
DataFrame |
pd_dataframe.h:7835 |
|
|
pd_dataframe.h:7869 |
||
|
void |
pd_dataframe.h:7886 |
|
|
pd_dataframe.h:7874 |
||
|
void |
pd_dataframe.h:8293 |
|
|
numpy::NDArray<numpy::bool_> |
pd_dataframe.h:8508 |
|
|
DataFrame |
pd_dataframe.h:8538 |
|
|
numpy::NDArray<numpy::bool_> |
pd_dataframe.h:13126 |
|
|
DataFrame |
pd_dataframe.h:8591 |
|
|
numpy::NDArray<numpy::bool_> |
pd_dataframe.h:8523 |
|
|
DataFrame |
pd_dataframe.h:8565 |
|
|
numpy::NDArray<numpy::bool_> |
pd_dataframe.h:13134 |
|
|
DataFrame |
pd_dataframe.h:8598 |
|
|
DataFrame |
pd_dataframe.h:13159 |
Statistics#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
size_t |
pd_dataframe.h:7458 |
|
|
Series<numpy::int64> |
pd_dataframe.h:7472 |
|
|
Series<numpy::int64> |
pd_dataframe.h:10719 |
|
|
DataFrame |
pd_dataframe.h:20426 |
|
|
DataFrame |
pd_dataframe.h:20311 |
|
|
DataFrame |
pd_dataframe.h:20214 |
|
|
DataFrame |
pd_dataframe.h:20108 |
|
|
DataFrame |
pd_dataframe.h:11001 |
|
|
DataFrame |
pd_dataframe.h:10868 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12411 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11071 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12465 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12250 |
|
|
Series<numpy::float64> |
pd_dataframe.h:10459 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12011 |
|
|
Series<numpy::float64> |
pd_dataframe.h:10392 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11174 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11126 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12158 |
|
|
Series<numpy::float64> |
pd_dataframe.h:10430 |
|
|
DataFrame |
pd_dataframe.h:11242 |
|
|
DataFrame |
pd_dataframe.h:11261 |
|
|
DataFrame |
pd_dataframe.h:11406 |
|
|
Series<numpy::int64> |
pd_dataframe.h:13667 |
|
|
Series<numpy::int64> |
pd_dataframe.h:13622 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11554 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11507 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12592 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12478 |
|
|
Series<numpy::float64> |
pd_dataframe.h:13949 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12312 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11008 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12358 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11024 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12065 |
|
|
pandas::Series<numpy::timedelta64> |
pd_dataframe.h:21232 |
|
|
pandas::Series<numpy::timedelta64> |
pd_dataframe.h:21597 |
|
|
Series<numpy::float64> |
pd_dataframe.h:10660 |
|
|
Series<numpy::float64> |
pd_dataframe.h:11822 |
|
|
Series<numpy::float64> |
pd_dataframe.h:10341 |
|
|
std::variant<Series<numpy::int64>, Series<numpy::float64>> |
pd_dataframe.h:13754 |
|
|
Series<numpy::float64> |
pd_dataframe.h:12118 |
|
|
Series<numpy::float64> |
pd_dataframe.h:10703 |
Aggregation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:23703 |
|
|
DataFrame |
pd_dataframe.h:23742 |
|
|
DataFrame |
pd_dataframe.h:23795 |
|
|
DataFrame |
pd_dataframe.h:23820 |
|
|
Series<numpy::float64> |
pd_dataframe.h:23874 |
|
|
Series<numpy::float64> |
pd_dataframe.h:23893 |
|
|
DataFrame |
pd_dataframe.h:23776 |
|
|
DataFrame |
pd_dataframe.h:23785 |
|
|
DataFrame |
pd_dataframe.h:23861 |
|
|
DataFrame |
pd_dataframe.h:23864 |
|
|
DataFrame |
pd_dataframe.h:20839 |
|
|
DataFrame |
pd_dataframe.h:20879 |
|
|
PANDASCORE_API Result |
pd_dataframe.h:21738 |
|
|
static std::string |
pd_dataframe.h:20709 |
|
|
PANDASCORE_API ApplyResult |
pd_dataframe.h:21659 |
|
|
pd_dataframe.h:22263 |
||
|
pd_dataframe.h:22317 |
||
|
std::vector<std::complex<double>> |
pd_dataframe.h:20773 |
|
|
std::vector<double> |
pd_dataframe.h:20744 |
|
|
DataFrame |
pd_dataframe.h:22510 |
|
|
static std::string |
pd_dataframe.h:20657 |
|
|
pd_dataframe.h:21029 |
||
|
std::vector<double> |
pd_dataframe.h:23117 |
|
|
PANDASCORE_API DataFrame |
pd_dataframe.h:21701 |
|
|
DataFrame |
pd_dataframe.h:21746 |
|
|
DataFrame |
pd_dataframe.h:21859 |
|
|
DataFrameEWM |
pd_dataframe.h:13081 |
|
|
DataFrameExpanding |
pd_dataframe.h:13055 |
|
|
DataFrameGroupBy |
pd_dataframe.h:26598 |
|
|
DataFrameGroupBy |
pd_dataframe.h:26608 |
|
|
DataFrameGroupBy |
pd_dataframe.h:26629 |
|
|
DataFrameGroupBy |
pd_dataframe.h:26652 |
|
|
DataFrameGroupBy |
pd_dataframe.h:26672 |
|
|
DataFrameGroupBy |
pd_dataframe.h:26685 |
|
|
DataFrame |
pd_dataframe.h:21888 |
|
|
PANDASCORE_API DataFrame |
pd_dataframe.h:21666 |
|
|
PANDASCORE_API DataFrame |
pd_dataframe.h:21707 |
|
|
auto |
pd_dataframe.h:23921 |
|
|
auto |
pd_dataframe.h:23930 |
|
|
DataFrameResampler |
pd_dataframe.h:26725 |
|
|
DataFrameRolling |
pd_dataframe.h:13032 |
|
|
DataFrame |
pd_dataframe.h:23357 |
|
|
DataFrame |
pd_dataframe.h:23455 |
|
|
DataFrame |
pd_dataframe.h:23525 |
|
|
DataFrame |
pd_dataframe.h:23565 |
|
|
DataFrame |
pd_dataframe.h:23614 |
|
|
PANDASCORE_API DataFrame |
pd_dataframe.h:21712 |
Arithmetic#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:19521 |
|
|
DataFrame |
pd_dataframe.h:19529 |
|
|
DataFrame |
pd_dataframe.h:19533 |
|
|
void |
pd_dataframe.h:6854 |
|
|
void |
pd_dataframe.h:6984 |
|
|
void |
pd_dataframe.h:7034 |
|
|
DataFrame |
pd_dataframe.h:6660 |
|
|
DataFrame |
pd_dataframe.h:6711 |
|
|
DataFrame |
pd_dataframe.h:19623 |
|
|
DataFrame |
pd_dataframe.h:19631 |
|
|
DataFrame |
pd_dataframe.h:19635 |
|
|
DataFrame |
pd_dataframe.h:19642 |
|
|
DataFrame |
pd_dataframe.h:19648 |
|
|
DataFrame |
pd_dataframe.h:19264 |
|
|
Series<double> |
pd_dataframe.h:19323 |
|
|
DataFrame |
pd_dataframe.h:19678 |
|
|
DataFrame |
pd_dataframe.h:19687 |
|
|
DataFrame |
pd_dataframe.h:19698 |
|
|
DataFrame |
pd_dataframe.h:19707 |
|
|
DataFrame |
pd_dataframe.h:3239 |
|
|
DataFrame |
pd_dataframe.h:19585 |
|
|
DataFrame |
pd_dataframe.h:19593 |
|
|
DataFrame |
pd_dataframe.h:19597 |
|
|
MultiIndex |
pd_dataframe.h:13932 |
|
|
const MultiIndex& |
pd_dataframe.h:1351 |
|
|
DataFrame |
pd_dataframe.h:19604 |
|
|
DataFrame |
pd_dataframe.h:19610 |
|
|
DataFrame |
pd_dataframe.h:19718 |
|
|
DataFrame |
pd_dataframe.h:19727 |
|
|
DataFrame |
pd_dataframe.h:19747 |
|
|
DataFrame |
pd_dataframe.h:19756 |
|
|
DataFrame |
pd_dataframe.h:19765 |
|
|
DataFrame |
pd_dataframe.h:19855 |
|
|
DataFrame |
pd_dataframe.h:19863 |
|
|
DataFrame |
pd_dataframe.h:19871 |
|
|
DataFrame |
pd_dataframe.h:19907 |
|
|
DataFrame |
pd_dataframe.h:19915 |
|
|
DataFrame |
pd_dataframe.h:19935 |
|
|
DataFrame |
pd_dataframe.h:19943 |
|
|
DataFrame |
pd_dataframe.h:19819 |
|
|
DataFrame |
pd_dataframe.h:19828 |
|
|
DataFrame |
pd_dataframe.h:19836 |
|
|
DataFrame |
pd_dataframe.h:19963 |
|
|
DataFrame |
pd_dataframe.h:19971 |
|
|
DataFrame |
pd_dataframe.h:19784 |
|
|
DataFrame |
pd_dataframe.h:19792 |
|
|
DataFrame |
pd_dataframe.h:19800 |
|
|
DataFrame |
pd_dataframe.h:19885 |
|
|
DataFrame |
pd_dataframe.h:19891 |
|
|
DataFrame |
pd_dataframe.h:19547 |
|
|
DataFrame |
pd_dataframe.h:19555 |
|
|
DataFrame |
pd_dataframe.h:19559 |
|
|
DataFrame |
pd_dataframe.h:19566 |
|
|
DataFrame |
pd_dataframe.h:19572 |
|
|
DataFrame |
pd_dataframe.h:19661 |
|
|
DataFrame |
pd_dataframe.h:19667 |
Comparison#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:26425 |
|
|
DataFrame |
pd_dataframe.h:19373 |
|
|
DataFrame |
pd_dataframe.h:19446 |
|
|
DataFrame |
pd_dataframe.h:19466 |
|
|
bool |
pd_dataframe.h:10199 |
|
|
DataFrame |
pd_dataframe.h:19438 |
|
|
DataFrame |
pd_dataframe.h:19461 |
|
|
DataFrame |
pd_dataframe.h:19486 |
|
|
std::mt19937 |
pd_dataframe.h:4063 |
|
|
DataFrame |
pd_dataframe.h:19425 |
|
|
DataFrame |
pd_dataframe.h:19458 |
|
|
DataFrame |
pd_dataframe.h:19482 |
|
|
DataFrame |
pd_dataframe.h:19412 |
|
|
DataFrame |
pd_dataframe.h:19455 |
|
|
DataFrame |
pd_dataframe.h:19478 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:4967 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:13882 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:16563 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:18130 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:15002 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:17926 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:16881 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:17300 |
|
|
DataFrame |
pd_dataframe.h:19399 |
|
|
DataFrame |
pd_dataframe.h:19452 |
|
|
DataFrame |
pd_dataframe.h:19474 |
|
|
DataFrame |
pd_dataframe.h:19386 |
|
|
DataFrame |
pd_dataframe.h:19449 |
|
|
DataFrame |
pd_dataframe.h:19470 |
|
|
DatetimeArray |
pd_dataframe.h:3880 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:2837 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:2886 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:18526 |
Sorting#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:15068 |
|
|
DataFrame |
pd_dataframe.h:14908 |
|
|
DataFrame |
pd_dataframe.h:14911 |
|
|
DataFrame |
pd_dataframe.h:14914 |
|
|
DataFrame |
pd_dataframe.h:14619 |
|
|
DataFrame |
pd_dataframe.h:14631 |
|
|
DataFrame |
pd_dataframe.h:14642 |
|
|
DataFrame |
pd_dataframe.h:14648 |
|
|
DataFrame |
pd_dataframe.h:14661 |
|
|
DataFrame |
pd_dataframe.h:14683 |
|
|
DataFrame |
pd_dataframe.h:14843 |
Reshaping#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:1270 |
|
|
DataFrame |
pd_dataframe.h:17984 |
|
|
DataFrame |
pd_dataframe.h:17667 |
|
|
DataFrame |
pd_dataframe.h:17679 |
|
|
DataFrame |
pd_dataframe.h:17686 |
|
|
DataFrame |
pd_dataframe.h:17695 |
|
|
DataFrame |
pd_dataframe.h:16099 |
|
|
static std::string |
pd_dataframe.h:16032 |
|
|
DataFrame |
pd_dataframe.h:16285 |
|
|
DataFrame |
pd_dataframe.h:16307 |
|
|
DataFrame |
pd_dataframe.h:16328 |
|
|
DataFrame |
pd_dataframe.h:16347 |
|
|
DataFrame |
pd_dataframe.h:16447 |
|
|
DataFrame |
pd_dataframe.h:17456 |
|
|
DataFrame |
pd_dataframe.h:16982 |
|
|
DataFrame |
pd_dataframe.h:17017 |
|
|
DataFrame |
pd_dataframe.h:18091 |
|
|
DataFrame |
pd_dataframe.h:18216 |
|
|
DataFrame |
pd_dataframe.h:18469 |
|
|
Series<numpy::float64> |
pd_dataframe.h:18115 |
|
|
DataFrame |
pd_dataframe.h:13340 |
|
|
DataFrame |
pd_dataframe.h:15126 |
|
|
DataFrame |
pd_dataframe.h:18759 |
Combining#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::pair<DataFrame, DataFrame> |
pd_dataframe.h:25826 |
|
|
std::pair<DataFrame, Series<T>> |
pd_dataframe.h:25916 |
|
|
DataFrame |
pd_dataframe.h:26038 |
|
|
DataFrame |
pd_dataframe.h:26266 |
|
|
static std::string |
pd_dataframe.h:26032 |
|
|
static DataFrame |
pd_dataframe.h:24240 |
|
|
static DataFrame |
pd_dataframe.h:24720 |
|
|
std::unique_ptr<NDFrameBase> |
pd_dataframe.h:27144 |
|
|
DataFrame |
pd_dataframe.h:25540 |
|
|
DataFrame |
pd_dataframe.h:24739 |
|
|
DataFrame |
pd_dataframe.h:25504 |
|
|
DataFrame |
pd_dataframe.h:25515 |
Time Series#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:26750 |
|
|
Series<numpy::float64> |
pd_dataframe.h:27059 |
|
|
DataFrame |
pd_dataframe.h:27065 |
|
|
DataFrame |
pd_dataframe.h:26982 |
|
|
DataFrame |
pd_dataframe.h:26999 |
|
|
DataFrame |
pd_dataframe.h:20536 |
|
|
DataFrame |
pd_dataframe.h:14556 |
|
|
DataFrame |
pd_dataframe.h:20584 |
|
|
DataFrame |
pd_dataframe.h:26800 |
|
|
DataFrame |
pd_dataframe.h:26841 |
|
|
DataFrame |
pd_dataframe.h:26893 |
|
|
DataFrame |
pd_dataframe.h:26951 |
I/O#
| Signature | Return Type | Location | Example |
|---|---|---|---|
|  | std::string | pd_dataframe.h:27185 |  |
|  | void | pd_dataframe.h:13478 |  |
|  | std::string | pd_dataframe.h:27462 |  |
|  | std::string | pd_dataframe.h:13519 |  |
Conversion#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_dataframe.h:21972 |
|
|
DataFrame |
pd_dataframe.h:21998 |
|
|
pd_dataframe.h:22160 |
||
|
DataFrame |
pd_dataframe.h:22219 |
|
|
bool |
pd_dataframe.h:13217 |
|
|
DataFrame |
pd_dataframe.h:22796 |
|
|
DataFrame |
pd_dataframe.h:27121 |
|
|
void |
pd_dataframe.h:8484 |
|
|
DataFrame |
pd_dataframe.h:22948 |
Iteration#
| Signature | Return Type | Location | Example |
|---|---|---|---|
|  | iterator | pd_dataframe.h:1984 |  |
|  | const_iterator | pd_dataframe.h:1988 |  |
|  | iterator | pd_dataframe.h:2000 |  |
|  | const_iterator | pd_dataframe.h:2004 |  |
|  | ItemsRange | pd_dataframe.h:2141 |  |
|  | RowsRange | pd_dataframe.h:2282 |  |
|  | TuplesRange | pd_dataframe.h:2507 |  |
|  | const Index<std::string>& | pd_dataframe.h:2029 |  |
Set Operations#
| Signature | Return Type | Location | Example |
|---|---|---|---|
|  | Series<numpy::bool_> | pd_dataframe.h:10044 |  |
|  | DataFrame | pd_dataframe.h:8623 |  |
|  | DataFrame | pd_dataframe.h:8693 |  |
Type Checking#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
static bool |
pd_dataframe.h:22597 |
|
|
static bool |
pd_dataframe.h:22643 |
|
|
static bool |
pd_dataframe.h:22623 |
|
|
bool |
pd_dataframe.h:1872 |
|
|
static bool |
pd_dataframe.h:22605 |
|
|
bool |
pd_dataframe.h:8400 |
|
|
pd_dataframe.h:15185 |
||
|
static bool |
pd_dataframe.h:10774 |
|
|
static bool |
pd_dataframe.h:11683 |
Other Methods#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::vector<std::vector<double>> |
pd_dataframe.h:8116 |
|
|
std::vector<std::vector<double>> |
pd_dataframe.h:8155 |
|
|
std::vector<std::vector<double>> |
pd_dataframe.h:8171 |
|
|
pd_dataframe.h:13430 |
||
|
pd_dataframe.h:13431 |
||
|
pd_dataframe.h:13432 |
||
|
pd_dataframe.h:13433 |
||
|
pd_dataframe.h:13434 |
||
|
pd_dataframe.h:13435 |
||
|
pd_dataframe.h:13436 |
||
|
pd_dataframe.h:13437 |
||
|
pd_dataframe.h:13438 |
||
|
pd_dataframe.h:13439 |
||
|
pd_dataframe.h:13440 |
||
|
std::vector<std::vector<double>> |
pd_dataframe.h:8133 |
|
|
DataFrame |
pd_dataframe.h:20051 |
|
|
bool |
pd_dataframe.h:23947 |
|
|
Series<numpy::bool_> |
pd_dataframe.h:23984 |
|
|
bool |
pd_dataframe.h:727 |
|
|
bool |
pd_dataframe.h:23964 |
|
|
Series<numpy::bool_> |
pd_dataframe.h:24114 |
|
|
DatetimeArray |
pd_dataframe.h:5517 |
|
|
static DataFrame |
pd_dataframe.h:2709 |
|
|
Series<numpy::float64> |
pd_dataframe.h:2668 |
|
|
Series<std::string> |
pd_dataframe.h:2691 |
|
|
std::vector<const IndexBase*> |
pd_dataframe.h:1561 |
|
|
DataFrame |
pd_dataframe.h:8908 |
|
|
Axis1ColumnPlan |
pd_dataframe.h:11695 |
|
|
else |
pd_dataframe.h:6353 |
|
|
pd_dataframe.h:6388 |
||
|
pd_dataframe.h:6392 |
||
|
MultiIndexColumnLabel |
pd_dataframe.h:3064 |
|
|
pd_dataframe.h:6372 |
||
|
pd_dataframe.h:6374 |
||
|
pd_dataframe.h:6376 |
||
|
pd_dataframe.h:6378 |
||
|
pd_dataframe.h:6380 |
||
|
pd_dataframe.h:6382 |
||
|
pd_dataframe.h:6384 |
||
|
pd_dataframe.h:6386 |
||
|
else |
pd_dataframe.h:6357 |
|
|
pd_dataframe.h:21175 |
||
|
pd_dataframe.h:21177 |
||
|
pd_dataframe.h:21179 |
||
|
pd_dataframe.h:21181 |
||
|
pd_dataframe.h:21246 |
||
|
pd_dataframe.h:21249 |
||
|
pd_dataframe.h:21544 |
||
|
pd_dataframe.h:21546 |
||
|
pd_dataframe.h:21548 |
||
|
pd_dataframe.h:21550 |
||
|
pd_dataframe.h:21611 |
||
|
pd_dataframe.h:21613 |
||
|
std::vector<std::map<std::string, int>> |
pd_dataframe.h:14702 |
|
|
const_iterator |
pd_dataframe.h:1992 |
|
|
DataFrame |
pd_dataframe.h:20044 |
|
|
const_iterator |
pd_dataframe.h:2008 |
|
|
ColumnAccessType |
pd_dataframe.h:3311 |
|
|
std::vector<AggColumnInfo> |
pd_dataframe.h:11715 |
|
|
void |
pd_dataframe.h:1498 |
|
|
void |
pd_dataframe.h:1361 |
|
|
void |
pd_dataframe.h:1412 |
|
|
DataFrame |
pd_dataframe.h:20063 |
|
|
std::unique_ptr<NDFrameBase> |
pd_dataframe.h:27129 |
|
|
Series<T>& |
pd_dataframe.h:3247 |
|
|
const Series<T>& |
pd_dataframe.h:3261 |
|
|
std::vector<std::vector<numpy::float64>> |
pd_dataframe.h:20170 |
|
|
std::vector<std::vector<numpy::float64>> |
pd_dataframe.h:20271 |
|
|
std::vector<std::vector<numpy::float64>> |
pd_dataframe.h:20384 |
|
|
std::vector<std::vector<numpy::float64>> |
pd_dataframe.h:20497 |
|
|
const Series<T>& |
pd_dataframe.h:3275 |
|
|
Series<T>& |
pd_dataframe.h:3277 |
|
|
const NDFrameBase& |
pd_dataframe.h:2518 |
|
|
std::string |
pd_dataframe.h:1593 |
|
|
Series<numpy::float64> |
pd_dataframe.h:2527 |
|
|
Series<std::string> |
pd_dataframe.h:2547 |
|
|
std::unique_ptr<NDFrameBase> |
pd_dataframe.h:2641 |
|
|
static bool |
pd_dataframe.h:22454 |
|
|
const Index<std::string>& |
pd_dataframe.h:1277 |
|
|
const std::string& |
pd_dataframe.h:1397 |
|
|
const std::vector<std::string>& |
pd_dataframe.h:1530 |
|
|
const std::vector<std::vector<std::string>>& |
pd_dataframe.h:1516 |
|
|
void |
pd_dataframe.h:1385 |
|
|
const std::vector<std::vector<std::string>>& |
pd_dataframe.h:1523 |
|
|
std::string |
pd_dataframe.h:20721 |
|
|
std::string |
pd_dataframe.h:12222 |
|
|
static double |
pd_dataframe.h:23657 |
|
|
StringColumnStats |
pd_dataframe.h:10826 |
|
|
static numpy::bool_ |
pd_dataframe.h:22397 |
|
|
static numpy::float64 |
pd_dataframe.h:22363 |
|
|
static numpy::int64 |
pd_dataframe.h:22335 |
|
|
DataFrame |
pd_dataframe.h:13990 |
|
|
Series<numpy::float64> |
pd_dataframe.h:14102 |
|
|
Series<numpy::float64> |
pd_dataframe.h:14398 |
|
|
DataFrame |
pd_dataframe.h:14044 |
|
|
static std::string |
pd_dataframe.h:11797 |
|
|
numpy::datetime64 |
pd_dataframe.h:5604 |
|
|
std::string |
pd_dataframe.h:1234 |
|
|
std::string |
pd_dataframe.h:1606 |
|
|
Series<std::string> |
pd_dataframe.h:1568 |
|
|
pd_dataframe.h:27621 |
||
|
pd_dataframe.h:27647 |
||
|
pd_dataframe.h:27623 |
||
|
pd_dataframe.h:27637 |
||
|
pd_dataframe.h:27652 |
||
|
pd_dataframe.h:27665 |
||
|
pd_dataframe.h:27688 |
||
|
pd_dataframe.h:27782 |
||
|
bool |
pd_dataframe.h:1196 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:2979 |
|
|
static std::string |
pd_dataframe.h:13598 |
|
|
std::variant<Series<numpy::float64>, DataFrame, std::monostate> |
pd_dataframe.h:9786 |
|
|
std::vector<double> |
pd_dataframe.h:9907 |
|
|
double |
pd_dataframe.h:9929 |
|
|
bool |
pd_dataframe.h:9653 |
|
|
bool |
pd_dataframe.h:9623 |
|
|
bool |
pd_dataframe.h:9677 |
|
|
bool |
pd_dataframe.h:9576 |
|
|
pd_dataframe.h:9602 |
||
|
pd_dataframe.h:9607 |
||
|
Series<numpy::float64> |
pd_dataframe.h:3514 |
|
|
std::unique_ptr<NDFrameBase> |
pd_dataframe.h:3556 |
|
|
RowData |
pd_dataframe.h:2605 |
|
|
std::ofstream |
pd_dataframe.h:13582 |
|
|
DataFrame |
pd_dataframe.h:13241 |
|
|
DataFrame |
pd_dataframe.h:4225 |
|
|
DataFrame |
pd_dataframe.h:4232 |
|
|
int |
pd_dataframe.h:1421 |
|
|
static std::string |
pd_dataframe.h:22433 |
|
|
const Flags& |
pd_dataframe.h:1846 |
|
|
Flags& |
pd_dataframe.h:1853 |
|
|
DataFrame |
pd_dataframe.h:20037 |
|
|
static std::vector<std::string> |
pd_dataframe.h:10849 |
|
|
const std::string& |
pd_dataframe.h:21721 |
|
|
bool |
pd_dataframe.h:3293 |
|
|
bool |
pd_dataframe.h:1551 |
|
|
bool |
pd_dataframe.h:1509 |
|
|
bool |
pd_dataframe.h:1344 |
|
|
bool |
pd_dataframe.h:7445 |
|
|
const IndexBase& |
pd_dataframe.h:1284 |
|
|
IndexBase& |
pd_dataframe.h:1291 |
|
|
void |
pd_dataframe.h:4797 |
|
|
std::optional<std::string> |
pd_dataframe.h:4800 |
|
|
static std::string |
pd_dataframe.h:22658 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:16419 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:16421 |
|
|
void |
pd_dataframe.h:13320 |
|
|
std::istringstream |
pd_dataframe.h:9819 |
|
|
std::istringstream |
pd_dataframe.h:15557 |
|
|
std::istringstream |
pd_dataframe.h:15592 |
|
|
std::istringstream |
pd_dataframe.h:16565 |
|
|
std::istringstream |
pd_dataframe.h:16905 |
|
|
std::istringstream |
pd_dataframe.h:16936 |
|
|
std::istringstream |
pd_dataframe.h:17309 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:18572 |
|
|
MultiIndexMatchResult |
pd_dataframe.h:2937 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:16931 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:17222 |
|
|
size_t |
pd_dataframe.h:1222 |
|
|
size_t |
pd_dataframe.h:1263 |
|
|
size_t |
pd_dataframe.h:1215 |
|
|
static std::vector<double> |
pd_dataframe.h:10737 |
|
|
size_t |
pd_dataframe.h:1256 |
|
|
pd_dataframe.h:16637 |
||
|
pd_dataframe.h:16638 |
||
|
query::QueryExpression |
pd_dataframe.h:9489 |
|
|
pd_dataframe.h:3418 |
||
|
pd_dataframe.h:3437 |
||
|
pd_dataframe.h:3485 |
||
|
pd_dataframe.h:3506 |
||
|
pd_dataframe.h:3522 |
||
|
void |
pd_dataframe.h:3608 |
|
|
pd_dataframe.h:3635 |
||
|
oss << |
pd_dataframe.h:27293 |
|
|
oss << |
pd_dataframe.h:27299 |
|
|
oss << |
pd_dataframe.h:27320 |
|
|
oss << |
pd_dataframe.h:27323 |
|
|
oss << |
pd_dataframe.h:27338 |
|
|
oss << |
pd_dataframe.h:27345 |
|
|
oss << |
pd_dataframe.h:27350 |
|
|
oss << |
pd_dataframe.h:27363 |
|
|
oss << |
pd_dataframe.h:27366 |
|
|
oss << |
pd_dataframe.h:27384 |
|
|
oss << |
pd_dataframe.h:27386 |
|
|
oss << |
pd_dataframe.h:27398 |
|
|
oss << |
pd_dataframe.h:27431 |
|
|
oss << |
pd_dataframe.h:27437 |
|
|
oss << |
pd_dataframe.h:27439 |
|
|
std::regex |
pd_dataframe.h:13253 |
|
|
std::regex |
pd_dataframe.h:13280 |
|
|
int |
pd_dataframe.h:18436 |
|
|
int |
pd_dataframe.h:18444 |
|
|
size_t |
pd_dataframe.h:3038 |
|
|
MultiIndexSortResolution |
pd_dataframe.h:14799 |
|
|
DataFrame |
pd_dataframe.h:19987 |
|
|
std::vector<numpy::object_> |
pd_dataframe.h:3841 |
|
|
std::vector<double> |
pd_dataframe.h:2564 |
|
|
pandas::Series<T> |
pd_dataframe.h:21157 |
|
|
pandas::Series<T> |
pd_dataframe.h:21526 |
|
|
std::string |
pd_dataframe.h:27537 |
|
|
std::string |
pd_dataframe.h:27566 |
|
|
pandas::Series<std::complex<double>> |
pd_dataframe.h:20938 |
|
|
pandas::Series<std::complex<double>> |
pd_dataframe.h:21068 |
|
|
pandas::Series<numpy::datetime64> |
pd_dataframe.h:21202 |
|
|
pandas::Series<numpy::datetime64> |
pd_dataframe.h:21568 |
|
|
DataFrame |
pd_dataframe.h:2970 |
|
|
DataFrame |
pd_dataframe.h:7315 |
|
|
DataFrame |
pd_dataframe.h:2870 |
|
|
DataFrame |
pd_dataframe.h:3088 |
|
|
void |
pd_dataframe.h:1839 |
|
|
void |
pd_dataframe.h:6890 |
|
|
void |
pd_dataframe.h:1750 |
|
|
void |
pd_dataframe.h:1760 |
|
|
void |
pd_dataframe.h:1679 |
|
|
pd_dataframe.h:1744 |
||
|
void |
pd_dataframe.h:1739 |
|
|
void |
pd_dataframe.h:1622 |
|
|
pd_dataframe.h:26410 |
||
|
void |
pd_dataframe.h:1640 |
|
|
void |
pd_dataframe.h:1712 |
|
|
DataFrame& |
pd_dataframe.h:5247 |
|
|
void |
pd_dataframe.h:7061 |
|
|
void |
pd_dataframe.h:1775 |
|
|
void |
pd_dataframe.h:1390 |
|
|
void |
pd_dataframe.h:1489 |
|
|
void |
pd_dataframe.h:1465 |
|
|
void |
pd_dataframe.h:1378 |
|
|
void |
pd_dataframe.h:4832 |
|
|
void |
pd_dataframe.h:1439 |
|
|
void |
pd_dataframe.h:1863 |
|
|
void |
pd_dataframe.h:21722 |
|
|
void |
pd_dataframe.h:4773 |
|
|
void |
pd_dataframe.h:4776 |
|
|
void |
pd_dataframe.h:4808 |
|
|
void |
pd_dataframe.h:4820 |
|
|
void |
pd_dataframe.h:1298 |
|
|
void |
pd_dataframe.h:1319 |
|
|
pd_dataframe.h:1328 |
||
|
void |
pd_dataframe.h:1334 |
|
|
pd_dataframe.h:1338 |
||
|
pd_dataframe.h:4694 |
||
|
pd_dataframe.h:4817 |
||
|
pd_dataframe.h:4823 |
||
|
void |
pd_dataframe.h:1368 |
|
|
void |
pd_dataframe.h:4854 |
|
|
void |
pd_dataframe.h:1453 |
|
|
void |
pd_dataframe.h:1407 |
|
|
void |
pd_dataframe.h:4770 |
|
|
static size_t |
pd_dataframe.h:10816 |
|
|
std::vector<size_t> |
pd_dataframe.h:1203 |
|
|
std::pair<size_t, size_t> |
pd_dataframe.h:1210 |
|
|
size_t |
pd_dataframe.h:1189 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:3943 |
|
|
std::vector<std::vector<std::string>> |
pd_dataframe.h:14978 |
|
|
DataFrameSparseAccessor |
pd_dataframe.h:1899 |
|
|
std::stringstream |
pd_dataframe.h:18002 |
|
|
static int64_t |
pd_dataframe.h:7078 |
|
|
pd_dataframe.h:22149 |
||
|
pd_dataframe.h:22150 |
||
|
DataFrame |
pd_dataframe.h:27012 |
|
|
pandas::Timestamp |
pd_dataframe.h:1428 |
|
|
static size_t |
pd_dataframe.h:10798 |
|
|
numpy::NDArray<numpy::float64> |
pd_dataframe.h:1810 |
Code Examples#
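The following examples are extracted from the test suite. Each entry is headed by the member name and the source location it was taken from (`file:line`), followed by an excerpt of roughly ten lines on either side of that location; the number at the start of each excerpt line is the line number in the original test file, so an excerpt may begin or end in the middle of a function. Entries appear to be matched by member name only, so some excerpts exercise a same-named member of a related type (for example `Series::attrs` or `Series::iloc`) rather than `DataFrame` itself.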
DataFrame (pd_test_1_all.cpp:22011)
22001 void pd_test_where_basic() {
22002 std::cout << "========= where basic functionality =======================";
22003
22004 // Create DataFrame
22005 std::map<std::string, std::vector<double>> data;
22006 data["A"] = {1.0, 2.0, 3.0, 4.0};
22007 data["B"] = {5.0, 6.0, 7.0, 8.0};
22008 pandas::DataFrame df(data);
22009
22010 // Create condition DataFrame (values > 2)
22011 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22013 cond_data["B"] = {true, true, true, true}; // all >2
22014 pandas::DataFrame cond(cond_data);
22015
22016 // Apply where with replacement value -1
22017 pandas::DataFrame result = df.where(cond, -1.0);
22018
22019 // Get column index for A - it's sorted alphabetically in std::map
22020 size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001 void pd_test_where_basic() {
22002 std::cout << "========= where basic functionality =======================";
22003
22004 // Create DataFrame
22005 std::map<std::string, std::vector<double>> data;
22006 data["A"] = {1.0, 2.0, 3.0, 4.0};
22007 data["B"] = {5.0, 6.0, 7.0, 8.0};
22008 pandas::DataFrame df(data);
22009
22010 // Create condition DataFrame (values > 2)
22011 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22013 cond_data["B"] = {true, true, true, true}; // all >2
22014 pandas::DataFrame cond(cond_data);
22015
22016 // Apply where with replacement value -1
22017 pandas::DataFrame result = df.where(cond, -1.0);
22018
22019 // Get column index for A - it's sorted alphabetically in std::map
22020 size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001 void pd_test_where_basic() {
22002 std::cout << "========= where basic functionality =======================";
22003
22004 // Create DataFrame
22005 std::map<std::string, std::vector<double>> data;
22006 data["A"] = {1.0, 2.0, 3.0, 4.0};
22007 data["B"] = {5.0, 6.0, 7.0, 8.0};
22008 pandas::DataFrame df(data);
22009
22010 // Create condition DataFrame (values > 2)
22011 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22013 cond_data["B"] = {true, true, true, true}; // all >2
22014 pandas::DataFrame cond(cond_data);
22015
22016 // Apply where with replacement value -1
22017 pandas::DataFrame result = df.where(cond, -1.0);
22018
22019 // Get column index for A - it's sorted alphabetically in std::map
22020 size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001 void pd_test_where_basic() {
22002 std::cout << "========= where basic functionality =======================";
22003
22004 // Create DataFrame
22005 std::map<std::string, std::vector<double>> data;
22006 data["A"] = {1.0, 2.0, 3.0, 4.0};
22007 data["B"] = {5.0, 6.0, 7.0, 8.0};
22008 pandas::DataFrame df(data);
22009
22010 // Create condition DataFrame (values > 2)
22011 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22013 cond_data["B"] = {true, true, true, true}; // all >2
22014 pandas::DataFrame cond(cond_data);
22015
22016 // Apply where with replacement value -1
22017 pandas::DataFrame result = df.where(cond, -1.0);
22018
22019 // Get column index for A - it's sorted alphabetically in std::map
22020 size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001 void pd_test_where_basic() {
22002 std::cout << "========= where basic functionality =======================";
22003
22004 // Create DataFrame
22005 std::map<std::string, std::vector<double>> data;
22006 data["A"] = {1.0, 2.0, 3.0, 4.0};
22007 data["B"] = {5.0, 6.0, 7.0, 8.0};
22008 pandas::DataFrame df(data);
22009
22010 // Create condition DataFrame (values > 2)
22011 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22013 cond_data["B"] = {true, true, true, true}; // all >2
22014 pandas::DataFrame cond(cond_data);
22015
22016 // Apply where with replacement value -1
22017 pandas::DataFrame result = df.where(cond, -1.0);
22018
22019 // Get column index for A - it's sorted alphabetically in std::map
22020 size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001 void pd_test_where_basic() {
22002 std::cout << "========= where basic functionality =======================";
22003
22004 // Create DataFrame
22005 std::map<std::string, std::vector<double>> data;
22006 data["A"] = {1.0, 2.0, 3.0, 4.0};
22007 data["B"] = {5.0, 6.0, 7.0, 8.0};
22008 pandas::DataFrame df(data);
22009
22010 // Create condition DataFrame (values > 2)
22011 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22013 cond_data["B"] = {true, true, true, true}; // all >2
22014 pandas::DataFrame cond(cond_data);
22015
22016 // Apply where with replacement value -1
22017 pandas::DataFrame result = df.where(cond, -1.0);
22018
22019 // Get column index for A - it's sorted alphabetically in std::map
22020 size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001 void pd_test_where_basic() {
22002 std::cout << "========= where basic functionality =======================";
22003
22004 // Create DataFrame
22005 std::map<std::string, std::vector<double>> data;
22006 data["A"] = {1.0, 2.0, 3.0, 4.0};
22007 data["B"] = {5.0, 6.0, 7.0, 8.0};
22008 pandas::DataFrame df(data);
22009
22010 // Create condition DataFrame (values > 2)
22011 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22013 cond_data["B"] = {true, true, true, true}; // all >2
22014 pandas::DataFrame cond(cond_data);
22015
22016 // Apply where with replacement value -1
22017 pandas::DataFrame result = df.where(cond, -1.0);
22018
22019 // Get column index for A - it's sorted alphabetically in std::map
22020 size_t col_a_idx = df.get_column_index("A");
at (pd_test_1_all.cpp:6581)
6571 // Test isna/notna with float data
6572 {
6573 std::map<std::string, std::vector<numpy::float64>> float_data;
6574 float_data["X"] = {1.0, std::nan(""), 3.0};
6575 float_data["Y"] = {4.0, 5.0, std::nan("")};
6576 pandas::DataFrame df_na(float_data);
6577
6578 auto na_mask = df_na.isna();
6579 // Row 1, col 0 (X) should be NA
6580 if (!na_mask.getElementAt({1, 0})) {
6581 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583 }
6584 // Row 2, col 1 (Y) should be NA
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
at_time (pd_test_2_all.cpp:728)
718 std::cout << "====================================== [OK] pd_test_asof test suite ========================== " << std::endl;
719 return 0;
720 }
721
722} // namespace dataframe_tests
723// ------------------- pd_test_asof.cpp (end) -----------------------------
724
725// ------------------- pd_test_at_time.cpp (start) -----------------------------
726// dataframe_tests/pd_test_at_time.cpp
727// Tests for DataFrame.at_time() method (pandas 2.0+ API)
728// Selects values at particular time of day from datetime-indexed DataFrame
729#include <iostream>
730#include <stdexcept>
731#include <vector>
732#include <string>
733#include <map>
734#include "../pandas/pd_dataframe.h"
735
736// CRITICAL: No using namespace directives
attrs (pd_test_1_all.cpp:16361)
16351 // =====================================================================
16352 // Series Attrs Integration Tests
16353 // =====================================================================
16354
16355 void pd_test_ndframe_series_attrs() {
16356 std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358 pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360 // Test setting attrs on Series
16361 s.attrs().set("source", std::string("test_data"));
16362 s.attrs().set("timestamp", 1234567890);
16363
16364 bool passed = s.attrs().get<std::string>("source") == "test_data";
16365 if (!passed) {
16366 std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367 throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368 }
16369
16370 passed = s.attrs().get<int>("timestamp") == 1234567890;
16371 if (!passed) {
attrs (pd_test_1_all.cpp:16361)
16351 // =====================================================================
16352 // Series Attrs Integration Tests
16353 // =====================================================================
16354
16355 void pd_test_ndframe_series_attrs() {
16356 std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358 pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360 // Test setting attrs on Series
16361 s.attrs().set("source", std::string("test_data"));
16362 s.attrs().set("timestamp", 1234567890);
16363
16364 bool passed = s.attrs().get<std::string>("source") == "test_data";
16365 if (!passed) {
16366 std::cout << " [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367 throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368 }
16369
16370 passed = s.attrs().get<int>("timestamp") == 1234567890;
16371 if (!passed) {
first (pd_test_1_all.cpp:11616)
11606 void pd_test_groupby_first_last() {
11607 std::cout << "========= GroupBy first/last ====================";
11608
11609 std::map<std::string, std::vector<double>> data = {
11610 {"category", {1.0, 1.0, 2.0, 2.0}},
11611 {"value", {10.0, 20.0, 30.0, 40.0}}
11612 };
11613 pandas::DataFrame df(data);
11614
11615 auto first_result = df.groupby("category").first();
11616 auto last_result = df.groupby("category").last();
11617
11618 // First for group 1: 10, group 2: 30
11619 // Last for group 1: 20, group 2: 40
11620 double first1 = std::stod(first_result["value"].get_value_str(0));
11621 double first2 = std::stod(first_result["value"].get_value_str(1));
11622
11623 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11624 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11625 if (!passed) {
first_valid_index (pd_test_1_all.cpp:20555)
20545 std::vector<double> values = {
20546 std::numeric_limits<double>::quiet_NaN(),
20547 std::numeric_limits<double>::quiet_NaN(),
20548 3.0,
20549 4.0,
20550 5.0
20551 };
20552 pandas::Series<double> s(values, "test");
20553
20554 auto first_idx = s.first_valid_index();
20555
20556 bool passed = first_idx.has_value() && first_idx.value() == 2;
20557
20558 if (!passed) {
20559 std::cout << " [FAIL] : in pd_test_timeseries_first_valid_index() : expected index 2" << std::endl;
20560 throw std::runtime_error("pd_test_timeseries_first_valid_index failed");
20561 }
20562
20563 std::cout << " -> tests passed" << std::endl;
20564 }
get (pd_test_1_all.cpp:10290)
10280void pd_test_extension_index_get_loc_unique() {
10281 std::cout << "========= get_loc (unique) =========================";
10282
10283 pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284 pandas::CategoricalIndex idx(arr);
10285
10286 auto loc_apple = idx.get_loc("apple");
10287 auto loc_banana = idx.get_loc("banana");
10288 auto loc_cherry = idx.get_loc("cherry");
10289
10290 bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291 std::get<size_t>(loc_banana) == 1 &&
10292 std::get<size_t>(loc_cherry) == 2);
10293 if (!passed) {
10294 std::cout << " [FAIL] : in pd_test_extension_index_get_loc_unique() : get_loc check failed" << std::endl;
10295 throw std::runtime_error("pd_test_extension_index_get_loc_unique failed");
10296 }
10297
10298 std::cout << " -> tests passed" << std::endl;
10299}
get_column_as_series (pd_test_5_all.cpp:123545)
123535 auto tuples = mi.to_list();
123536 if (tuples.size() != nrows) {
123537 throw std::runtime_error(
123538 "format_extractall: MultiIndex size " + std::to_string(tuples.size()) +
123539 " != DataFrame nrows " + std::to_string(nrows));
123540 }
123541 // Pre-extract each column's string values to avoid repeated lookups.
123542 std::vector<std::vector<std::string>> col_values;
123543 col_values.reserve(ncols);
123544 for (size_t c = 0; c < ncols; ++c) {
123545 auto s = df.template get_column_as_series<std::string>(c);
123546 std::vector<std::string> vals;
123547 vals.reserve(s.size());
123548 for (size_t r = 0; r < s.size(); ++r) vals.push_back(s.at(r));
123549 col_values.push_back(std::move(vals));
123550 }
123551 for (size_t r = 0; r < nrows; ++r) {
123552 oss << "\n";
123553 // tuples[r] is a vector<string> of length 2.
123554 oss << tuples[r][0] << "," << tuples[r][1];
123555 for (size_t c = 0; c < ncols; ++c) {
get_column_as_string_series (pd_test_2_all.cpp:20370)
20360void pd_test_getitem_dispatch_category_metadata() {
20361 std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362 pandas::DataFrame df;
20363 std::vector<std::string> svals = {"a", "b", "a", "c"};
20364 auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365 cs->set_dtype_override("category");
20366 cs->set_cat_categories({"a", "b", "c"});
20367 cs->set_cat_ordered(true);
20368 df.insert(0, "cat", std::move(cs), true);
20369
20370 auto s = df.get_column_as_string_series("cat");
20371 check(s.dtype_name() == "category", "cat dtype");
20372 check(s.has_cat_categories(), "cat has_categories");
20373 check(s.cat_ordered() == true, "cat ordered");
20374 auto cats = s.get_cat_categories();
20375 check(cats.size() == 3, "cat categories size");
20376 std::set<std::string> cat_set(cats.begin(), cats.end());
20377 check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
get_column_index (pd_test_1_all.cpp:22021)
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
22029 std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030 std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031 std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
get_dummies (pd_test_3_all.cpp:13545)
13535 }
13536
13537 std::cout << " -> tests passed" << std::endl;
13538}
13539
13540// ============================================================================
13541// Get Dummies / From Dummies Tests
13542// ============================================================================
13543
13544void pd_test_top_level_get_dummies() {
13545 std::cout << "========= get_dummies() ===============================";
13546
13547 std::vector<std::string> data = {"A", "B", "A", "C", "B", "A"};
13548 pandas::Series<std::string> s(data, "category");
13549
13550 pandas::DataFrame result = pandas::get_dummies(s);
13551
13552 // Should have columns for A, B, C
13553 if (result.ncols() != 3) {
13554 std::cout << " [FAIL] : in pd_test_top_level_get_dummies() : expected 3 columns" << std::endl;
13555 throw std::runtime_error("pd_test_top_level_get_dummies failed: wrong column count");
get_duplicate_columns (pd_test_2_all.cpp:20352)
20342}
20343
20344void pd_test_getitem_dispatch_get_duplicates() {
20345 std::cout << "pd_test_getitem_dispatch_get_duplicates" << std::endl;
20346 pandas::DataFrame df;
20347 std::vector<numpy::float64> v1 = {1.0, 2.0, 3.0};
20348 std::vector<numpy::float64> v2 = {4.0, 5.0, 6.0};
20349 df.insert(0, "col", std::make_unique<pandas::Series<numpy::float64>>(v1, "col"), true);
20350 df.insert(1, "col", std::make_unique<pandas::Series<numpy::float64>>(v2, "col"), true);
20351
20352 auto sub = df.get_duplicate_columns("col");
20353 check(sub.ncols() == 2, "dup ncols");
20354 check(sub.nrows() == 3, "dup nrows");
20355 // Both columns named "col"
20356 check(sub.columns().get_value_str(0) == "col", "dup col0 name");
20357 check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361 std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362 pandas::DataFrame df;
get_optional (pd_test_1_all.cpp:6741)
6731 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex cols");
6732 }
6733 }
6734
6735 // Test get_optional
6736 {
6737 std::map<std::string, std::vector<int>> data;
6738 data["A"] = {1, 2, 3};
6739 pandas::DataFrame df(data);
6740
6741 auto col_opt = df.get_optional("A");
6742 if (!col_opt.has_value()) {
6743 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : get_optional A should exist" << std::endl;
6744 throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional A");
6745 }
6746
6747 auto missing = df.get_optional("Z");
6748 if (missing.has_value()) {
6749 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : get_optional Z should not exist" << std::endl;
6750 throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751 }
get_series (pd_test_5_all.cpp:12970)
12960 pandas_tests::check(!threw, "query_bool_and_numeric.no_throw", local_fail);
12961 if (!threw) {
12962 pandas_tests::check(result.nrows() == 1, "query_bool_and_numeric.nrows == 1 (got " + std::to_string(result.nrows()) + ")", local_fail);
12963 }
12964 }
12965
12966 // === xs_level tests (Error 2) ===
12967 // Note: xs_level() doesn't exist yet — test will verify it after implementation
12968
12969 // === get_series + unstack tests (Error 1) ===
12970 // Note: get_series<T>() doesn't exist yet — test will verify it after implementation
12971
12972 if (local_fail > 0) {
12973 std::cout << " [FAIL] : in f_test_anal_i_query_bool_unstack() : " << local_fail << " checks failed" << std::endl;
12974 throw std::runtime_error("f_test_anal_i_query_bool_unstack failed");
12975 }
12976 std::cout << " -> tests passed" << std::endl;
12977}
12978
12979
12980// --- cpp_f_test_zanal_a_column_width.cpp ---
get_value_bool (pd_test_5_all.cpp:35197)
35187 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35188 pandas_tests::check(df["X"].get_value_double(0) == 1.0, "case_2.idx0_one", local_fail);
35189 pandas_tests::check(std::isnan(df["X"].get_value_double(1)),
35190 "case_2.idx1_nan", local_fail);
35191 pandas_tests::check(df["X"].get_value_double(2) == 0.0, "case_2.idx2_zero", local_fail);
35192}
35193
35194void bool_nullable_826495_case_3_get_value_bool_mask_aware(int& local_fail) {
35195 pandas::DataFrame df;
35196 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197 pandas_tests::check(df["X"].get_value_bool(0) == true, "case_3.idx0_true", local_fail);
35198 pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199 pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false", local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203 pandas::DataFrame df;
35204 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205 pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206 pandas_tests::check(df["X"].is_na_at(1) == true, "case_4.idx1_is_na", local_fail);
35207 pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
head (pd_test_1_all.cpp:6301)
6291 void pd_test_dataframe_indexing() {
6292 std::cout << "========= indexing (loc/iloc) ==============";
6293
6294 std::map<std::string, std::vector<numpy::float64>> data;
6295 data["A"] = {10.0, 20.0, 30.0, 40.0, 50.0};
6296 data["B"] = {1.0, 2.0, 3.0, 4.0, 5.0};
6297
6298 pandas::DataFrame df(data);
6299
6300 // Test head
6301 auto head_df = df.head(3);
6302 if (head_df.nrows() != 3) {
6303 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304 throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305 }
6306
6307 // Test tail
6308 auto tail_df = df.tail(2);
6309 if (tail_df.nrows() != 2) {
6310 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311 throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
iat (pd_test_1_all.cpp:22028)
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
22029 std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030 std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031 std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033 if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034 passed = false;
22035 error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036 ", A[2]=" + a2 + ", A[3]=" + a3;
22037 }
idxmax (pd_test_1_all.cpp:23956)
23946 std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23947 return 0;
23948 }
23949
23950} // namespace dataframe_tests
23951// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23952
23953// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23954// dataframe_tests/pd_test_idxmax_idxmin.cpp
23955// Test for DataFrame.idxmax() and idxmin() methods
23956
23957#include <iostream>
23958#include <stdexcept>
23959#include <cmath>
23960#include <limits>
23961#include "../pandas/pd_dataframe.h"
23962
23963// CRITICAL: No using namespace directives
23964
23965namespace dataframe_tests {
idxmin (pd_test_1_all.cpp:23956)
23946 std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23947 return 0;
23948 }
23949
23950} // namespace dataframe_tests
23951// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23952
23953// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23954// dataframe_tests/pd_test_idxmax_idxmin.cpp
23955// Test for DataFrame.idxmax() and idxmin() methods
23956
23957#include <iostream>
23958#include <stdexcept>
23959#include <cmath>
23960#include <limits>
23961#include "../pandas/pd_dataframe.h"
23962
23963// CRITICAL: No using namespace directives
23964
23965namespace dataframe_tests {
iloc (pd_test_1_all.cpp:19149)
19139 pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141 // Positional indexing
19142 bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143 if (!passed) {
19144 std::cout << " [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145 throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146 }
19147
19148 // iloc slice
19149 auto slice = s.iloc(1, 4);
19150 passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151 if (!passed) {
19152 std::cout << " [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153 throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154 }
19155
19156 // iloc with indices
19157 auto selected = s.iloc({0, 2, 4});
19158 passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159 if (!passed) {
iloc (pd_test_1_all.cpp:19149)
19139 pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141 // Positional indexing
19142 bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143 if (!passed) {
19144 std::cout << " [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145 throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146 }
19147
19148 // iloc slice
19149 auto slice = s.iloc(1, 4);
19150 passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151 if (!passed) {
19152 std::cout << " [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153 throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154 }
19155
19156 // iloc with indices
19157 auto selected = s.iloc({0, 2, 4});
19158 passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159 if (!passed) {
iloc (pd_test_1_all.cpp:19149)
19139 pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141 // Positional indexing
19142 bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143 if (!passed) {
19144 std::cout << " [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145 throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146 }
19147
19148 // iloc slice
19149 auto slice = s.iloc(1, 4);
19150 passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151 if (!passed) {
19152 std::cout << " [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153 throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154 }
19155
19156 // iloc with indices
19157 auto selected = s.iloc({0, 2, 4});
19158 passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159 if (!passed) {
iloc_rows (pd_test_1_all.cpp:6315)
6305 }
6306
6307 // Test tail
6308 auto tail_df = df.tail(2);
6309 if (tail_df.nrows() != 2) {
6310 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311 throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312 }
6313
6314 // Test iloc_rows range
6315 auto slice = df.iloc_rows(1, 4);
6316 if (slice.nrows() != 3) {
6317 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318 throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
6319 }
6320
6321 // Test iloc_rows with indices
6322 auto selected = df.iloc_rows(std::vector<size_t>{0, 2, 4});
6323 if (selected.nrows() != 3) {
6324 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : iloc_rows vector nrows != 3" << std::endl;
6325 throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows vector nrows != 3");
iloc_rows (pd_test_1_all.cpp:6315)
6305 }
6306
6307 // Test tail
6308 auto tail_df = df.tail(2);
6309 if (tail_df.nrows() != 2) {
6310 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311 throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312 }
6313
6314 // Test iloc_rows range
6315 auto slice = df.iloc_rows(1, 4);
6316 if (slice.nrows() != 3) {
6317 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318 throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
6319 }
6320
6321 // Test iloc_rows with indices
6322 auto selected = df.iloc_rows(std::vector<size_t>{0, 2, 4});
6323 if (selected.nrows() != 3) {
6324 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : iloc_rows vector nrows != 3" << std::endl;
6325 throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows vector nrows != 3");
last (pd_test_1_all.cpp:11617)
11607 void pd_test_groupby_first_last() {
11608 std::cout << "========= GroupBy first/last ====================";
11609
11610 std::map<std::string, std::vector<double>> data = {
11611 {"category", {1.0, 1.0, 2.0, 2.0}},
11612 {"value", {10.0, 20.0, 30.0, 40.0}}
11613 };
11614 pandas::DataFrame df(data);
11615
11616 auto first_result = df.groupby("category").first();
11617 auto last_result = df.groupby("category").last();
11618
11619 // First for group 1: 10, group 2: 30
11620 // Last for group 1: 20, group 2: 40
11621 double first1 = std::stod(first_result["value"].get_value_str(0));
11622 double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626 if (!passed) {
11627 std::cout << " [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
last_valid_index (pd_test_1_all.cpp:20579)
20569 std::vector<double> values = {
20570 1.0,
20571 2.0,
20572 3.0,
20573 std::numeric_limits<double>::quiet_NaN(),
20574 std::numeric_limits<double>::quiet_NaN()
20575 };
20576 pandas::Series<double> s(values, "test");
20577
20578 auto last_idx = s.last_valid_index();
20579
20580 bool passed = last_idx.has_value() && last_idx.value() == 2;
20581
20582 if (!passed) {
20583 std::cout << " [FAIL] : in pd_test_timeseries_last_valid_index() : expected index 2" << std::endl;
20584 throw std::runtime_error("pd_test_timeseries_last_valid_index failed");
20585 }
20586
20587 std::cout << " -> tests passed" << std::endl;
20588 }
loc (pd_test_3_all.cpp:10916)
10906 {{"A","A","B"}, {"x","y","x"}});
10907 df.set_index(mi);
10908 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909 std::cout << " [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910 throw std::runtime_error("set_index MultiIndex failed");
10911 }
10912 std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916 std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917 pandas::DataFrame df;
10918 df.add_column<int64_t>("val", {10, 20, 30, 40});
10919 auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920 {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921 df.set_multiindex(mi);
10922 pandas::DataFrame result = df.loc("London");
10923 if (result.nrows() != 2) {
10924 std::cout << " [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925 throw std::runtime_error("loc single-arg failed");
10926 }
loc (pd_test_3_all.cpp:10916)
10906 {{"A","A","B"}, {"x","y","x"}});
10907 df.set_index(mi);
10908 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909 std::cout << " [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910 throw std::runtime_error("set_index MultiIndex failed");
10911 }
10912 std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916 std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917 pandas::DataFrame df;
10918 df.add_column<int64_t>("val", {10, 20, 30, 40});
10919 auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920 {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921 df.set_multiindex(mi);
10922 pandas::DataFrame result = df.loc("London");
10923 if (result.nrows() != 2) {
10924 std::cout << " [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925 throw std::runtime_error("loc single-arg failed");
10926 }
loc (pd_test_3_all.cpp:10916)
10906 {{"A","A","B"}, {"x","y","x"}});
10907 df.set_index(mi);
10908 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909 std::cout << " [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910 throw std::runtime_error("set_index MultiIndex failed");
10911 }
10912 std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916 std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917 pandas::DataFrame df;
10918 df.add_column<int64_t>("val", {10, 20, 30, 40});
10919 auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920 {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921 df.set_multiindex(mi);
10922 pandas::DataFrame result = df.loc("London");
10923 if (result.nrows() != 2) {
10924 std::cout << " [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925 throw std::runtime_error("loc single-arg failed");
10926 }
loc (pd_test_3_all.cpp:10916)
10906 {{"A","A","B"}, {"x","y","x"}});
10907 df.set_index(mi);
10908 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909 std::cout << " [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910 throw std::runtime_error("set_index MultiIndex failed");
10911 }
10912 std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916 std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917 pandas::DataFrame df;
10918 df.add_column<int64_t>("val", {10, 20, 30, 40});
10919 auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920 {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921 df.set_multiindex(mi);
10922 pandas::DataFrame result = df.loc("London");
10923 if (result.nrows() != 2) {
10924 std::cout << " [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925 throw std::runtime_error("loc single-arg failed");
10926 }
loc (pd_test_3_all.cpp:10916)
10906 {{"A","A","B"}, {"x","y","x"}});
10907 df.set_index(mi);
10908 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909 std::cout << " [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910 throw std::runtime_error("set_index MultiIndex failed");
10911 }
10912 std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916 std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917 pandas::DataFrame df;
10918 df.add_column<int64_t>("val", {10, 20, 30, 40});
10919 auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920 {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921 df.set_multiindex(mi);
10922 pandas::DataFrame result = df.loc("London");
10923 if (result.nrows() != 2) {
10924 std::cout << " [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925 throw std::runtime_error("loc single-arg failed");
10926 }
mask (pd_test_1_all.cpp:9119)
9109void pd_test_datetime_mixin_array_constructor() {
9110 std::cout << "========= DatetimeTDMixin array constructor =========================";
9111
9112 // Create DatetimeArray with some values
9113 numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9114 data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2001
9115 data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2017
9116 data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2020
9117
9118 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9119 mask.setElementAt({0}, numpy::bool_(false));
9120 mask.setElementAt({1}, numpy::bool_(false));
9121 mask.setElementAt({2}, numpy::bool_(false));
9122
9123 pandas::DatetimeArray arr(data, mask);
9124 pandas::DatetimeTDMixin idx(arr, "timestamps");
9125
9126 bool passed = (idx.size() == 3 && !idx.empty() &&
9127 idx.name().has_value() && *idx.name() == "timestamps" &&
9128 idx.inferred_type() == "datetime");
nlargest (pd_test_1_all.cpp:6425)
6415 // Test sort_values descending
6416 auto sorted_desc = df.sort_values("A", false);
6417 first_val = sorted_desc["A"].get_value_str(0);
6418 if (std::stod(first_val) != 5.0) {
6419 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values desc first != 5" << std::endl;
6420 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values desc first != 5");
6421 }
6422
6423 // Test nlargest
6424 auto largest = df.nlargest(2, "A");
6425 if (largest.nrows() != 2) {
6426 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6427 throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6428 }
6429
6430 // Test nsmallest
6431 auto smallest = df.nsmallest(2, "A");
6432 if (smallest.nrows() != 2) {
6433 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6434 throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
nsmallest (pd_test_1_all.cpp:6432)
6422 }
6423
6424 // Test nlargest
6425 auto largest = df.nlargest(2, "A");
6426 if (largest.nrows() != 2) {
6427 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6428 throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6429 }
6430
6431 // Test nsmallest
6432 auto smallest = df.nsmallest(2, "A");
6433 if (smallest.nrows() != 2) {
6434 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6435 throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
6436 }
6437
6438 std::cout << " -> tests passed" << std::endl;
6439 }
6440
6441 // =====================================================================
6442 // Test: Rank
query (pd_test_1_all.cpp:26418)
26408 std::cout << "====================================== [OK] pd_test_prod test suite ========================== " << std::endl;
26409 return 0;
26410 }
26411
26412} // namespace dataframe_tests
26413// ------------------- pd_test_prod.cpp (end) -----------------------------
26414
26415// ------------------- pd_test_query.cpp (start) -----------------------------
26416// dataframe_tests/pd_test_query.cpp
26417// Tests for DataFrame.query() method
26418
26419#include <iostream>
26420#include <stdexcept>
26421#include <map>
26422#include <vector>
26423#include <string>
26424#include "../pandas/pd_dataframe.h"
26425
26426namespace dataframe_tests {
26427 namespace dataframe_tests_query {
sample (pd_test_3_all.cpp:207)
197 if (df.index().dtype_name() != "int64") {
198 std::cout << " [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199 throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200 }
201 }
202
203 std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207 std::cout << "========= DataFrame.sample() =======================";
208
209 std::map<std::string, std::vector<double>> data = {
210 {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211 {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212 };
213 pandas::DataFrame df(data);
214
215 // Sample 3 rows
216 pandas::DataFrame result = df.sample(3, 42); // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197 if (df.index().dtype_name() != "int64") {
198 std::cout << " [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199 throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200 }
201 }
202
203 std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207 std::cout << "========= DataFrame.sample() =======================";
208
209 std::map<std::string, std::vector<double>> data = {
210 {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211 {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212 };
213 pandas::DataFrame df(data);
214
215 // Sample 3 rows
216 pandas::DataFrame result = df.sample(3, 42); // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197 if (df.index().dtype_name() != "int64") {
198 std::cout << " [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199 throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200 }
201 }
202
203 std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207 std::cout << "========= DataFrame.sample() =======================";
208
209 std::map<std::string, std::vector<double>> data = {
210 {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211 {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212 };
213 pandas::DataFrame df(data);
214
215 // Sample 3 rows
216 pandas::DataFrame result = df.sample(3, 42); // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197 if (df.index().dtype_name() != "int64") {
198 std::cout << " [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199 throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200 }
201 }
202
203 std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207 std::cout << "========= DataFrame.sample() =======================";
208
209 std::map<std::string, std::vector<double>> data = {
210 {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211 {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212 };
213 pandas::DataFrame df(data);
214
215 // Sample 3 rows
216 pandas::DataFrame result = df.sample(3, 42); // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197 if (df.index().dtype_name() != "int64") {
198 std::cout << " [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199 throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200 }
201 }
202
203 std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207 std::cout << "========= DataFrame.sample() =======================";
208
209 std::map<std::string, std::vector<double>> data = {
210 {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211 {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212 };
213 pandas::DataFrame df(data);
214
215 // Sample 3 rows
216 pandas::DataFrame result = df.sample(3, 42); // seed=42 for reproducibility
sample_frac (pd_test_5_all.cpp:2387)
2377 // Build stats with string index (asset tickers), then shuffle and sort_index
2378 pandas::DataFrame stats;
2379 stats.add_column<double>("Mean", { 0.002212, 0.001136, 0.000029, 0.000988, -0.000533, 0.000408, 0.000692});
2380 stats.add_column<double>("Std", { 0.017966, 0.016225, 0.020372, 0.021692, 0.025519, 0.026644, 0.014562});
2381 stats.add_column<double>("Sharpe", { 1.955, 1.111, 0.023, 0.723, -0.331, 0.243, 0.755});
2382 stats.add_column<double>("Skew", {-0.049545, 0.024990,-0.075349,-0.009316, 0.088344, 0.017202, 0.182708});
2383 stats.add_column<double>("Kurt", {-0.053650,-0.085255,-0.098852, 0.069301, 0.370648,-0.038772, 0.079181});
2384 stats.set_index_from_list({"AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "JPM"});
2385
2386 // Shuffle: sample(frac=1, random_state=42) produces order: NVDA,AMZN,JPM,META,AAPL,MSFT,GOOGL
2387 auto shuf = stats.sample_frac(1.0, 42);
2388 auto si = shuf.sort_index();
2389
2390 std::string expected =
2391 " Mean Std Sharpe Skew Kurt\n"
2392 "AAPL 0.002212 0.017966 1.955 -0.049545 -0.053650\n"
2393 "AMZN 0.000988 0.021692 0.723 -0.009316 0.069301\n"
2394 "GOOGL 0.000029 0.020372 0.023 -0.075349 -0.098852\n"
2395 "JPM 0.000692 0.014562 0.755 0.182708 0.079181\n"
2396 "META -0.000533 0.025519 -0.331 0.088344 0.370648\n"
2397 "MSFT 0.001136 0.016225 1.111 0.024990 -0.085255\n"
set_value_nan (pd_test_5_all.cpp:18478)
18468 "0 a\n"
18469 "1 NaN\n"
18470 "2 c";
18471 bool ok = (actual == expected);
18472 pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_10_str_col_where_default.to_string", local_fail);
18473 if (!ok) dump_diff("case_10", expected, actual);
18474}
18475
18476void where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN(int& local_fail) {
18477 pandas::Series<std::int64_t> s({10, 20, 30});
18478 s.set_value_nan(0);
18479
18480 std::string actual = s.get_value_str(0);
18481 std::string expected = "NaN";
18482 bool ok = (actual == expected);
18483 pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN (got " +
18484 actual + ")", local_fail);
18485
18486 bool ok1 = (s.get_value_str(1) == "20");
18487 bool ok2 = (s.get_value_str(2) == "30");
18488 pandas_tests::check(ok1, "case_11.kept_idx1_eq_20", local_fail);
tail (pd_test_1_all.cpp:6308)
6298 pandas::DataFrame df(data);
6299
6300 // Test head
6301 auto head_df = df.head(3);
6302 if (head_df.nrows() != 3) {
6303 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304 throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305 }
6306
6307 // Test tail
6308 auto tail_df = df.tail(2);
6309 if (tail_df.nrows() != 2) {
6310 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311 throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312 }
6313
6314 // Test iloc_rows range
6315 auto slice = df.iloc_rows(1, 4);
6316 if (slice.nrows() != 3) {
6317 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318 throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897 std::cout << "========= inherited take ==============================";
5898
5899 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900 pandas::CategoricalIndex idx(arr);
5901
5902 std::vector<size_t> indices = {0, 2, 3};
5903 pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905 bool passed = (taken.size() == 3);
5906 if (!passed) {
5907 std::cout << " [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908 throw std::runtime_error("pd_test_categorical_index_take failed");
5909 }
5910
5911 std::cout << " -> tests passed" << std::endl;
5912}
where (pd_test_1_all.cpp:22018)
22008 data["B"] = {5.0, 6.0, 7.0, 8.0};
22009 pandas::DataFrame df(data);
22010
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where (pd_test_1_all.cpp:22018)
22008 data["B"] = {5.0, 6.0, 7.0, 8.0};
22009 pandas::DataFrame df(data);
22010
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where (pd_test_1_all.cpp:22018)
22008 data["B"] = {5.0, 6.0, 7.0, 8.0};
22009 pandas::DataFrame df(data);
22010
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where_resolved (pd_test_5_all.cpp:91939)
91929 auto cond_df = make_2x3_cond_mixed();
91930
91931 pandas::WhereCondition cond;
91932 cond.kind = pandas::WhereCondKind::DATAFRAME;
91933 cond.dataframe_ptr = &cond_df;
91934
91935 pandas::WhereOther other;
91936 other.kind = pandas::WhereOtherKind::SCALAR;
91937 other.scalar_val = -999.0;
91938
91939 pandas::DataFrame result = df.where_resolved(cond, other, /*axis=*/0);
91940
91941 pandas_tests::check(result.nrows() == 2,
91942 "case_1_cond_df_other_scalar.nrows_eq_2", local_fail);
91943 pandas_tests::check(result.ncols() == 3,
91944 "case_1_cond_df_other_scalar.ncols_eq_3", local_fail);
91945 pandas_tests::check(!result.to_string().empty(),
91946 "case_1_cond_df_other_scalar.to_string_nonempty", local_fail);
91947}
91948
91949void case_2_cond_df_other_series() {
xs (pd_test_2_all.cpp:18668)
18658 std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18659 return 0;
18660 }
18661
18662} // namespace dataframe_tests
18663// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18664
18665// ------------------- pd_test_xs.cpp (start) -----------------------------
18666// dataframe_tests/pd_test_xs.cpp
18667// Tests for DataFrame xs() (cross-section) implementation
18668
18669#include <iostream>
18670#include <stdexcept>
18671#include <vector>
18672#include <string>
18673#include <map>
18674
18675#include "../pandas/pd_dataframe.h"
18676
18677// CRITICAL: No using namespace directives
xs (pd_test_2_all.cpp:18668)
18658 std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18659 return 0;
18660 }
18661
18662} // namespace dataframe_tests
18663// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18664
18665// ------------------- pd_test_xs.cpp (start) -----------------------------
18666// dataframe_tests/pd_test_xs.cpp
18667// Tests for DataFrame xs() (cross-section) implementation
18668
18669#include <iostream>
18670#include <stdexcept>
18671#include <vector>
18672#include <string>
18673#include <map>
18674
18675#include "../pandas/pd_dataframe.h"
18676
18677// CRITICAL: No using namespace directives
xs (pd_test_2_all.cpp:18668)
18658 std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18659 return 0;
18660 }
18661
18662} // namespace dataframe_tests
18663// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18664
18665// ------------------- pd_test_xs.cpp (start) -----------------------------
18666// dataframe_tests/pd_test_xs.cpp
18667// Tests for DataFrame xs() (cross-section) implementation
18668
18669#include <iostream>
18670#include <stdexcept>
18671#include <vector>
18672#include <string>
18673#include <map>
18674
18675#include "../pandas/pd_dataframe.h"
18676
18677// CRITICAL: No using namespace directives
xs_level (pd_test_5_all.cpp:12967)
12957 } catch (const std::exception&) {
12958 threw = true;
12959 }
12960 pandas_tests::check(!threw, "query_bool_and_numeric.no_throw", local_fail);
12961 if (!threw) {
12962 pandas_tests::check(result.nrows() == 1, "query_bool_and_numeric.nrows == 1 (got " + std::to_string(result.nrows()) + ")", local_fail);
12963 }
12964 }
12965
12966 // === xs_level tests (Error 2) ===
12967 // Note: xs_level() doesn't exist yet — test will verify it after implementation
12968
12969 // === get_series + unstack tests (Error 1) ===
12970 // Note: get_series<T>() doesn't exist yet — test will verify it after implementation
12971
12972 if (local_fail > 0) {
12973 std::cout << " [FAIL] : in f_test_anal_i_query_bool_unstack() : " << local_fail << " checks failed" << std::endl;
12974 throw std::runtime_error("f_test_anal_i_query_bool_unstack failed");
12975 }
12976 std::cout << " -> tests passed" << std::endl;
12977}
assign (pd_test_1_all.cpp:6653)
6643 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644 }
6645 }
6646
6647 // Test assign
6648 {
6649 std::map<std::string, std::vector<numpy::int64>> assign_data;
6650 assign_data["A"] = {1, 2, 3};
6651 pandas::DataFrame df_assign(assign_data);
6652
6653 auto df2 = df_assign.assign<numpy::int64>("B", {10, 20, 30});
6654 if (df2.ncols() != 2) {
6655 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : assign ncols != 2" << std::endl;
6656 throw std::runtime_error("pd_test_dataframe_manipulation failed: assign ncols");
6657 }
6658 if (!df2.has_column("B")) {
6659 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : assign didn't add column B" << std::endl;
6660 throw std::runtime_error("pd_test_dataframe_manipulation failed: assign column B");
6661 }
6662 }
assign (pd_test_1_all.cpp:6653)
6643 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644 }
6645 }
6646
6647 // Test assign
6648 {
6649 std::map<std::string, std::vector<numpy::int64>> assign_data;
6650 assign_data["A"] = {1, 2, 3};
6651 pandas::DataFrame df_assign(assign_data);
6652
6653 auto df2 = df_assign.assign<numpy::int64>("B", {10, 20, 30});
6654 if (df2.ncols() != 2) {
6655 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : assign ncols != 2" << std::endl;
6656 throw std::runtime_error("pd_test_dataframe_manipulation failed: assign ncols");
6657 }
6658 if (!df2.has_column("B")) {
6659 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : assign didn't add column B" << std::endl;
6660 throw std::runtime_error("pd_test_dataframe_manipulation failed: assign column B");
6661 }
6662 }
assign (pd_test_1_all.cpp:6653)
6643 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644 }
6645 }
6646
6647 // Test assign
6648 {
6649 std::map<std::string, std::vector<numpy::int64>> assign_data;
6650 assign_data["A"] = {1, 2, 3};
6651 pandas::DataFrame df_assign(assign_data);
6652
6653 auto df2 = df_assign.assign<numpy::int64>("B", {10, 20, 30});
6654 if (df2.ncols() != 2) {
6655 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : assign ncols != 2" << std::endl;
6656 throw std::runtime_error("pd_test_dataframe_manipulation failed: assign ncols");
6657 }
6658 if (!df2.has_column("B")) {
6659 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : assign didn't add column B" << std::endl;
6660 throw std::runtime_error("pd_test_dataframe_manipulation failed: assign column B");
6661 }
6662 }
drop (pd_test_1_all.cpp:6558)
6548 if (df.ncols() != 2) {
6549 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550 throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551 }
6552 if (!popped) {
6553 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554 throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555 }
6556
6557 // Test drop columns
6558 auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559 if (dropped.ncols() != 1) {
6560 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562 }
6563
6564 // Test rename
6565 auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566 if (!renamed.has_column("X")) {
6567 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568 throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
drop_duplicates (pd_test_1_all.cpp:6639)
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
6634 std::map<std::string, std::vector<numpy::int64>> dup_data;
6635 dup_data["A"] = {1, 1, 2, 2};
6636 dup_data["B"] = {1, 1, 2, 3};
6637 pandas::DataFrame df_dup(dup_data);
6638
6639 auto deduped = df_dup.drop_duplicates();
6640 // Rows 0 and 1 are duplicates (A=1, B=1), so should have 3 rows
6641 if (deduped.nrows() != 3) {
6642 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop_duplicates nrows != 3, got " << deduped.nrows() << std::endl;
6643 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644 }
6645 }
6646
6647 // Test assign
6648 {
6649 std::map<std::string, std::vector<numpy::int64>> assign_data;
droplevel (pd_test_1_all.cpp:14428)
14418 void pd_test_multiindex_droplevel() {
14419 std::cout << "========= droplevel =================================== ";
14420
14421 std::vector<std::vector<std::string>> arrays = {
14422 {"a", "a", "b"},
14423 {"x", "y", "z"},
14424 {"1", "2", "3"}
14425 };
14426
14427 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428 pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430 bool passed = true;
14431
14432 if (dropped.nlevels() != 2) {
14433 std::cout << " [FAIL] : nlevels should be 2 after drop" << std::endl;
14434 passed = false;
14435 }
14436
14437 // Check remaining levels
14438 auto tup = dropped[0];
droplevel (pd_test_1_all.cpp:14428)
14418 void pd_test_multiindex_droplevel() {
14419 std::cout << "========= droplevel =================================== ";
14420
14421 std::vector<std::vector<std::string>> arrays = {
14422 {"a", "a", "b"},
14423 {"x", "y", "z"},
14424 {"1", "2", "3"}
14425 };
14426
14427 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428 pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430 bool passed = true;
14431
14432 if (dropped.nlevels() != 2) {
14433 std::cout << " [FAIL] : nlevels should be 2 after drop" << std::endl;
14434 passed = false;
14435 }
14436
14437 // Check remaining levels
14438 auto tup = dropped[0];
droplevel (pd_test_1_all.cpp:14428)
14418 void pd_test_multiindex_droplevel() {
14419 std::cout << "========= droplevel =================================== ";
14420
14421 std::vector<std::vector<std::string>> arrays = {
14422 {"a", "a", "b"},
14423 {"x", "y", "z"},
14424 {"1", "2", "3"}
14425 };
14426
14427 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428 pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430 bool passed = true;
14431
14432 if (dropped.nlevels() != 2) {
14433 std::cout << " [FAIL] : nlevels should be 2 after drop" << std::endl;
14434 passed = false;
14435 }
14436
14437 // Check remaining levels
14438 auto tup = dropped[0];
droplevel (pd_test_1_all.cpp:14428)
14418 void pd_test_multiindex_droplevel() {
14419 std::cout << "========= droplevel =================================== ";
14420
14421 std::vector<std::vector<std::string>> arrays = {
14422 {"a", "a", "b"},
14423 {"x", "y", "z"},
14424 {"1", "2", "3"}
14425 };
14426
14427 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428 pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430 bool passed = true;
14431
14432 if (dropped.nlevels() != 2) {
14433 std::cout << " [FAIL] : nlevels should be 2 after drop" << std::endl;
14434 passed = false;
14435 }
14436
14437 // Check remaining levels
14438 auto tup = dropped[0];
dropna (pd_test_1_all.cpp:531)
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
dropna (pd_test_1_all.cpp:531)
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
reindex (pd_test_1_all.cpp:6708)
6698 }
6699 }
6700
6701 // Test reindex rows
6702 {
6703 std::map<std::string, std::vector<double>> data;
6704 data["A"] = {1.0, 2.0, 3.0};
6705 pandas::DataFrame df(data);
6706 df = df.set_axis({"x", "y", "z"}, 0);
6707
6708 auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709 if (reindexed.nrows() != 3) {
6710 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712 }
6713 // 'w' should have NaN
6714 std::string val = reindexed["A"].get_value_str(2);
6715 if (!std::isnan(std::stod(val))) {
6716 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718 }
reindex (pd_test_1_all.cpp:6708)
6698 }
6699 }
6700
6701 // Test reindex rows
6702 {
6703 std::map<std::string, std::vector<double>> data;
6704 data["A"] = {1.0, 2.0, 3.0};
6705 pandas::DataFrame df(data);
6706 df = df.set_axis({"x", "y", "z"}, 0);
6707
6708 auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709 if (reindexed.nrows() != 3) {
6710 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712 }
6713 // 'w' should have NaN
6714 std::string val = reindexed["A"].get_value_str(2);
6715 if (!std::isnan(std::stod(val))) {
6716 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718 }
reindex (pd_test_1_all.cpp:6708)
6698 }
6699 }
6700
6701 // Test reindex rows
6702 {
6703 std::map<std::string, std::vector<double>> data;
6704 data["A"] = {1.0, 2.0, 3.0};
6705 pandas::DataFrame df(data);
6706 df = df.set_axis({"x", "y", "z"}, 0);
6707
6708 auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709 if (reindexed.nrows() != 3) {
6710 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712 }
6713 // 'w' should have NaN
6714 std::string val = reindexed["A"].get_value_str(2);
6715 if (!std::isnan(std::stod(val))) {
6716 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718 }
reindex (pd_test_1_all.cpp:6708)
6698 }
6699 }
6700
6701 // Test reindex rows
6702 {
6703 std::map<std::string, std::vector<double>> data;
6704 data["A"] = {1.0, 2.0, 3.0};
6705 pandas::DataFrame df(data);
6706 df = df.set_axis({"x", "y", "z"}, 0);
6707
6708 auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709 if (reindexed.nrows() != 3) {
6710 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712 }
6713 // 'w' should have NaN
6714 std::string val = reindexed["A"].get_value_str(2);
6715 if (!std::isnan(std::stod(val))) {
6716 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718 }
reindex (pd_test_1_all.cpp:6708)
6698 }
6699 }
6700
6701 // Test reindex rows
6702 {
6703 std::map<std::string, std::vector<double>> data;
6704 data["A"] = {1.0, 2.0, 3.0};
6705 pandas::DataFrame df(data);
6706 df = df.set_axis({"x", "y", "z"}, 0);
6707
6708 auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709 if (reindexed.nrows() != 3) {
6710 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712 }
6713 // 'w' should have NaN
6714 std::string val = reindexed["A"].get_value_str(2);
6715 if (!std::isnan(std::stod(val))) {
6716 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718 }
reindex_like (pd_test_1_all.cpp:6777)
6767 data1["A"] = {1, 2};
6768 data1["B"] = {3, 4};
6769 pandas::DataFrame df1(data1);
6770
6771 std::map<std::string, std::vector<int>> data2;
6772 data2["B"] = {10, 20, 30};
6773 data2["C"] = {40, 50, 60};
6774 pandas::DataFrame df2(data2);
6775 df2 = df2.set_axis({"x", "y", "z"}, 0);
6776
6777 auto reindexed = df1.reindex_like(df2);
6778 if (reindexed.nrows() != 3 || reindexed.ncols() != 2) {
6779 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex_like wrong shape" << std::endl;
6780 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex_like");
6781 }
6782 }
6783
6784 std::cout << " -> tests passed" << std::endl;
6785 }
6786
6787 // =====================================================================
reindex_with_indexer (pd_test_5_all.cpp:40388)
40378 s.set_dtype_override("boolean");
40379 s.set_freq(std::optional<std::string>("D"));
40380 s.set_string_na_sentinel_disabled(true);
40381
40382 // Indexer: identity over the 3 source positions.
40383 numpy::NDArray<numpy::int64> indexer(std::vector<size_t>{3});
40384 indexer.setElementAt({0}, 0);
40385 indexer.setElementAt({1}, 1);
40386 indexer.setElementAt({2}, 2);
40387
40388 auto base = s.reindex_with_indexer(indexer);
40389 pandas_tests::check(base != nullptr, "case7.reindex_with_indexer_nonnull", local_fail);
40390 if (!base) return;
40391
40392 auto* r = dynamic_cast<pandas::Series<std::int64_t>*>(base.get());
40393 pandas_tests::check(r != nullptr, "case7.reindex_with_indexer_is_Series_int64",
40394 local_fail);
40395 if (!r) return;
40396
40397 // dtype_override propagates (oracle says yes).
40398 pandas_tests::check(r->dtype_override().has_value() &&
reindex_with_spec (pd_test_5_all.cpp:54746)
54736}
54737
54738void case_25_b_bool_int_fill_explicit_spec(int& local_fail) {
54739 std::cout << "---- 25b bool_int_fill_explicit_spec\n";
54740
54741 pandas::DataFrame df;
54742 df.add_column<bool>("b", {true, false, true});
54743
54744 // Explicit-spec path: tag=Int is authoritative.
54745 pandas::FillSpec spec = pandas::FillSpec::integer(0);
54746 auto r = df.reindex_with_spec({"0", "1", "2", "3", "4"}, 0, spec);
54747
54748 std::string dt = r["b"].dtype_name();
54749 pandas_tests::check(dt == "object",
54750 "25b.b dtype should be object via reindex_with_spec "
54751 "(got '" + dt + "')", local_fail);
54752
54753 std::string rep = r.to_string();
54754 // Post-fix target: fill is literal int 0 (not False).
54755 size_t n_false = 0, pos = 0;
54756 while ((pos = rep.find("False", pos)) != std::string::npos) {
rename (pd_test_1_all.cpp:5815)
5806 std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810 std::cout << "========= rename ======================================";
5811
5812 pandas::CategoricalArray arr({"x", "y"});
5813 pandas::CategoricalIndex idx(arr, "old_name");
5814
5815 pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817 bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818 renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819 if (!passed) {
5820 std::cout << " [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821 throw std::runtime_error("pd_test_categorical_index_rename failed");
5822 }
5823
5824 std::cout << " -> tests passed" << std::endl;
5825}
rename (pd_test_1_all.cpp:5815)
5806 std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810 std::cout << "========= rename ======================================";
5811
5812 pandas::CategoricalArray arr({"x", "y"});
5813 pandas::CategoricalIndex idx(arr, "old_name");
5814
5815 pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817 bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818 renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819 if (!passed) {
5820 std::cout << " [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821 throw std::runtime_error("pd_test_categorical_index_rename failed");
5822 }
5823
5824 std::cout << " -> tests passed" << std::endl;
5825}
rename_axis (pd_test_1_all.cpp:6760)
6750 throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751 }
6752 }
6753
6754 // Test rename_axis
6755 {
6756 std::map<std::string, std::vector<int>> data;
6757 data["A"] = {1, 2, 3};
6758 pandas::DataFrame df(data);
6759
6760 auto renamed = df.rename_axis("my_index", 0);
6761 // Should not throw
6762 }
6763
6764 // Test reindex_like
6765 {
6766 std::map<std::string, std::vector<int>> data1;
6767 data1["A"] = {1, 2};
6768 data1["B"] = {3, 4};
6769 pandas::DataFrame df1(data1);
rename_axis (pd_test_1_all.cpp:6760)
6750 throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751 }
6752 }
6753
6754 // Test rename_axis
6755 {
6756 std::map<std::string, std::vector<int>> data;
6757 data["A"] = {1, 2, 3};
6758 pandas::DataFrame df(data);
6759
6760 auto renamed = df.rename_axis("my_index", 0);
6761 // Should not throw
6762 }
6763
6764 // Test reindex_like
6765 {
6766 std::map<std::string, std::vector<int>> data1;
6767 data1["A"] = {1, 2};
6768 data1["B"] = {3, 4};
6769 pandas::DataFrame df1(data1);
rename_columns (pd_test_1_all.cpp:6565)
6555 }
6556
6557 // Test drop columns
6558 auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559 if (dropped.ncols() != 1) {
6560 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562 }
6563
6564 // Test rename
6565 auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566 if (!renamed.has_column("X")) {
6567 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568 throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
6569 }
6570
6571 // Test isna/notna with float data
6572 {
6573 std::map<std::string, std::vector<numpy::float64>> float_data;
6574 float_data["X"] = {1.0, std::nan(""), 3.0};
6575 float_data["Y"] = {4.0, 5.0, std::nan("")};
rename_result (pd_test_5_all.cpp:50613)
50603 return df;
50604}
50605
50606void case_1_dict_axis1(int& local_fail) {
50607 std::cout << "-- case_1_dict_axis1\n";
50608 auto df = make_flat_frame();
50609 pandas::RenameSpec spec;
50610 spec.kind = pandas::RenameSpec::Kind::Dict;
50611 spec.dict_mapper["a"] = "A";
50612 spec.dict_mapper["b"] = "B";
50613 auto r = df.rename_result(spec, /*axis=*/1, /*inplace=*/false, /*errors_raise=*/false);
50614 pandas_tests::check(r.is_frame(), "case_1.is_frame", local_fail);
50615 if (!r.is_frame()) return;
50616 auto& f = *std::get<std::unique_ptr<pandas::DataFrame>>(r.value);
50617 pandas_tests::check(f.columns().get_value_str(0) == "A", "case_1.col0_A", local_fail);
50618 pandas_tests::check(f.columns().get_value_str(1) == "B", "case_1.col1_B", local_fail);
50619}
50620
50621void case_2_dict_axis0(int& local_fail) {
50622 std::cout << "-- case_2_dict_axis0\n";
50623 auto df = make_flat_frame();
reorder_levels (pd_test_1_all.cpp:14495)
14485 void pd_test_multiindex_reorder_levels() {
14486 std::cout << "========= reorder_levels ============================== ";
14487
14488 std::vector<std::vector<std::string>> arrays = {
14489 {"a", "b"},
14490 {"x", "y"},
14491 {"1", "2"}
14492 };
14493
14494 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495 pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497 bool passed = true;
14498
14499 auto tup = reordered[0];
14500 if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501 std::cout << " [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502 passed = false;
14503 }
14504
14505 if (!passed) {
reorder_levels (pd_test_1_all.cpp:14495)
14485 void pd_test_multiindex_reorder_levels() {
14486 std::cout << "========= reorder_levels ============================== ";
14487
14488 std::vector<std::vector<std::string>> arrays = {
14489 {"a", "b"},
14490 {"x", "y"},
14491 {"1", "2"}
14492 };
14493
14494 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495 pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497 bool passed = true;
14498
14499 auto tup = reordered[0];
14500 if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501 std::cout << " [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502 passed = false;
14503 }
14504
14505 if (!passed) {
reorder_levels (pd_test_1_all.cpp:14495)
14485 void pd_test_multiindex_reorder_levels() {
14486 std::cout << "========= reorder_levels ============================== ";
14487
14488 std::vector<std::vector<std::string>> arrays = {
14489 {"a", "b"},
14490 {"x", "y"},
14491 {"1", "2"}
14492 };
14493
14494 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495 pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497 bool passed = true;
14498
14499 auto tup = reordered[0];
14500 if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501 std::cout << " [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502 passed = false;
14503 }
14504
14505 if (!passed) {
reorder_levels (pd_test_1_all.cpp:14495)
14485 void pd_test_multiindex_reorder_levels() {
14486 std::cout << "========= reorder_levels ============================== ";
14487
14488 std::vector<std::vector<std::string>> arrays = {
14489 {"a", "b"},
14490 {"x", "y"},
14491 {"1", "2"}
14492 };
14493
14494 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495 pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497 bool passed = true;
14498
14499 auto tup = reordered[0];
14500 if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501 std::cout << " [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502 passed = false;
14503 }
14504
14505 if (!passed) {
replace (pd_test_1_all.cpp:6623)
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
replace (pd_test_1_all.cpp:6623)
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
replace (pd_test_1_all.cpp:6623)
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
replace (pd_test_1_all.cpp:6623)
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
replace (pd_test_1_all.cpp:6623)
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
reset_index (pd_test_3_all.cpp:1618)
1608 }
1609
1610 std::cout << " -> tests passed" << std::endl;
1611}
1612
1613// ============================================================================
1614// Category 10: Remaining Untested Functions
1615// ============================================================================
1616
1617void pd_test_3_all_series_reset_index() {
1618 std::cout << "========= Series.reset_index() =======================";
1619
1620 std::vector<double> vals = {10.0, 20.0, 30.0};
1621 pandas::Series<double> s(vals, "test");
1622
1623 // Set a custom index
1624 pandas::Index<std::string> custom_idx({"a", "b", "c"});
1625 s.set_index(custom_idx);
1626
1627 // Reset the index
1628 s.reset_index(true); // drop=true
set_axis (pd_test_1_all.cpp:6678)
6663 std::cout << " -> tests passed" << std::endl;
6664 }
6665
6666 // =====================================================================
6667 // Test: Index Operations
6668 // =====================================================================
6669 void pd_test_dataframe_index_ops() {
6670 std::cout << "========= index operations =================";
6671
6672 // Test set_axis (rows)
6673 {
6674 std::map<std::string, std::vector<int>> data;
6675 data["A"] = {1, 2, 3};
6676 pandas::DataFrame df(data);
6677
6678 auto renamed = df.set_axis({"x", "y", "z"}, 0);
6679 std::string idx0 = renamed.index().get_value_str(0);
6680 if (idx0 != "x") {
6681 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6682 throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index_col (pd_test_3_all.cpp:828)
818 std::cout << "========= DataFrame.set_index_col brace-init overload =====";
819
820 // Case A: 2-element brace-init (the C2668 trigger)
821 {
822 pandas::DataFrame df;
823 df.add_column<std::string>("City", {"London", "London", "NYC", "NYC"});
824 df.add_column<std::string>("Name", {"Alice", "Bob", "Carol","Dave"});
825 df.add_column<std::int64_t>("Age", {28, 35, 30, 25});
826 df.add_column<std::int64_t>("Salary",{50000, 60000, 55000, 45000});
827
828 auto mi_df = df.set_index_col({"City", "Name"});
829
830 if (mi_df.ncols() != 2) {
831 throw std::runtime_error("set_index_col brace-init: ncols mismatch");
832 }
833 if (mi_df.nrows() != 4) {
834 throw std::runtime_error("set_index_col brace-init: nrows mismatch");
835 }
836 }
837
838 // Case B: single-element brace-init
set_index_col (pd_test_3_all.cpp:828)
818 std::cout << "========= DataFrame.set_index_col brace-init overload =====";
819
820 // Case A: 2-element brace-init (the C2668 trigger)
821 {
822 pandas::DataFrame df;
823 df.add_column<std::string>("City", {"London", "London", "NYC", "NYC"});
824 df.add_column<std::string>("Name", {"Alice", "Bob", "Carol","Dave"});
825 df.add_column<std::int64_t>("Age", {28, 35, 30, 25});
826 df.add_column<std::int64_t>("Salary",{50000, 60000, 55000, 45000});
827
828 auto mi_df = df.set_index_col({"City", "Name"});
829
830 if (mi_df.ncols() != 2) {
831 throw std::runtime_error("set_index_col brace-init: ncols mismatch");
832 }
833 if (mi_df.nrows() != 4) {
834 throw std::runtime_error("set_index_col brace-init: nrows mismatch");
835 }
836 }
837
838 // Case B: single-element brace-init
set_index_col (pd_test_3_all.cpp:828)
818 std::cout << "========= DataFrame.set_index_col brace-init overload =====";
819
820 // Case A: 2-element brace-init (the C2668 trigger)
821 {
822 pandas::DataFrame df;
823 df.add_column<std::string>("City", {"London", "London", "NYC", "NYC"});
824 df.add_column<std::string>("Name", {"Alice", "Bob", "Carol","Dave"});
825 df.add_column<std::int64_t>("Age", {28, 35, 30, 25});
826 df.add_column<std::int64_t>("Salary",{50000, 60000, 55000, 45000});
827
828 auto mi_df = df.set_index_col({"City", "Name"});
829
830 if (mi_df.ncols() != 2) {
831 throw std::runtime_error("set_index_col brace-init: ncols mismatch");
832 }
833 if (mi_df.nrows() != 4) {
834 throw std::runtime_error("set_index_col brace-init: nrows mismatch");
835 }
836 }
837
838 // Case B: single-element brace-init
set_index_from_column (pd_test_3_all.cpp:1343)
1333 df.add_column<int64_t>("v_num", {1, 2, 3, 4});
1334 df.set_nan_marker("v_str", {true, false, false, true});
1335 // exact mask-bit assertions depend on Series<string> mask API
1336 }
1337
1338 // I. set_index_from_column<T>
1339 {
1340 pandas::DataFrame df;
1341 df.add_column<int64_t>("val", {1, 3});
1342 df.add_column<int64_t>("count", {30, 70});
1343 df.set_index_from_column<std::string>("group", {"A", "B"});
1344 if (!df.index_name().has_value() || df.index_name().value() != "group") {
1345 throw std::runtime_error("set_index_from_column: name not set");
1346 }
1347 }
1348
1349 std::cout << " -> tests passed" << std::endl;
1350}
1351
1352// ============================================================================
1353// Category 8: DateTime Functions
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229 std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231 // A. set_index_from_list -- string form
1232 {
1233 pandas::DataFrame df;
1234 df.add_column<int64_t>("one", {1, 2, 3, 4});
1235 df.add_column<int64_t>("two", {18, 20, 20, 18});
1236 df.set_index_from_list({"a", "b", "c", "d"});
1237 if (df.nrows() != 4) {
1238 throw std::runtime_error("set_index_from_list(string): nrows changed");
1239 }
1240 if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241 throw std::runtime_error("set_index_from_list(string): values wrong");
1242 }
1243 }
1244
1245 // B. set_index_from_list -- int form (reproduces test 1184)
1246 {
set_index_from_strings (pd_test_3_all.cpp:1284)
1274 pandas::DataFrame df2;
1275 df2.add_column<int64_t>("val", {3, 4});
1276 df2.set_integer_index({2, 3});
1277 if (df2.index().size() != 2) {
1278 throw std::runtime_error("set_integer_index: size wrong");
1279 }
1280
1281 pandas::DataFrame df3;
1282 df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1283 df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1284 if (df3.index().get_value_str(4) != "b") {
1285 throw std::runtime_error("set_index_from_strings: values wrong");
1286 }
1287 }
1288
1289 // E. set_index_name + index_name() getter/setter
1290 {
1291 pandas::DataFrame df;
1292 df.add_column<int64_t>("val", {1, 2, 3});
1293 df.set_index_from_list({"r0", "r1", "r2"});
set_index_from_strings (pd_test_3_all.cpp:1284)
1274 pandas::DataFrame df2;
1275 df2.add_column<int64_t>("val", {3, 4});
1276 df2.set_integer_index({2, 3});
1277 if (df2.index().size() != 2) {
1278 throw std::runtime_error("set_integer_index: size wrong");
1279 }
1280
1281 pandas::DataFrame df3;
1282 df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1283 df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1284 if (df3.index().get_value_str(4) != "b") {
1285 throw std::runtime_error("set_index_from_strings: values wrong");
1286 }
1287 }
1288
1289 // E. set_index_name + index_name() getter/setter
1290 {
1291 pandas::DataFrame df;
1292 df.add_column<int64_t>("val", {1, 2, 3});
1293 df.set_index_from_list({"r0", "r1", "r2"});
set_index_name (pd_test_2_all.cpp:20842)
20832void test_sgb_apply_result_index_categorical() {
20833 std::cout << " -- test_sgb_apply_result_index_categorical --" << std::endl;
20834
20835 std::vector<numpy::float64> values = {5.0, 10.0};
20836 pandas::Series<std::string> by({"A", "B"});
20837 pandas::Series<numpy::float64> data(values);
20838
20839 auto sgb = data.groupby(by);
20840 sgb.set_categorical_categories({"A", "B", "C"});
20841 sgb.set_index_name("cat_key");
20842
20843 pandas::Series<numpy::float64> result(values);
20844 std::vector<std::string> idx_labels = {"A", "B"};
20845 result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20846
20847 sgb.apply_result_index(result);
20848
20849 // Should have CategoricalIndex (dtype_name() returns "category")
20850 check(result.index().dtype_name() == "category", "is_categorical_index");
20851}
set_index_name (pd_test_2_all.cpp:20842)
20832void test_sgb_apply_result_index_categorical() {
20833 std::cout << " -- test_sgb_apply_result_index_categorical --" << std::endl;
20834
20835 std::vector<numpy::float64> values = {5.0, 10.0};
20836 pandas::Series<std::string> by({"A", "B"});
20837 pandas::Series<numpy::float64> data(values);
20838
20839 auto sgb = data.groupby(by);
20840 sgb.set_categorical_categories({"A", "B", "C"});
20841 sgb.set_index_name("cat_key");
20842
20843 pandas::Series<numpy::float64> result(values);
20844 std::vector<std::string> idx_labels = {"A", "B"};
20845 result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20846
20847 sgb.apply_result_index(result);
20848
20849 // Should have CategoricalIndex (dtype_name() returns "category")
20850 check(result.index().dtype_name() == "category", "is_categorical_index");
20851}
swaplevel (pd_test_1_all.cpp:14461)
14451 void pd_test_multiindex_swaplevel() {
14452 std::cout << "========= swaplevel =================================== ";
14453
14454 std::vector<std::vector<std::string>> arrays = {
14455 {"a", "b"},
14456 {"x", "y"}
14457 };
14458 std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461 pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463 bool passed = true;
14464
14465 // Tuple should be reversed
14466 auto tup = swapped[0];
14467 if (tup[0] != "x" || tup[1] != "a") {
14468 std::cout << " [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469 passed = false;
14470 }
swaplevel (pd_test_1_all.cpp:14461)
14451 void pd_test_multiindex_swaplevel() {
14452 std::cout << "========= swaplevel =================================== ";
14453
14454 std::vector<std::vector<std::string>> arrays = {
14455 {"a", "b"},
14456 {"x", "y"}
14457 };
14458 std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461 pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463 bool passed = true;
14464
14465 // Tuple should be reversed
14466 auto tup = swapped[0];
14467 if (tup[0] != "x" || tup[1] != "a") {
14468 std::cout << " [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469 passed = false;
14470 }
swaplevel (pd_test_1_all.cpp:14461)
14451 void pd_test_multiindex_swaplevel() {
14452 std::cout << "========= swaplevel =================================== ";
14453
14454 std::vector<std::vector<std::string>> arrays = {
14455 {"a", "b"},
14456 {"x", "y"}
14457 };
14458 std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461 pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463 bool passed = true;
14464
14465 // Tuple should be reversed
14466 auto tup = swapped[0];
14467 if (tup[0] != "x" || tup[1] != "a") {
14468 std::cout << " [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469 passed = false;
14470 }
update (pd_test_1_all.cpp:13945)
13935 if (!result.has_column("C")) {
13936 passed = false;
13937 std::cout << " [FAIL] : in pd_test_joining_combine_first() : missing column C" << std::endl;
13938 throw std::runtime_error("pd_test_joining_combine_first failed: col C missing");
13939 }
13940
13941 std::cout << " -> tests passed" << std::endl;
13942 }
13943
13944 // =====================================================================
13945 // update() Tests
13946 // =====================================================================
13947
13948 void pd_test_joining_update() {
13949 std::cout << "========= update ======================================";
13950
13951 std::map<std::string, std::vector<double>> left_data = {
13952 {"A", {1.0, 2.0, 3.0}},
13953 {"B", {10.0, 20.0, 30.0}}
13954 };
13955 std::vector<std::string> left_idx = {"x", "y", "z"};
backfill (pd_test_3_all.cpp:2645)
2635void pd_test_3_all_df_backfill_pad() {
2636 std::cout << "========= DataFrame.backfill/pad() =======================";
2637
2638 std::map<std::string, std::vector<double>> data = {
2639 {"A", {1.0, std::nan(""), std::nan(""), 4.0}},
2640 {"B", {std::nan(""), 2.0, std::nan(""), 4.0}}
2641 };
2642 pandas::DataFrame df(data);
2643
2644 // Test backfill (should fill backward)
2645 pandas::DataFrame bfill_result = df.backfill(0);
2646 if (bfill_result.nrows() != 4 || bfill_result.ncols() != 2) {
2647 throw std::runtime_error("backfill shape failed");
2648 }
2649
2650 // Test pad (should fill forward)
2651 pandas::DataFrame pad_result = df.pad(0);
2652 if (pad_result.nrows() != 4 || pad_result.ncols() != 2) {
2653 throw std::runtime_error("pad shape failed");
2654 }
bfill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
interpolate (pd_test_1_all.cpp:24365)
24355 std::cout << "====================================== [OK] pd_test_idxmax_idxmin test suite ========================== " << std::endl;
24356 return 0;
24357 }
24358
24359} // namespace dataframe_tests
24360// ------------------- pd_test_idxmax_idxmin.cpp (end) -----------------------------
24361
24362// ------------------- pd_test_interpolate.cpp (start) -----------------------------
24363// dataframe_tests/pd_test_interpolate.cpp
24364// Test file for DataFrame.interpolate() method
24365
24366#include <iostream>
24367#include <stdexcept>
24368#include <cmath>
24369#include <limits>
24370#include <map>
24371#include "../pandas/pd_dataframe.h"
24372
24373// CRITICAL: No using namespace directives
isna (pd_test_1_all.cpp:524)
514 throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515 }
516
517 // Test count (non-NA)
518 if (arr.count() != 2) {
519 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520 throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isna_frame (pd_test_3_all.cpp:10770)
10760 if (dtype_name.find("int") == std::string::npos) {
10761 std::cout << " [FAIL] : in pd_test_3_all_niche_residual_fixes() : Case 3 dtype" << std::endl;
10762 throw std::runtime_error("pd_test_3_all_niche_residual_fixes failed: Case 3 dtype");
10763 }
10764 if (r[static_cast<size_t>(0)] != 50) {
10765 std::cout << " [FAIL] : in pd_test_3_all_niche_residual_fixes() : Case 3 value" << std::endl;
10766 throw std::runtime_error("pd_test_3_all_niche_residual_fixes failed: Case 3 value");
10767 }
10768 }
10769
10770 // --- Case 4: isna_frame().to_string() ---
10771 {
10772 pandas::DataFrame df;
10773 df.add_column<double>("A", {1.0, std::nan(""), 3.0});
10774 df.add_column<double>("B", {std::nan(""), 2.0, std::nan("")});
10775 std::string s = df.isna_frame().to_string();
10776 if (s.empty()) {
10777 std::cout << " [FAIL] : in pd_test_3_all_niche_residual_fixes() : Case 4 empty" << std::endl;
10778 throw std::runtime_error("pd_test_3_all_niche_residual_fixes failed: Case 4 empty");
10779 }
10780 // Also verify existing NDArray-returning isna() still compiles
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
676 // Index 0: 1.0 -> not null
677 if (isnull_result.getElementAt({0})) {
678 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679 throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680 }
681 // Index 1: NaN -> null
isnull_frame (pd_test_5_all.cpp:34434)
34424 pandas_tests::check(row_mi_ok, "case2.notna_preserves_row_multiindex", local_fail);
34425 pandas_tests::check(lvl_vals_ok, "case2.notna_preserves_level_values", local_fail);
34426}
34427
34428void isnamultiindex_629108_case_3_isnull_alias_row_mi_only(int& local_fail) {
34429 const std::string tag = "[case3]";
34430 std::cout << "\n" << tag << " === isnull_frame alias: row MI only ===\n";
34431 bool row_mi_ok = false;
34432 try {
34433 auto df = mk_row_mi_only();
34434 auto out = df.isnull_frame();
34435 dump_df_mi_state(tag, "isnull", out);
34436 row_mi_ok = out.has_multiindex();
34437 } catch (const std::exception& e) {
34438 std::cout << tag << " exception: " << e.what() << "\n";
34439 }
34440 pandas_tests::check(row_mi_ok, "case3.isnull_preserves_row_multiindex", local_fail);
34441}
34442
34443void isnamultiindex_629108_case_4_notnull_alias_row_mi_only(int& local_fail) {
34444 const std::string tag = "[case4]";
notna (pd_test_1_all.cpp:6595)
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593 }
6594
6595 auto notna_mask = df_na.notna();
6596 if (notna_mask.getElementAt({1, 0})) {
6597 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598 throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599 }
6600 }
6601
6602 // Test fillna
6603 {
6604 std::map<std::string, std::vector<numpy::float64>> float_data;
6605 float_data["X"] = {1.0, std::nan(""), 3.0};
notna_frame (pd_test_5_all.cpp:34411)
34401 pandas_tests::check(lvl_vals_ok, "case1.isna_preserves_level_values", local_fail);
34402}
34403
34404void isnamultiindex_629108_case_2_notna_row_mi_only(int& local_fail) {
34405 const std::string tag = "[case2]";
34406 std::cout << "\n" << tag << " === notna_frame: row MI only ===\n";
34407 bool row_mi_ok = false;
34408 bool lvl_vals_ok = false;
34409 try {
34410 auto df = mk_row_mi_only();
34411 auto out = df.notna_frame();
34412 dump_df_mi_state(tag, "src", df);
34413 dump_df_mi_state(tag, "notna", out);
34414 row_mi_ok = out.has_multiindex();
34415 if (row_mi_ok) {
34416 const auto& lv0 = out.multiindex().get_level_values_str(0);
34417 const auto& lv1 = out.multiindex().get_level_values_str(1);
34418 lvl_vals_ok = (lv0.size() == 3 && lv0[0] == "A" && lv0[1] == "A" && lv0[2] == "A"
34419 && lv1.size() == 3 && lv1[0] == "B" && lv1[1] == "C" && lv1[2] == "D");
34420 }
34421 } catch (const std::exception& e) {
notnull (pd_test_3_all.cpp:665)
655 }
656
657 std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
notnull_frame (pd_test_5_all.cpp:34449)
34439 }
34440 pandas_tests::check(row_mi_ok, "case3.isnull_preserves_row_multiindex", local_fail);
34441}
34442
34443void isnamultiindex_629108_case_4_notnull_alias_row_mi_only(int& local_fail) {
34444 const std::string tag = "[case4]";
34445 std::cout << "\n" << tag << " === notnull_frame alias: row MI only ===\n";
34446 bool row_mi_ok = false;
34447 try {
34448 auto df = mk_row_mi_only();
34449 auto out = df.notnull_frame();
34450 dump_df_mi_state(tag, "notnull", out);
34451 row_mi_ok = out.has_multiindex();
34452 } catch (const std::exception& e) {
34453 std::cout << tag << " exception: " << e.what() << "\n";
34454 }
34455 pandas_tests::check(row_mi_ok, "case4.notnull_preserves_row_multiindex", local_fail);
34456}
34457
34458void isnamultiindex_629108_case_5_isna_row_mi_and_col_mi(int& local_fail) {
34459 const std::string tag = "[case5]";
pad (pd_test_3_all.cpp:1771)
1761 if (result_single.nrows() != 3 || result_single.ncols() != 1) {
1762 std::cout << " [FAIL] : in pd_test_3_all_dataframe_unstack() : single col shape mismatch" << std::endl;
1763 throw std::runtime_error("pd_test_3_all_dataframe_unstack failed: single col shape");
1764 }
1765
1766 std::cout << " -> tests passed" << std::endl;
1767}
1768
1769void pd_test_3_all_fbbuilder_pad() {
1770 std::cout << "========= FBBuilder.pad() (internal) =================";
1771
1772 // Note: FBBuilder.pad() is an internal method for FlatBuffer serialization
1773 // It's not the pandas DataFrame.pad() method (which is ffill alias)
1774 // This test verifies the to_feather() serialization works, which uses FBBuilder.pad()
1775
1776 std::map<std::string, std::vector<double>> data = {
1777 {"A", {1.0, 2.0, 3.0}},
1778 {"B", {4.0, 5.0, 6.0}}
1779 };
1780 pandas::DataFrame df(data);
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
count_cols (pd_test_1_all.cpp:6386)
6376 }
6377
6378 // Test max
6379 auto maxs = df.max_cols();
6380 if (std::abs(maxs[0] - 5.0) > 1e-10) {
6381 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : max A != 5" << std::endl;
6382 throw std::runtime_error("pd_test_dataframe_aggregations failed: max A != 5");
6383 }
6384
6385 // Test count
6386 auto counts = df.count_cols();
6387 if (counts[0] != 5) {
6388 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : count A != 5" << std::endl;
6389 throw std::runtime_error("pd_test_dataframe_aggregations failed: count A != 5");
6390 }
6391
6392 std::cout << " -> tests passed" << std::endl;
6393 }
6394
6395 // =====================================================================
6396 // Test: Sorting
cummax (pd_test_1_all.cpp:5152)
5142 // cummin: [1, 1, 1, 1]
5143 auto cmin = df.cummin();
5144 val = cmin["A"].get_value_str(3);
5145 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146 if (!passed) {
5147 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149 }
5150
5151 // cummax: [1, 2, 3, 4]
5152 auto cmax = df.cummax();
5153 val = cmax["A"].get_value_str(2);
5154 passed = std::abs(std::stod(val) - 3.0) < 0.001;
5155 if (!passed) {
5156 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummax failed" << std::endl;
5157 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummax failed");
5158 }
5159
5160 std::cout << " -> tests passed" << std::endl;
5161 }
cummin (pd_test_1_all.cpp:5143)
5133 // cumprod: [1, 2, 6, 24]
5134 auto cp = df.cumprod();
5135 val = cp["A"].get_value_str(3);
5136 passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137 if (!passed) {
5138 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140 }
5141
5142 // cummin: [1, 1, 1, 1]
5143 auto cmin = df.cummin();
5144 val = cmin["A"].get_value_str(3);
5145 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146 if (!passed) {
5147 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149 }
5150
5151 // cummax: [1, 2, 3, 4]
5152 auto cmax = df.cummax();
5153 val = cmax["A"].get_value_str(2);
cumprod (pd_test_1_all.cpp:5134)
5124 // cumsum: [1, 3, 6, 10]
5125 auto cs = df.cumsum();
5126 std::string val = cs["A"].get_value_str(2);
5127 bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128 if (!passed) {
5129 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131 }
5132
5133 // cumprod: [1, 2, 6, 24]
5134 auto cp = df.cumprod();
5135 val = cp["A"].get_value_str(3);
5136 passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137 if (!passed) {
5138 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140 }
5141
5142 // cummin: [1, 1, 1, 1]
5143 auto cmin = df.cummin();
5144 val = cmin["A"].get_value_str(3);
cumsum (pd_test_1_all.cpp:5125)
5115 }
5116
5117 void pd_test_arithmetic_dataframe_cumulative() {
5118 std::cout << "========= DataFrame cumulative ==================";
5119
5120 std::map<std::string, std::vector<double>> data;
5121 data["A"] = {1.0, 2.0, 3.0, 4.0};
5122 pandas::DataFrame df(data);
5123
5124 // cumsum: [1, 3, 6, 10]
5125 auto cs = df.cumsum();
5126 std::string val = cs["A"].get_value_str(2);
5127 bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128 if (!passed) {
5129 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131 }
5132
5133 // cumprod: [1, 2, 6, 24]
5134 auto cp = df.cumprod();
5135 val = cp["A"].get_value_str(3);
describe (pd_test_2_all.cpp:19793)
19783 ++g_fail;
19784 }
19785}
19786
19787static bool approx_eq(double a, double b, double tol = 1e-9) {
19788 if (std::isnan(a) && std::isnan(b)) return true;
19789 return std::abs(a - b) < tol;
19790}
19791
19792// =====================================================================
19793// Test: describe() default mode — numeric columns only
19794// =====================================================================
19795
19796void pd_test_describe_numeric_only() {
19797 std::cout << " -- pd_test_describe_numeric_only --" << std::endl;
19798
19799 pandas::DataFrame df;
19800 df.add_column("A", std::vector<double>{1.0, 2.0, 3.0, 4.0, 5.0});
19801 df.add_column("B", std::vector<double>{10.0, 20.0, 30.0, 40.0, 50.0});
19802 df.add_column("Name", std::vector<std::string>{"a", "b", "c", "d", "e"});
describe_full (pd_test_2_all.cpp:19758)
19748 << (dataframe_tests_broadcasting::g_fail == 0 ? "OK" : "FAIL")
19749 << "] pd_test_broadcasting test suite ==========================" << std::endl;
19750
19751 return dataframe_tests_broadcasting::g_fail;
19752}
19753
19754} // namespace dataframe_tests
19755// ------------------- pd_test_broadcasting.cpp (end) -----------------------------
19756
19757// ------------------- pd_test_describe.cpp (start) -----------------------------
19758// pd_test_describe.cpp - Tests for describe_full() migration
19759// StringColumnStats, compute_string_column_stats(), describe_full() modes
19760
19761#include <iostream>
19762#include <string>
19763#include <vector>
19764#include <cmath>
19765
19766#include "../pandas/pd_dataframe.h"
19767#include "../pandas/pd_series.h"
19768#include "../pandas/pd_index.h"
kurt (pd_test_1_all.cpp:4599)
4589 std::cout << "========= Series skew/kurt ======================";
4590
4591 pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592 auto skew_val = s.skew();
4593 bool passed = skew_val.has_value() && *skew_val > 0; // Should be right-skewed
4594 if (!passed) {
4595 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604 }
4605
4606 // Test kurtosis alias
4607 auto kurt_alias = s.kurtosis();
4608 passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609 if (!passed) {
kurt_cols (pd_test_1_all.cpp:4786)
4776 throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: skew_cols should return 1 value");
4777 }
4778
4779 // Skew should be positive for right-skewed data
4780 passed = skew[0] > 0;
4781 if (!passed) {
4782 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : skew should be positive" << std::endl;
4783 throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: skew should be positive");
4784 }
4785
4786 auto kurt = df.kurt_cols();
4787 passed = kurt.size() == 1 && !std::isnan(kurt[0]);
4788 if (!passed) {
4789 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : kurt_cols should return valid value" << std::endl;
4790 throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: kurt_cols should return valid value");
4791 }
4792
4793 std::cout << " -> tests passed" << std::endl;
4794 }
4795
4796 } // namespace dataframe_tests_aggregation
kurtosis (pd_test_1_all.cpp:4607)
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604 }
4605
4606 // Test kurtosis alias
4607 auto kurt_alias = s.kurtosis();
4608 passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609 if (!passed) {
4610 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurtosis alias failed" << std::endl;
4611 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612 }
4613
4614 std::cout << " -> tests passed" << std::endl;
4615 }
4616
4617 void pd_test_aggregation_series_pct_change() {
max (pd_test_1_all.cpp:771)
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
max_cols (pd_test_1_all.cpp:6379)
6369 }
6370
6371 // Test min
6372 auto mins = df.min_cols();
6373 if (std::abs(mins[0] - 1.0) > 1e-10) {
6374 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : min A != 1" << std::endl;
6375 throw std::runtime_error("pd_test_dataframe_aggregations failed: min A != 1");
6376 }
6377
6378 // Test max
6379 auto maxs = df.max_cols();
6380 if (std::abs(maxs[0] - 5.0) > 1e-10) {
6381 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : max A != 5" << std::endl;
6382 throw std::runtime_error("pd_test_dataframe_aggregations failed: max A != 5");
6383 }
6384
6385 // Test count
6386 auto counts = df.count_cols();
6387 if (counts[0] != 5) {
6388 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : count A != 5" << std::endl;
6389 throw std::runtime_error("pd_test_dataframe_aggregations failed: count A != 5");
mean (pd_test_1_all.cpp:282)
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
mean_cols (pd_test_1_all.cpp:6364)
6354 if (std::abs(sum_a - 15.0) > 1e-10) {
6355 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : sum A != 15" << std::endl;
6356 throw std::runtime_error("pd_test_dataframe_aggregations failed: sum A != 15");
6357 }
6358 if (std::abs(sum_b - 150.0) > 1e-10) {
6359 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : sum B != 150" << std::endl;
6360 throw std::runtime_error("pd_test_dataframe_aggregations failed: sum B != 150");
6361 }
6362
6363 // Test mean
6364 auto means = df.mean_cols();
6365 double mean_a = means[0];
6366 if (std::abs(mean_a - 3.0) > 1e-10) {
6367 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : mean A != 3" << std::endl;
6368 throw std::runtime_error("pd_test_dataframe_aggregations failed: mean A != 3");
6369 }
6370
6371 // Test min
6372 auto mins = df.min_cols();
6373 if (std::abs(mins[0] - 1.0) > 1e-10) {
6374 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : min A != 1" << std::endl;
median (pd_test_1_all.cpp:20910)
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901 }
20902
20903 std::cout << " -> tests passed" << std::endl;
20904 }
20905
20906 void pd_test_expanding_median() {
20907 std::cout << "========= Expanding median ======================";
20908
20909 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910 auto result = s.expanding().median();
20911
20912 // Expanding median: 1, 1.5, 2, 2.5, 3
20913 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914 std::abs(result[1] - 1.5) < 0.001 &&
20915 std::abs(result[2] - 2.0) < 0.001 &&
20916 std::abs(result[3] - 2.5) < 0.001 &&
20917 std::abs(result[4] - 3.0) < 0.001;
20918 if (!passed) {
20919 std::cout << " [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920 throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
median_cols (pd_test_1_all.cpp:24860)
24850 std::cout << "====================================== [OK] pd_test_interpolate test suite ========================== " << std::endl;
24851 return 0;
24852 }
24853
24854} // namespace dataframe_tests
24855// ------------------- pd_test_interpolate.cpp (end) -----------------------------
24856
24857// ------------------- pd_test_median.cpp (start) -----------------------------
24858// dataframe_tests/pd_test_median.cpp
24859// Tests for DataFrame.median() and DataFrame.median_cols() methods
24860
24861#include <iostream>
24862#include <stdexcept>
24863#include <cmath>
24864#include <limits>
24865#include "../pandas/pd_dataframe.h"
24866
24867// CRITICAL: No using namespace directives
24868
24869namespace dataframe_tests {
min (pd_test_1_all.cpp:764)
754 }
755
756 void pd_test_categorical_array_ordered_operations() {
757 std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759 std::vector<std::string> cats = {"low", "medium", "high"};
760 std::vector<numpy::int32> codes = {0, 2, 1, 0, -1}; // low, high, medium, low, NA
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
min_cols (pd_test_1_all.cpp:6372)
6363 // Test mean
6364 auto means = df.mean_cols();
6365 double mean_a = means[0];
6366 if (std::abs(mean_a - 3.0) > 1e-10) {
6367 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : mean A != 3" << std::endl;
6368 throw std::runtime_error("pd_test_dataframe_aggregations failed: mean A != 3");
6369 }
6370
6371 // Test min
6372 auto mins = df.min_cols();
6373 if (std::abs(mins[0] - 1.0) > 1e-10) {
6374 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : min A != 1" << std::endl;
6375 throw std::runtime_error("pd_test_dataframe_aggregations failed: min A != 1");
6376 }
6377
6378 // Test max
6379 auto maxs = df.max_cols();
6380 if (std::abs(maxs[0] - 5.0) > 1e-10) {
6381 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : max A != 5" << std::endl;
6382 throw std::runtime_error("pd_test_dataframe_aggregations failed: max A != 5");
mode (pd_test_1_all.cpp:4569)
4559 throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(1) should be 5.0");
4560 }
4561
4562 std::cout << " -> tests passed" << std::endl;
4563 }
4564
4565 void pd_test_aggregation_series_mode() {
4566 std::cout << "========= Series mode ===========================";
4567
4568 pandas::Series<int> s({1, 2, 2, 3, 3, 3});
4569 auto m = s.mode();
4570 bool passed = m.size() == 1 && m[0] == 3;
4571 if (!passed) {
4572 std::cout << " [FAIL] : in pd_test_aggregation_series_mode() : mode should be 3" << std::endl;
4573 throw std::runtime_error("pd_test_aggregation_series_mode failed: mode should be 3");
4574 }
4575
4576 // Test multi-mode
4577 pandas::Series<int> s2({1, 1, 2, 2});
4578 auto m2 = s2.mode();
4579 passed = m2.size() == 2; // Both 1 and 2 are modes
nunique (pd_test_1_all.cpp:10604)
10595 std::cout << " -> tests passed" << std::endl;
10596}
10597
10598void pd_test_extension_index_nunique() {
10599 std::cout << "========= nunique =========================";
10600
10601 pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10602 pandas::CategoricalIndex idx(arr);
10603
10604 bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10605 if (!passed) {
10606 std::cout << " [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10607 throw std::runtime_error("pd_test_extension_index_nunique failed");
10608 }
10609
10610 std::cout << " -> tests passed" << std::endl;
10611}
10612
10613void pd_test_extension_index_factorize() {
10614 std::cout << "========= factorize =========================";
nunique_cols (pd_test_1_all.cpp:25375)
25365 std::cout << "====================================== [OK] pd_test_mode test suite ========================== " << std::endl;
25366 return 0;
25367 }
25368
25369} // namespace dataframe_tests
25370// ------------------- pd_test_mode.cpp (end) -----------------------------
25371
25372// ------------------- pd_test_nunique.cpp (start) -----------------------------
25373// dataframe_tests/pd_test_nunique.cpp
25374// Tests for DataFrame.nunique() and DataFrame.nunique_cols() methods
25375
25376#include <iostream>
25377#include <stdexcept>
25378#include <cmath>
25379#include <limits>
25380#include "../pandas/pd_dataframe.h"
25381
25382// CRITICAL: No using namespace directives
25383
25384namespace dataframe_tests {
prod (pd_test_1_all.cpp:26081)
26072 std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073 return 0;
26074 }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
prod_cols (pd_test_1_all.cpp:26081)
26072 std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073 return 0;
26074 }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
product (pd_test_3_all.cpp:2584)
2574 // Test quantile along rows
2575 pandas::Series<numpy::float64> q50_rows = df.quantile(0.5, 1);
2576 if (q50_rows.size() != 5) {
2577 throw std::runtime_error("quantile(0.5, axis=1) failed");
2578 }
2579
2580 std::cout << " -> tests passed" << std::endl;
2581}
2582
2583void pd_test_3_all_df_product() {
2584 std::cout << "========= DataFrame.product(axis) ========================";
2585
2586 std::map<std::string, std::vector<double>> data = {
2587 {"A", {1.0, 2.0, 3.0}},
2588 {"B", {4.0, 5.0, 6.0}}
2589 };
2590 pandas::DataFrame df(data);
2591
2592 // Test product along columns
2593 pandas::Series<numpy::float64> prod_cols = df.product(0);
2594 if (prod_cols.size() != 2 || std::abs(prod_cols[static_cast<size_t>(0)] - 6.0) > 0.001 ||
quantile (pd_test_1_all.cpp:4540)
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
4535
4536 void pd_test_aggregation_series_quantile() {
4537 std::cout << "========= Series quantile =======================";
4538
4539 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4540 auto q50 = s.quantile(0.5);
4541 bool passed = q50.has_value() && std::abs(*q50 - 3.0) < 0.001;
4542 if (!passed) {
4543 std::cout << " [FAIL] : in pd_test_aggregation_series_quantile() : quantile(0.5) should be 3.0" << std::endl;
4544 throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(0.5) should be 3.0");
4545 }
4546
4547 // Test q=0 and q=1
4548 auto q0 = s.quantile(0.0);
4549 passed = q0.has_value() && std::abs(*q0 - 1.0) < 0.001;
4550 if (!passed) {
quantile_cols (pd_test_1_all.cpp:4753)
4743 }
4744
4745 void pd_test_aggregation_dataframe_quantile_cols() {
4746 std::cout << "========= DataFrame quantile_cols ===============";
4747
4748 std::map<std::string, std::vector<double>> data;
4749 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4750 data["B"] = {10.0, 20.0, 30.0, 40.0, 50.0};
4751 pandas::DataFrame df(data);
4752
4753 auto q50 = df.quantile_cols(0.5);
4754
4755 // Check A median = 3.0
4756 bool passed = std::abs(q50[0] - 3.0) < 0.001 || std::abs(q50[1] - 3.0) < 0.001;
4757 if (!passed) {
4758 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_quantile_cols() : A median should be 3.0" << std::endl;
4759 throw std::runtime_error("pd_test_aggregation_dataframe_quantile_cols failed: A median should be 3.0");
4760 }
4761
4762 std::cout << " -> tests passed" << std::endl;
4763 }
sem (pd_test_1_all.cpp:4525)
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
skew (pd_test_1_all.cpp:4592)
4582 throw std::runtime_error("pd_test_aggregation_series_mode failed: multi-mode should return 2 values");
4583 }
4584
4585 std::cout << " -> tests passed" << std::endl;
4586 }
4587
4588 void pd_test_aggregation_series_skew_kurt() {
4589 std::cout << "========= Series skew/kurt ======================";
4590
4591 pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592 auto skew_val = s.skew();
4593 bool passed = skew_val.has_value() && *skew_val > 0; // Should be right-skewed
4594 if (!passed) {
4595 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
skew_cols (pd_test_1_all.cpp:4772)
4762 std::cout << " -> tests passed" << std::endl;
4763 }
4764
4765 void pd_test_aggregation_dataframe_skew_kurt_cols() {
4766 std::cout << "========= DataFrame skew/kurt_cols ==============";
4767
4768 std::map<std::string, std::vector<double>> data;
4769 data["A"] = {1.0, 2.0, 2.0, 3.0, 9.0};
4770 pandas::DataFrame df(data);
4771
4772 auto skew = df.skew_cols();
4773 bool passed = skew.size() == 1;
4774 if (!passed) {
4775 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : skew_cols should return 1 value" << std::endl;
4776 throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: skew_cols should return 1 value");
4777 }
4778
4779 // Skew should be positive for right-skewed data
4780 passed = skew[0] > 0;
4781 if (!passed) {
4782 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : skew should be positive" << std::endl;
std (pd_test_1_all.cpp:4526)
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
4535
4536 void pd_test_aggregation_series_quantile() {
std_ (pd_test_1_all.cpp:20752)
20742 throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743 }
20744
20745 std::cout << " -> tests passed" << std::endl;
20746 }
20747
20748 void pd_test_rolling_std() {
20749 std::cout << "========= Rolling std ===========================";
20750
20751 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752 auto result = s.rolling(3).std_();
20753
20754 // std([1,2,3]) = 1.0 (ddof=1)
20755 // std([2,3,4]) = 1.0
20756 // std([3,4,5]) = 1.0
20757 bool passed = std::abs(result[2] - 1.0) < 0.001;
20758 if (!passed) {
20759 std::cout << " [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
std_ (pd_test_1_all.cpp:20752)
20742 throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743 }
20744
20745 std::cout << " -> tests passed" << std::endl;
20746 }
20747
20748 void pd_test_rolling_std() {
20749 std::cout << "========= Rolling std ===========================";
20750
20751 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752 auto result = s.rolling(3).std_();
20753
20754 // std([1,2,3]) = 1.0 (ddof=1)
20755 // std([2,3,4]) = 1.0
20756 // std([3,4,5]) = 1.0
20757 bool passed = std::abs(result[2] - 1.0) < 0.001;
20758 if (!passed) {
20759 std::cout << " [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
std_cols (pd_test_plan_coverage_gaps.cpp:129)
120 if (local_fail > 0) throw std::runtime_error("pd_test_cut_retbins_front_equals_min_val_right_false failed");
121 std::cout << " -> tests passed" << std::endl;
122}
123
124// -------------------------------------------------------------------------
125// plan_float_reduction_precision — DataFrame axis=0 reducers coverage (R10 gap)
126//
127// The `_3` suite only exercises Series<double>::sum/mean/var/std. The plan's
128// R10 Files-to-Update adds classify_column_dtypes (DataFrame::sum(axis=0)),
129// mean_cols() (DataFrame::mean(axis=0), describe), and std_cols()
130// (DataFrame::std/var(axis=0), describe). None are touched by _3 assertions.
131// -------------------------------------------------------------------------
132void pd_test_dataframe_sum_axis0_small() {
133 std::cout << "========= DataFrame::sum(axis=0) small-data sanity =======";
134 int local_fail = 0;
135
136 std::map<std::string, std::vector<double>> data = {
137 {"A", {1.0, 2.0, 3.0, 4.0}},
138 {"B", {10.0, 20.0, 30.0, 40.0}}
139 };
sum (pd_test_1_all.cpp:276)
266 }
267
268 // Test sum/mean
269 pandas::BooleanArray arr({
270 std::optional<bool>(true),
271 std::optional<bool>(false),
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
sum_cols (pd_test_1_all.cpp:6351)
6341 void pd_test_dataframe_aggregations() {
6342 std::cout << "========= aggregations =====================";
6343
6344 std::map<std::string, std::vector<numpy::float64>> data;
6345 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
6346 data["B"] = {10.0, 20.0, 30.0, 40.0, 50.0};
6347
6348 pandas::DataFrame df(data);
6349
6350 // Test sum
6351 auto sums = df.sum_cols();
6352 double sum_a = sums[0];
6353 double sum_b = sums[1];
6354 if (std::abs(sum_a - 15.0) > 1e-10) {
6355 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : sum A != 15" << std::endl;
6356 throw std::runtime_error("pd_test_dataframe_aggregations failed: sum A != 15");
6357 }
6358 if (std::abs(sum_b - 150.0) > 1e-10) {
6359 std::cout << " [FAIL] : in pd_test_dataframe_aggregations() : sum B != 150" << std::endl;
6360 throw std::runtime_error("pd_test_dataframe_aggregations failed: sum B != 150");
6361 }
value_counts (pd_test_1_all.cpp:865)
855 std::vector<std::optional<std::string>> values = {
856 std::optional<std::string>("a"),
857 std::optional<std::string>("b"),
858 std::optional<std::string>("a"),
859 std::optional<std::string>("a"),
860 std::optional<std::string>("b"),
861 std::nullopt // NA not counted
862 };
863 pandas::CategoricalArray arr(values);
864
865 auto [cats, counts] = arr.value_counts();
866
867 // Should have 2 categories
868 if (cats.size() != 2 || counts.size() != 2) {
869 std::cout << " [FAIL] : in pd_test_categorical_array_value_counts() : wrong size" << std::endl;
870 throw std::runtime_error("pd_test_categorical_array_value_counts failed: wrong size");
871 }
872
873 // Find 'a' count
874 int64_t a_count = 0, b_count = 0;
875 for (size_t i = 0; i < cats.size(); ++i) {
var (pd_test_1_all.cpp:20890)
20880 throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881 }
20882
20883 std::cout << " -> tests passed" << std::endl;
20884 }
20885
20886 void pd_test_expanding_var() {
20887 std::cout << "========= Expanding var =========================";
20888
20889 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890 auto result = s.expanding().var();
20891
20892 // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893 bool passed = std::isnan(result[0]) &&
20894 std::abs(result[1] - 0.5) < 0.001 &&
20895 std::abs(result[2] - 1.0) < 0.001 &&
20896 std::abs(result[3] - 1.6667) < 0.001 &&
20897 std::abs(result[4] - 2.5) < 0.001;
20898 if (!passed) {
20899 std::cout << " [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg_to_series (pd_test_2_all.cpp:19153)
19144 pandas::DataFrame df;
19145 df.add_column("a", std::vector<numpy::float64>{1.0, 2.0, 3.0, 4.0});
19146 df.add_column("b", std::vector<numpy::float64>{10.0, 20.0, 30.0, 40.0});
19147
19148 // Dict-simple form: {col: "func"} -> Series
19149 std::map<std::string, std::string> col_funcs;
19150 col_funcs["a"] = "sum";
19151 col_funcs["b"] = "mean";
19152
19153 pandas::Series<numpy::float64> result = df.agg_to_series(col_funcs);
19154
19155 // a.sum() = 10.0, b.mean() = 25.0
19156 check(result.size() == 2, "result_size_2");
19157
19158 // std::map iterates in alphabetical order: a, b
19159 check(std::abs(result.get_value_double(0) - 10.0) < 1e-9, "a_sum_10");
19160 check(std::abs(result.get_value_double(1) - 25.0) < 1e-9, "b_mean_25");
19161
19162 // Check index labels
19163 check(result.index().get_value_str(0) == "a", "index_0_a");
agg_to_series (pd_test_2_all.cpp:19153)
19144 pandas::DataFrame df;
19145 df.add_column("a", std::vector<numpy::float64>{1.0, 2.0, 3.0, 4.0});
19146 df.add_column("b", std::vector<numpy::float64>{10.0, 20.0, 30.0, 40.0});
19147
19148 // Dict-simple form: {col: "func"} -> Series
19149 std::map<std::string, std::string> col_funcs;
19150 col_funcs["a"] = "sum";
19151 col_funcs["b"] = "mean";
19152
19153 pandas::Series<numpy::float64> result = df.agg_to_series(col_funcs);
19154
19155 // a.sum() = 10.0, b.mean() = 25.0
19156 check(result.size() == 2, "result_size_2");
19157
19158 // std::map iterates in alphabetical order: a, b
19159 check(std::abs(result.get_value_double(0) - 10.0) < 1e-9, "a_sum_10");
19160 check(std::abs(result.get_value_double(1) - 25.0) < 1e-9, "b_mean_25");
19161
19162 // Check index labels
19163 check(result.index().get_value_str(0) == "a", "index_0_a");
aggregate (pd_test_1_all.cpp:11139)
11129 auto custom_agg = s.agg([](const std::vector<double>& v) {
11130 return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131 });
11132 if (!approx_equal(custom_agg, 3.0)) {
11133 passed = false;
11134 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135 throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136 }
11137
11138 // Test aggregate alias
11139 auto alias_result = s.aggregate("sum");
11140 if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141 passed = false;
11142 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143 throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144 }
11145
11146 std::cout << " -> tests passed" << std::endl;
11147 }
11148
11149 void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129 auto custom_agg = s.agg([](const std::vector<double>& v) {
11130 return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131 });
11132 if (!approx_equal(custom_agg, 3.0)) {
11133 passed = false;
11134 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135 throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136 }
11137
11138 // Test aggregate alias
11139 auto alias_result = s.aggregate("sum");
11140 if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141 passed = false;
11142 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143 throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144 }
11145
11146 std::cout << " -> tests passed" << std::endl;
11147 }
11148
11149 void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129 auto custom_agg = s.agg([](const std::vector<double>& v) {
11130 return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131 });
11132 if (!approx_equal(custom_agg, 3.0)) {
11133 passed = false;
11134 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135 throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136 }
11137
11138 // Test aggregate alias
11139 auto alias_result = s.aggregate("sum");
11140 if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141 passed = false;
11142 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143 throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144 }
11145
11146 std::cout << " -> tests passed" << std::endl;
11147 }
11148
11149 void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129 auto custom_agg = s.agg([](const std::vector<double>& v) {
11130 return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131 });
11132 if (!approx_equal(custom_agg, 3.0)) {
11133 passed = false;
11134 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135 throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136 }
11137
11138 // Test aggregate alias
11139 auto alias_result = s.aggregate("sum");
11140 if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141 passed = false;
11142 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143 throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144 }
11145
11146 std::cout << " -> tests passed" << std::endl;
11147 }
11148
11149 void pd_test_func_apply_series_pipe() {
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
apply_callable (pd_test_5_all.cpp:86745)
86735 auto row_cb = [&](const pandas::ApplyRowInput&) -> pandas::ApplyCellResult {
86736 pandas::ApplyCellResult out;
86737 out.kind = pandas::ApplyCellResult::Float;
86738 out.f = std::numeric_limits<double>::quiet_NaN();
86739 return out;
86740 };
86741
86742 bool threw = false;
86743 pandas::ApplyResult result;
86744 try {
86745 result = df.apply_callable(/*axis=*/0, row_cb, col_cb);
86746 } catch (const std::exception& e) {
86747 threw = true;
86748 std::cout << " threw: " << e.what() << "\n";
86749 }
86750 pandas_tests::check(!threw, "case_27.apply_callable_no_throw", local_fail);
86751 pandas_tests::check(col_cb_called, "case_27.col_cb_invoked", local_fail);
86752 pandas_tests::check(observed_idx == 0, "case_27.col_cb_idx_is_0", local_fail);
86753 pandas_tests::check(observed_name == "a", "case_27.col_cb_name_is_a", local_fail);
86754}
apply_resolved_typed (pd_test_5_all.cpp:98141)
98131 switch (cid) {
98132 case CbId::Int: cb = cb_int(hist); break;
98133 case CbId::Bool: cb = cb_bool(hist); break;
98134 case CbId::Float: cb = cb_float(hist); break;
98135 case CbId::Str: cb = cb_string(hist); break;
98136 case CbId::Mixed: cb = cb_mixed(hist); break;
98137 }
98138
98139 pandas::Result r;
98140 try {
98141 r = s.apply_resolved_typed(cb, hist);
98142 } catch (const std::exception& e) {
98143 std::string tag = std::string("apply src=") + src_name(sid) +
98144 " cb=" + cb_name(cid) + " mode=" + mode_name(mid);
98145 std::cout << "[FAIL] : in f_27a_core_3094022_apply_resolved_typed_post_cb_dtype() "
98146 << tag << " unexpected exception: " << e.what() << "\n";
98147 ++pandas_tests::g_failed; ++local_fail;
98148 ++pandas_tests::g_failed; ++local_fail;
98149 ++pandas_tests::g_failed; ++local_fail;
98150 return;
98151 }
apply_with_args (pd_test_3_all.cpp:16993)
16983 }
16984 }
16985
16986 if (!passed) {
16987 throw std::runtime_error("pd_test_apply_axis1_broadcast failed");
16988 }
16989 std::cout << " -> tests passed" << std::endl;
16990}
16991
16992void pd_test_apply_with_args() {
16993 std::cout << "========= DataFrame.apply_with_args() =================";
16994
16995 std::map<std::string, std::vector<double>> data = {
16996 {"A", {1.0, 2.0, 3.0}},
16997 {"B", {4.0, 5.0, 6.0}}
16998 };
16999 pandas::DataFrame df(data);
17000
17001 // Apply with additional argument: multiply sum by factor
17002 auto result = df.apply_with_args(
17003 [](const std::vector<double>& col, double factor) {
applymap (pd_test_1_all.cpp:11194)
11184 void pd_test_func_apply_dataframe_applymap() {
11185 std::cout << "========= DataFrame applymap ==========================";
11186
11187 std::map<std::string, std::vector<double>> data = {
11188 {"A", {1.0, 2.0, 3.0}},
11189 {"B", {4.0, 5.0, 6.0}}
11190 };
11191 pandas::DataFrame df(data);
11192
11193 // applymap applies function element-wise
11194 auto result = df.applymap([](double x) { return x * x; });
11195
11196 bool passed = true;
11197
11198 // Check column A squared
11199 const auto& col_a = result["A"];
11200 std::vector<double> expected_a = {1.0, 4.0, 9.0};
11201 for (size_t i = 0; i < 3; ++i) {
11202 double val = std::stod(col_a.get_value_str(i));
11203 if (!approx_equal(val, expected_a[i])) {
11204 passed = false;
ewm (pd_test_3_all.cpp:2961)
2951 // Test expanding sum
2952 pandas::DataFrame expanding_sum = df.expanding().sum();
2953 if (expanding_sum.nrows() != 5 || expanding_sum.ncols() != 2) {
2954 throw std::runtime_error("expanding().sum() shape failed");
2955 }
2956
2957 std::cout << " -> tests passed" << std::endl;
2958}
2959
2960void pd_test_3_all_df_ewm() {
2961 std::cout << "========= DataFrame.ewm() ================================";
2962
2963 std::map<std::string, std::vector<double>> data = {
2964 {"A", {1.0, 2.0, 3.0, 4.0, 5.0}},
2965 {"B", {10.0, 20.0, 30.0, 40.0, 50.0}}
2966 };
2967 pandas::DataFrame df(data);
2968
2969 // Test ewm mean with span=3
2970 pandas::DataFrame ewm_mean = df.ewm(std::nullopt, 3.0).mean();
2971 if (ewm_mean.nrows() != 5 || ewm_mean.ncols() != 2) {
expanding (pd_test_1_all.cpp:20770)
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
20762
20763 std::cout << " -> tests passed" << std::endl;
20764 }
20765
20766 void pd_test_expanding_sum() {
20767 std::cout << "========= Expanding sum =========================";
20768
20769 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20770 auto result = s.expanding().sum();
20771
20772 // Cumulative sum: 1, 3, 6, 10, 15
20773 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20774 std::abs(result[1] - 3.0) < 0.001 &&
20775 std::abs(result[2] - 6.0) < 0.001 &&
20776 std::abs(result[3] - 10.0) < 0.001 &&
20777 std::abs(result[4] - 15.0) < 0.001;
20778 if (!passed) {
20779 std::cout << " [FAIL] : in pd_test_expanding_sum() : expanding sum values incorrect" << std::endl;
20780 throw std::runtime_error("pd_test_expanding_sum failed: expanding sum values incorrect");
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833 std::cout << "========= map =========================================";
5834
5835 pandas::CategoricalArray arr({"yes", "no", "yes"});
5836 pandas::CategoricalIndex idx(arr);
5837
5838 std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839 pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841 bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842 !mapped.has_category("yes") && !mapped.has_category("no"));
5843 if (!passed) {
5844 std::cout << " [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845 throw std::runtime_error("pd_test_categorical_index_map failed");
5846 }
5847
5848 std::cout << " -> tests passed" << std::endl;
5849}
map_callable_resolved (pd_test_5_all.cpp:98564)
98554 case MethodId::TransformA0:
98555 case MethodId::TransformA1: return "transform";
98556 }
98557 return "?";
98558}
98559
98560static pandas::DataFrame run_method(MethodId mid, const pandas::DataFrame& df,
98561 CbId cid, Hist& hist) {
98562 if (mid == MethodId::Map) {
98563 MapCb cb = mk_map_cb(cid, hist);
98564 return df.map_callable_resolved(cb, hist);
98565 }
98566 SeriesCb cb = mk_series_cb(cid, hist);
98567 switch (mid) {
98568 case MethodId::ApplyA0: return df.apply_resolved_typed(cb, hist, 0);
98569 case MethodId::ApplyA1: return df.apply_resolved_typed(cb, hist, 1);
98570 case MethodId::TransformA0: return df.transform_callable_resolved(cb, hist, 0);
98571 case MethodId::TransformA1: return df.transform_callable_resolved(cb, hist, 1);
98572 default: break;
98573 }
98574 return pandas::DataFrame{};
pipe (pd_test_1_all.cpp:11164)
11154 // Pipe applies function to entire Series
11155 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11156 auto mean_val = ser.mean();
11157 std::vector<double> result;
11158 for (size_t i = 0; i < ser.size(); ++i) {
11159 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11160 }
11161 return pandas::Series<double>(result, ser.name());
11162 };
11163
11164 auto result = s.pipe(add_mean, 10.0);
11165
11166 bool passed = true;
11167 // mean is 2.5, offset is 10.0, so each value + 12.5
11168 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11169 for (size_t i = 0; i < result.size(); ++i) {
11170 if (!approx_equal(result[i], expected[i])) {
11171 passed = false;
11172 std::cout << " [FAIL] : in pd_test_func_apply_series_pipe() : value mismatch at " << i << std::endl;
11173 throw std::runtime_error("pd_test_func_apply_series_pipe failed: value mismatch");
11174 }
pipe (pd_test_1_all.cpp:11164)
11154 // Pipe applies function to entire Series
11155 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11156 auto mean_val = ser.mean();
11157 std::vector<double> result;
11158 for (size_t i = 0; i < ser.size(); ++i) {
11159 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11160 }
11161 return pandas::Series<double>(result, ser.name());
11162 };
11163
11164 auto result = s.pipe(add_mean, 10.0);
11165
11166 bool passed = true;
11167 // mean is 2.5, offset is 10.0, so each value + 12.5
11168 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11169 for (size_t i = 0; i < result.size(); ++i) {
11170 if (!approx_equal(result[i], expected[i])) {
11171 passed = false;
11172 std::cout << " [FAIL] : in pd_test_func_apply_series_pipe() : value mismatch at " << i << std::endl;
11173 throw std::runtime_error("pd_test_func_apply_series_pipe failed: value mismatch");
11174 }
resample (pd_test_1_all.cpp:20321)
20311 "2020-01-01 00:00:00",
20312 "2020-01-01 12:00:00",
20313 "2020-01-02 00:00:00",
20314 "2020-01-02 12:00:00",
20315 "2020-01-03 00:00:00",
20316 "2020-01-03 12:00:00"
20317 };
20318 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20319
20320 // Resample to daily
20321 auto resampler = df.resample("D");
20322 pandas::DataFrame result = resampler.sum();
20323
20324 // Check that we got aggregated results
20325 bool passed = (result.nrows() <= df.nrows());
20326
20327 if (!passed) {
20328 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
20329 throw std::runtime_error("pd_test_timeseries_resample_basic failed");
20330 }
rolling (pd_test_1_all.cpp:20667)
20657#include <vector>
20658#include "../pandas/pd_series.h"
20659
20660namespace dataframe_tests {
20661 namespace dataframe_tests_windowing {
20662
20663 void pd_test_rolling_sum() {
20664 std::cout << "========= Rolling sum ===========================";
20665
20666 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20667 auto result = s.rolling(3).sum();
20668
20669 // Window 3:
20670 // idx 0: [1] -> NaN (not enough values)
20671 // idx 1: [1,2] -> NaN (not enough values)
20672 // idx 2: [1,2,3] -> 6
20673 // idx 3: [2,3,4] -> 9
20674 // idx 4: [3,4,5] -> 12
20675 bool passed = result.size() == 5;
20676 if (!passed) {
20677 std::cout << " [FAIL] : in pd_test_rolling_sum() : result size should be 5" << std::endl;
transform (pd_test_1_all.cpp:11071)
11062 std::cout << " -> tests passed" << std::endl;
11063 }
11064
11065 void pd_test_func_apply_series_transform() {
11066 std::cout << "========= Series transform ============================";
11067
11068 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070 // Transform must return same shape
11071 auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073 bool passed = true;
11074 if (result.size() != s.size()) {
11075 passed = false;
11076 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078 }
11079
11080 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081 for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11062 std::cout << " -> tests passed" << std::endl;
11063 }
11064
11065 void pd_test_func_apply_series_transform() {
11066 std::cout << "========= Series transform ============================";
11067
11068 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070 // Transform must return same shape
11071 auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073 bool passed = true;
11074 if (result.size() != s.size()) {
11075 passed = false;
11076 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078 }
11079
11080 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081 for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11062 std::cout << " -> tests passed" << std::endl;
11063 }
11064
11065 void pd_test_func_apply_series_transform() {
11066 std::cout << "========= Series transform ============================";
11067
11068 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070 // Transform must return same shape
11071 auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073 bool passed = true;
11074 if (result.size() != s.size()) {
11075 passed = false;
11076 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078 }
11079
11080 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081 for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11062 std::cout << " -> tests passed" << std::endl;
11063 }
11064
11065 void pd_test_func_apply_series_transform() {
11066 std::cout << "========= Series transform ============================";
11067
11068 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070 // Transform must return same shape
11071 auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073 bool passed = true;
11074 if (result.size() != s.size()) {
11075 passed = false;
11076 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078 }
11079
11080 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081 for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11062 std::cout << " -> tests passed" << std::endl;
11063 }
11064
11065 void pd_test_func_apply_series_transform() {
11066 std::cout << "========= Series transform ============================";
11067
11068 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070 // Transform must return same shape
11071 auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073 bool passed = true;
11074 if (result.size() != s.size()) {
11075 passed = false;
11076 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078 }
11079
11080 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081 for (size_t i = 0; i < result.size(); ++i) {
transform_callable_resolved (pd_test_5_all.cpp:98570)
98560static pandas::DataFrame run_method(MethodId mid, const pandas::DataFrame& df,
98561 CbId cid, Hist& hist) {
98562 if (mid == MethodId::Map) {
98563 MapCb cb = mk_map_cb(cid, hist);
98564 return df.map_callable_resolved(cb, hist);
98565 }
98566 SeriesCb cb = mk_series_cb(cid, hist);
98567 switch (mid) {
98568 case MethodId::ApplyA0: return df.apply_resolved_typed(cb, hist, 0);
98569 case MethodId::ApplyA1: return df.apply_resolved_typed(cb, hist, 1);
98570 case MethodId::TransformA0: return df.transform_callable_resolved(cb, hist, 0);
98571 case MethodId::TransformA1: return df.transform_callable_resolved(cb, hist, 1);
98572 default: break;
98573 }
98574 return pandas::DataFrame{};
98575}
98576
98577static Shape expected_shape(MethodId mid, const pandas::DataFrame& src) {
98578 switch (mid) {
98579 case MethodId::ApplyA0: return {1u, src.ncols()};
98580 case MethodId::ApplyA1: return {src.nrows(), 1u};
add (pd_test_1_all.cpp:4844)
4835namespace dataframe_tests {
4836 namespace dataframe_tests_arithmetic {
4837
4838 void pd_test_arithmetic_series_named_ops() {
4839 std::cout << "========= Series named ops ======================";
4840
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add (pd_test_1_all.cpp:4844)
4835namespace dataframe_tests {
4836 namespace dataframe_tests_arithmetic {
4837
4838 void pd_test_arithmetic_series_named_ops() {
4839 std::cout << "========= Series named ops ======================";
4840
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add (pd_test_1_all.cpp:4844)
4835namespace dataframe_tests {
4836 namespace dataframe_tests_arithmetic {
4837
4838 void pd_test_arithmetic_series_named_ops() {
4839 std::cout << "========= Series named ops ======================";
4840
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add_column (pd_test_1_all.cpp:6540)
6530 void pd_test_dataframe_manipulation() {
6531 std::cout << "========= data manipulation ================";
6532
6533 std::map<std::string, std::vector<numpy::int64>> data;
6534 data["A"] = {1, 2, 3};
6535 data["B"] = {4, 5, 6};
6536
6537 pandas::DataFrame df(data);
6538
6539 // Test add_column
6540 df.add_column<numpy::int64>("C", {7, 8, 9});
6541 if (df.ncols() != 3) {
6542 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : add_column ncols != 3" << std::endl;
6543 throw std::runtime_error("pd_test_dataframe_manipulation failed: add_column ncols != 3");
6544 }
6545
6546 // Test pop
6547 auto popped = df.pop("C");
6548 if (df.ncols() != 2) {
6549 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550 throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
add_column_nullable (pd_test_3_all.cpp:953)
943 df.add_column<pandas::Nullable<bool>>("A", {true, false, pandas::None});
944 df.add_column<std::int64_t>("B", {1, 2, 3});
945 if (df.ncols() != 2 || df.nrows() != 3) {
946 throw std::runtime_error("Nullable<bool> add_column: shape");
947 }
948 }
949
950 // Case E: add_column_nullable<int64> + NA_INT64
951 {
952 pandas::DataFrame df;
953 df.add_column_nullable<int64_t>("int_na",
954 {1, pandas::NA_INT64, 3, pandas::NA_INT64});
955 df.add_column_nullable<bool>("bool_na",
956 {true, pandas::NA_BOOL, false, pandas::NA_BOOL});
957 df.add_column_nullable<std::string>("str_na",
958 {std::string("a"), pandas::NA_STRING, std::string("c"), pandas::NA_STRING});
959
960 if (df.ncols() != 3 || df.nrows() != 4) {
961 throw std::runtime_error("add_column_nullable: shape");
962 }
963 }
add_column_with_dtype_override (pd_test_2_all.cpp:19266)
19257// =====================================================================
19258// Test: sum() converts bool columns to int64 counts
19259// =====================================================================
19260
19261void pd_test_agg_dtype_sum_bool_to_int() {
19262 std::cout << " -- pd_test_agg_dtype_sum_bool_to_int --" << std::endl;
19263
19264 pandas::DataFrame df;
19265 // Create a bool column using string values with dtype override
19266 df.add_column_with_dtype_override("flag", std::vector<std::string>{"True", "False", "True"}, "bool");
19267
19268 auto result = df.sum(0, true, false, 0);
19269 check(approx_eq(result.iat(static_cast<size_t>(0)), 2.0), "bool_count_true_eq_2");
19270 check(result.dtype_name() == "int64", "bool_sum_dtype_int64");
19271}
19272
19273// =====================================================================
19274// Test: sum() preserves nullable Int64 dtype
19275// =====================================================================
add_prefix (pd_test_2_all.cpp:4)
2// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
3// dataframe_tests/pd_test_add_prefix.cpp
4// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
5#include <iostream>
6#include <stdexcept>
7#include <vector>
8#include <string>
9#include <map>
10#include "../pandas/pd_dataframe.h"
11#include "../pandas/pd_groupby.h"
12
13// CRITICAL: No using namespace directives
add_suffix (pd_test_2_all.cpp:4)
2// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
3// dataframe_tests/pd_test_add_prefix.cpp
4// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
5#include <iostream>
6#include <stdexcept>
7#include <vector>
8#include <string>
9#include <map>
10#include "../pandas/pd_dataframe.h"
11#include "../pandas/pd_groupby.h"
12
13// CRITICAL: No using namespace directives
div (pd_test_1_all.cpp:4865)
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870 }
4871
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
div (pd_test_1_all.cpp:4865)
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870 }
4871
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
div (pd_test_1_all.cpp:4865)
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870 }
4871
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
divide (pd_test_3_all.cpp:555)
545 if (mul_result.size() != 4) {
546 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548 }
549 // 10*2=20
550 if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553 }
554
555 // Test divide()
556 pandas::Series<numpy::float64> div_result = s1.divide(s2);
557 if (div_result.size() != 4) {
558 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : divide() size mismatch" << std::endl;
559 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide()");
560 }
561 // 10/2=5
562 if (std::abs(div_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
563 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : divide() value mismatch" << std::endl;
564 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide() value");
565 }
divide (pd_test_3_all.cpp:555)
545 if (mul_result.size() != 4) {
546 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548 }
549 // 10*2=20
550 if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553 }
554
555 // Test divide()
556 pandas::Series<numpy::float64> div_result = s1.divide(s2);
557 if (div_result.size() != 4) {
558 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : divide() size mismatch" << std::endl;
559 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide()");
560 }
561 // 10/2=5
562 if (std::abs(div_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
563 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : divide() value mismatch" << std::endl;
564 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide() value");
565 }
dot (pd_test_1_all.cpp:22594)
22585 std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22586 return 0;
22587 }
22588
22589} // namespace dataframe_tests
22590// ------------------- pd_test_all_any.cpp (end) -----------------------------
22591
22592// ------------------- pd_test_dot.cpp (start) -----------------------------
22593// dataframe_tests/pd_test_dot.cpp
22594// Test DataFrame.dot() method - matrix multiplication
22595
22596#include <iostream>
22597#include <stdexcept>
22598#include <cmath>
22599#include "../pandas/pd_dataframe.h"
22600
22601// CRITICAL: No using namespace directives
22602
22603namespace dataframe_tests {
22604 namespace dataframe_tests_dot {
dot (pd_test_1_all.cpp:22594)
22585 std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22586 return 0;
22587 }
22588
22589} // namespace dataframe_tests
22590// ------------------- pd_test_all_any.cpp (end) -----------------------------
22591
22592// ------------------- pd_test_dot.cpp (start) -----------------------------
22593// dataframe_tests/pd_test_dot.cpp
22594// Test DataFrame.dot() method - matrix multiplication
22595
22596#include <iostream>
22597#include <stdexcept>
22598#include <cmath>
22599#include "../pandas/pd_dataframe.h"
22600
22601// CRITICAL: No using namespace directives
22602
22603namespace dataframe_tests {
22604 namespace dataframe_tests_dot {
floordiv (pd_test_1_all.cpp:4881)
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
4876 std::cout << "========= Series floordiv/mod ===================";
4877
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
floordiv (pd_test_1_all.cpp:4881)
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
4876 std::cout << "========= Series floordiv/mod ===================";
4877
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
mod (pd_test_1_all.cpp:4888)
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893 }
4894
4895 // Scalar operations
4896 auto fd_scalar = a.floordiv(2.0);
4897 passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898 if (!passed) {
mod (pd_test_1_all.cpp:4888)
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893 }
4894
4895 // Scalar operations
4896 auto fd_scalar = a.floordiv(2.0);
4897 passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898 if (!passed) {
mul (pd_test_1_all.cpp:4858)
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
multiindex (pd_test_1_all.cpp:27024)
27014 pandas::DataFrame df(data);
27015
27016 auto result = df.value_counts();
27017 auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27018
27019 if (!counts.has_multiindex()) {
27020 std::cout << " [FAIL] : expected MultiIndex" << std::endl;
27021 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27022 }
27023
27024 const auto& midx = counts.multiindex();
27025
27026 // Should have 2 levels
27027 if (midx.nlevels() != 2) {
27028 std::cout << " [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27029 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
27030 }
27031
27032 std::cout << " -> tests passed" << std::endl;
27033 }
multiply (pd_test_3_all.cpp:543)
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536 }
537 // 10-2=8
538 if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541 }
542
543 // Test multiply()
544 pandas::Series<double> mul_result = s1.multiply(s2);
545 if (mul_result.size() != 4) {
546 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548 }
549 // 10*2=20
550 if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553 }
multiply (pd_test_3_all.cpp:543)
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536 }
537 // 10-2=8
538 if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541 }
542
543 // Test multiply()
544 pandas::Series<double> mul_result = s1.multiply(s2);
545 if (mul_result.size() != 4) {
546 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548 }
549 // 10*2=20
550 if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553 }
pow (pd_test_1_all.cpp:4911)
4901 }
4902
4903 std::cout << " -> tests passed" << std::endl;
4904 }
4905
4906 void pd_test_arithmetic_series_pow() {
4907 std::cout << "========= Series pow ============================";
4908
4909 pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911 auto p = a.pow(2.0);
4912 bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913 if (!passed) {
4914 std::cout << " [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915 throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916 }
4917
4918 // Series pow Series
4919 pandas::Series<double> exp({1.0, 2.0, 0.5});
4920 auto p2 = a.pow(exp);
4921 passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001; // 3^2=9
pow (pd_test_1_all.cpp:4911)
4901 }
4902
4903 std::cout << " -> tests passed" << std::endl;
4904 }
4905
4906 void pd_test_arithmetic_series_pow() {
4907 std::cout << "========= Series pow ============================";
4908
4909 pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911 auto p = a.pow(2.0);
4912 bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913 if (!passed) {
4914 std::cout << " [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915 throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916 }
4917
4918 // Series pow Series
4919 pandas::Series<double> exp({1.0, 2.0, 0.5});
4920 auto p2 = a.pow(exp);
4921 passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001; // 3^2=9
radd (pd_test_2_all.cpp:7440)
7430 if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431 return std::abs(a - b) < tol;
7432 }
7433
7434 // Helper to get double value from DataFrame at position
7435 double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436 return df.iloc<numpy::float64>(row, col);
7437 }
7438
7439 void pd_test_radd_scalar() {
7440 std::cout << "========= radd() with scalar =====================";
7441
7442 // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443 std::map<std::string, std::vector<double>> data = {
7444 {"angles", {0.0, 3.0, 4.0}},
7445 {"degrees", {360.0, 180.0, 360.0}}
7446 };
7447 pandas::DataFrame df(data);
7448
7449 // df.radd(1) should be equivalent to 1 + df
7450 pandas::DataFrame result = df.radd(1.0);
radd (pd_test_2_all.cpp:7440)
7430 if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431 return std::abs(a - b) < tol;
7432 }
7433
7434 // Helper to get double value from DataFrame at position
7435 double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436 return df.iloc<numpy::float64>(row, col);
7437 }
7438
7439 void pd_test_radd_scalar() {
7440 std::cout << "========= radd() with scalar =====================";
7441
7442 // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443 std::map<std::string, std::vector<double>> data = {
7444 {"angles", {0.0, 3.0, 4.0}},
7445 {"degrees", {360.0, 180.0, 360.0}}
7446 };
7447 pandas::DataFrame df(data);
7448
7449 // df.radd(1) should be equivalent to 1 + df
7450 pandas::DataFrame result = df.radd(1.0);
radd (pd_test_2_all.cpp:7440)
7430 if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431 return std::abs(a - b) < tol;
7432 }
7433
7434 // Helper to get double value from DataFrame at position
7435 double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436 return df.iloc<numpy::float64>(row, col);
7437 }
7438
7439 void pd_test_radd_scalar() {
7440 std::cout << "========= radd() with scalar =====================";
7441
7442 // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443 std::map<std::string, std::vector<double>> data = {
7444 {"angles", {0.0, 3.0, 4.0}},
7445 {"degrees", {360.0, 180.0, 360.0}}
7446 };
7447 pandas::DataFrame df(data);
7448
7449 // df.radd(1) should be equivalent to 1 + df
7450 pandas::DataFrame result = df.radd(1.0);
rdiv (pd_test_2_all.cpp:7713)
7703 }
7704
7705 if (!passed) {
7706 throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707 }
7708
7709 std::cout << " -> tests passed" << std::endl;
7710 }
7711
7712 void pd_test_rdiv_scalar() {
7713 std::cout << "========= rdiv() with scalar =====================";
7714
7715 // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716 std::map<std::string, std::vector<double>> data = {
7717 {"angles", {0.0, 3.0, 4.0}},
7718 {"degrees", {360.0, 180.0, 360.0}}
7719 };
7720 pandas::DataFrame df(data);
7721
7722 // df.rdiv(10) = 10 / df
7723 pandas::DataFrame result = df.rdiv(10.0);
rdiv (pd_test_2_all.cpp:7713)
7703 }
7704
7705 if (!passed) {
7706 throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707 }
7708
7709 std::cout << " -> tests passed" << std::endl;
7710 }
7711
7712 void pd_test_rdiv_scalar() {
7713 std::cout << "========= rdiv() with scalar =====================";
7714
7715 // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716 std::map<std::string, std::vector<double>> data = {
7717 {"angles", {0.0, 3.0, 4.0}},
7718 {"degrees", {360.0, 180.0, 360.0}}
7719 };
7720 pandas::DataFrame df(data);
7721
7722 // df.rdiv(10) = 10 / df
7723 pandas::DataFrame result = df.rdiv(10.0);
rdiv (pd_test_2_all.cpp:7713)
7703 }
7704
7705 if (!passed) {
7706 throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707 }
7708
7709 std::cout << " -> tests passed" << std::endl;
7710 }
7711
7712 void pd_test_rdiv_scalar() {
7713 std::cout << "========= rdiv() with scalar =====================";
7714
7715 // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716 std::map<std::string, std::vector<double>> data = {
7717 {"angles", {0.0, 3.0, 4.0}},
7718 {"degrees", {360.0, 180.0, 360.0}}
7719 };
7720 pandas::DataFrame df(data);
7721
7722 // df.rdiv(10) = 10 / df
7723 pandas::DataFrame result = df.rdiv(10.0);
rfloordiv (pd_test_2_all.cpp:7909)
7899 }
7900
7901 if (!passed) {
7902 throw std::runtime_error("pd_test_rtruediv_with_fill_value failed");
7903 }
7904
7905 std::cout << " -> tests passed" << std::endl;
7906 }
7907
7908 void pd_test_rfloordiv_scalar() {
7909 std::cout << "========= rfloordiv() with scalar ================";
7910
7911 std::map<std::string, std::vector<double>> data = {
7912 {"A", {3.0, 4.0}},
7913 {"B", {7.0, 8.0}}
7914 };
7915 pandas::DataFrame df(data);
7916
7917 // df.rfloordiv(10) = 10 // df (floor division)
7918 pandas::DataFrame result = df.rfloordiv(10.0);
rfloordiv (pd_test_2_all.cpp:7909)
7899 }
7900
7901 if (!passed) {
7902 throw std::runtime_error("pd_test_rtruediv_with_fill_value failed");
7903 }
7904
7905 std::cout << " -> tests passed" << std::endl;
7906 }
7907
7908 void pd_test_rfloordiv_scalar() {
7909 std::cout << "========= rfloordiv() with scalar ================";
7910
7911 std::map<std::string, std::vector<double>> data = {
7912 {"A", {3.0, 4.0}},
7913 {"B", {7.0, 8.0}}
7914 };
7915 pandas::DataFrame df(data);
7916
7917 // df.rfloordiv(10) = 10 // df (floor division)
7918 pandas::DataFrame result = df.rfloordiv(10.0);
rmod (pd_test_2_all.cpp:8121)
8111 }
8112
8113 if (!passed) {
8114 throw std::runtime_error("pd_test_rfloordiv_division_by_zero failed");
8115 }
8116
8117 std::cout << " -> tests passed" << std::endl;
8118 }
8119
8120 void pd_test_rmod_scalar() {
8121 std::cout << "========= rmod() with scalar =====================";
8122
8123 std::map<std::string, std::vector<double>> data = {
8124 {"A", {3.0, 4.0}}
8125 };
8126 pandas::DataFrame df(data);
8127
8128 // df.rmod(10) = 10 % df
8129 pandas::DataFrame result = df.rmod(10.0);
8130
8131 bool passed = true;
rmod (pd_test_2_all.cpp:8121)
8111 }
8112
8113 if (!passed) {
8114 throw std::runtime_error("pd_test_rfloordiv_division_by_zero failed");
8115 }
8116
8117 std::cout << " -> tests passed" << std::endl;
8118 }
8119
8120 void pd_test_rmod_scalar() {
8121 std::cout << "========= rmod() with scalar =====================";
8122
8123 std::map<std::string, std::vector<double>> data = {
8124 {"A", {3.0, 4.0}}
8125 };
8126 pandas::DataFrame df(data);
8127
8128 // df.rmod(10) = 10 % df
8129 pandas::DataFrame result = df.rmod(10.0);
8130
8131 bool passed = true;
rmul (pd_test_2_all.cpp:7591)
7581 }
7582
7583 if (!passed) {
7584 throw std::runtime_error("pd_test_rsub_dataframe failed");
7585 }
7586
7587 std::cout << " -> tests passed" << std::endl;
7588 }
7589
7590 void pd_test_rmul_scalar() {
7591 std::cout << "========= rmul() with scalar =====================";
7592
7593 std::map<std::string, std::vector<double>> data = {
7594 {"A", {2.0, 3.0}},
7595 {"B", {4.0, 5.0}}
7596 };
7597 pandas::DataFrame df(data);
7598
7599 // df.rmul(10) = 10 * df
7600 pandas::DataFrame result = df.rmul(10.0);
rmul (pd_test_2_all.cpp:7591)
7581 }
7582
7583 if (!passed) {
7584 throw std::runtime_error("pd_test_rsub_dataframe failed");
7585 }
7586
7587 std::cout << " -> tests passed" << std::endl;
7588 }
7589
7590 void pd_test_rmul_scalar() {
7591 std::cout << "========= rmul() with scalar =====================";
7592
7593 std::map<std::string, std::vector<double>> data = {
7594 {"A", {2.0, 3.0}},
7595 {"B", {4.0, 5.0}}
7596 };
7597 pandas::DataFrame df(data);
7598
7599 // df.rmul(10) = 10 * df
7600 pandas::DataFrame result = df.rmul(10.0);
rmul (pd_test_2_all.cpp:7591)
7581 }
7582
7583 if (!passed) {
7584 throw std::runtime_error("pd_test_rsub_dataframe failed");
7585 }
7586
7587 std::cout << " -> tests passed" << std::endl;
7588 }
7589
7590 void pd_test_rmul_scalar() {
7591 std::cout << "========= rmul() with scalar =====================";
7592
7593 std::map<std::string, std::vector<double>> data = {
7594 {"A", {2.0, 3.0}},
7595 {"B", {4.0, 5.0}}
7596 };
7597 pandas::DataFrame df(data);
7598
7599 // df.rmul(10) = 10 * df
7600 pandas::DataFrame result = df.rmul(10.0);
rpow (pd_test_2_all.cpp:8327)
8317 }
8318
8319 if (!passed) {
8320 throw std::runtime_error("pd_test_rmod_modulo_by_zero failed");
8321 }
8322
8323 std::cout << " -> tests passed" << std::endl;
8324 }
8325
8326 void pd_test_rpow_scalar() {
8327 std::cout << "========= rpow() with scalar =====================";
8328
8329 std::map<std::string, std::vector<double>> data = {
8330 {"A", {2.0, 3.0}},
8331 {"B", {0.0, 1.0}}
8332 };
8333 pandas::DataFrame df(data);
8334
8335 // df.rpow(2) = 2 ** df
8336 pandas::DataFrame result = df.rpow(2.0);
rpow (pd_test_2_all.cpp:8327)
8317 }
8318
8319 if (!passed) {
8320 throw std::runtime_error("pd_test_rmod_modulo_by_zero failed");
8321 }
8322
8323 std::cout << " -> tests passed" << std::endl;
8324 }
8325
8326 void pd_test_rpow_scalar() {
8327 std::cout << "========= rpow() with scalar =====================";
8328
8329 std::map<std::string, std::vector<double>> data = {
8330 {"A", {2.0, 3.0}},
8331 {"B", {0.0, 1.0}}
8332 };
8333 pandas::DataFrame df(data);
8334
8335 // df.rpow(2) = 2 ** df
8336 pandas::DataFrame result = df.rpow(2.0);
rsub (pd_test_2_all.cpp:7520)
7510 }
7511
7512 if (!passed) {
7513 throw std::runtime_error("pd_test_radd_dataframe failed");
7514 }
7515
7516 std::cout << " -> tests passed" << std::endl;
7517 }
7518
7519 void pd_test_rsub_scalar() {
7520 std::cout << "========= rsub() with scalar =====================";
7521
7522 std::map<std::string, std::vector<double>> data = {
7523 {"A", {1.0, 2.0, 3.0}},
7524 {"B", {4.0, 5.0, 6.0}}
7525 };
7526 pandas::DataFrame df(data);
7527
7528 // df.rsub(10) = 10 - df
7529 pandas::DataFrame result = df.rsub(10.0);
rsub (pd_test_2_all.cpp:7520)
7510 }
7511
7512 if (!passed) {
7513 throw std::runtime_error("pd_test_radd_dataframe failed");
7514 }
7515
7516 std::cout << " -> tests passed" << std::endl;
7517 }
7518
7519 void pd_test_rsub_scalar() {
7520 std::cout << "========= rsub() with scalar =====================";
7521
7522 std::map<std::string, std::vector<double>> data = {
7523 {"A", {1.0, 2.0, 3.0}},
7524 {"B", {4.0, 5.0, 6.0}}
7525 };
7526 pandas::DataFrame df(data);
7527
7528 // df.rsub(10) = 10 - df
7529 pandas::DataFrame result = df.rsub(10.0);
rsub (pd_test_2_all.cpp:7520)
7510 }
7511
7512 if (!passed) {
7513 throw std::runtime_error("pd_test_radd_dataframe failed");
7514 }
7515
7516 std::cout << " -> tests passed" << std::endl;
7517 }
7518
7519 void pd_test_rsub_scalar() {
7520 std::cout << "========= rsub() with scalar =====================";
7521
7522 std::map<std::string, std::vector<double>> data = {
7523 {"A", {1.0, 2.0, 3.0}},
7524 {"B", {4.0, 5.0, 6.0}}
7525 };
7526 pandas::DataFrame df(data);
7527
7528 // df.rsub(10) = 10 - df
7529 pandas::DataFrame result = df.rsub(10.0);
rtruediv (pd_test_2_all.cpp:7795)
7785 }
7786
7787 if (!passed) {
7788 throw std::runtime_error("pd_test_rdiv_dataframe failed");
7789 }
7790
7791 std::cout << " -> tests passed" << std::endl;
7792 }
7793
7794 void pd_test_rtruediv_scalar() {
7795 std::cout << "========= rtruediv() with scalar =================";
7796
7797 std::map<std::string, std::vector<double>> data = {
7798 {"A", {2.0, 4.0}}
7799 };
7800 pandas::DataFrame df(data);
7801
7802 // rtruediv is alias for rdiv
7803 pandas::DataFrame result = df.rtruediv(10.0);
7804
7805 bool passed = true;
rtruediv (pd_test_2_all.cpp:7795)
7785 }
7786
7787 if (!passed) {
7788 throw std::runtime_error("pd_test_rdiv_dataframe failed");
7789 }
7790
7791 std::cout << " -> tests passed" << std::endl;
7792 }
7793
7794 void pd_test_rtruediv_scalar() {
7795 std::cout << "========= rtruediv() with scalar =================";
7796
7797 std::map<std::string, std::vector<double>> data = {
7798 {"A", {2.0, 4.0}}
7799 };
7800 pandas::DataFrame df(data);
7801
7802 // rtruediv is alias for rdiv
7803 pandas::DataFrame result = df.rtruediv(10.0);
7804
7805 bool passed = true;
sub (pd_test_1_all.cpp:4851)
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub (pd_test_1_all.cpp:4851)
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub (pd_test_1_all.cpp:4851)
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
subtract (pd_test_3_all.cpp:531)
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524 std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526 std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527 std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528 pandas::Series<double> s1(vals1, "s1");
529 pandas::Series<double> s2(vals2, "s2");
530
531 // Test subtract()
532 pandas::Series<double> sub_result = s1.subtract(s2);
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536 }
537 // 10-2=8
538 if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541 }
subtract (pd_test_3_all.cpp:531)
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524 std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526 std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527 std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528 pandas::Series<double> s1(vals1, "s1");
529 pandas::Series<double> s2(vals2, "s2");
530
531 // Test subtract()
532 pandas::Series<double> sub_result = s1.subtract(s2);
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536 }
537 // 10-2=8
538 if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541 }
truediv (pd_test_3_all.cpp:524)
514 }
515
516 std::cout << " -> tests passed" << std::endl;
517}
518
519// ============================================================================
520// Category 3: Series Arithmetic Operations
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524 std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526 std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527 std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528 pandas::Series<double> s1(vals1, "s1");
529 pandas::Series<double> s2(vals2, "s2");
530
531 // Test subtract()
532 pandas::Series<double> sub_result = s1.subtract(s2);
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
truediv (pd_test_3_all.cpp:524)
514 }
515
516 std::cout << " -> tests passed" << std::endl;
517}
518
519// ============================================================================
520// Category 3: Series Arithmetic Operations
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524 std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526 std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527 std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528 pandas::Series<double> s1(vals1, "s1");
529 pandas::Series<double> s2(vals2, "s2");
530
531 // Test subtract()
532 pandas::Series<double> sub_result = s1.subtract(s2);
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
compare (pd_test_1_all.cpp:13989)
13979 if (!approx_equal(std::stod(b_col.get_value_str(0)), 10.0)) {
13980 passed = false;
13981 std::cout << " [FAIL] : in pd_test_joining_update() : column B was changed" << std::endl;
13982 throw std::runtime_error("pd_test_joining_update failed: B changed");
13983 }
13984
13985 std::cout << " -> tests passed" << std::endl;
13986 }
13987
13988 // =====================================================================
13989 // compare() Tests
13990 // =====================================================================
13991
13992 void pd_test_joining_compare() {
13993 std::cout << "========= compare =====================================";
13994
13995 std::map<std::string, std::vector<double>> left_data = {
13996 {"A", {1.0, 2.0, 3.0}},
13997 {"B", {10.0, 20.0, 30.0}}
13998 };
13999 pandas::DataFrame left(left_data);
eq (pd_test_2_all.cpp:19679)
19670 std::vector<pandas::Series<numpy::float64>> cols;
19671 cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672 cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673 pandas::DataFrame df(cols, {"A", "B"});
19674
19675 pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676 s.set_index(std::make_unique<pandas::Index<std::string>>(
19677 std::vector<std::string>{"A", "B"}));
19678
19679 auto result = df.eq(s, 1);
19680
19681 check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682 check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683 check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684 check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689 std::cout << " -- pd_test_broadcasting_mul_scalar --" << std::endl;
eq (pd_test_2_all.cpp:19679)
19670 std::vector<pandas::Series<numpy::float64>> cols;
19671 cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672 cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673 pandas::DataFrame df(cols, {"A", "B"});
19674
19675 pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676 s.set_index(std::make_unique<pandas::Index<std::string>>(
19677 std::vector<std::string>{"A", "B"}));
19678
19679 auto result = df.eq(s, 1);
19680
19681 check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682 check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683 check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684 check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689 std::cout << " -- pd_test_broadcasting_mul_scalar --" << std::endl;
eq (pd_test_2_all.cpp:19679)
19670 std::vector<pandas::Series<numpy::float64>> cols;
19671 cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672 cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673 pandas::DataFrame df(cols, {"A", "B"});
19674
19675 pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676 s.set_index(std::make_unique<pandas::Index<std::string>>(
19677 std::vector<std::string>{"A", "B"}));
19678
19679 auto result = df.eq(s, 1);
19680
19681 check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682 check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683 check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684 check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689 std::cout << " -- pd_test_broadcasting_mul_scalar --" << std::endl;
equals (pd_test_1_all.cpp:5866)
5856 std::cout << "========= equals ======================================";
5857
5858 pandas::CategoricalArray arr1({"a", "b", "a"});
5859 pandas::CategoricalArray arr2({"a", "b", "a"});
5860 pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862 pandas::CategoricalIndex idx1(arr1);
5863 pandas::CategoricalIndex idx2(arr2);
5864 pandas::CategoricalIndex idx3(arr3);
5865
5866 bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867 if (!passed) {
5868 std::cout << " [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869 throw std::runtime_error("pd_test_categorical_index_equals failed");
5870 }
5871
5872 std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876 std::cout << "========= identical ===================================";
ge (pd_test_3_all.cpp:303)
293 }
294
295 std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303 std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305 std::map<std::string, std::vector<double>> data1 = {
306 {"A", {1.0, 2.0, 3.0}},
307 {"B", {4.0, 5.0, 6.0}}
308 };
309 std::map<std::string, std::vector<double>> data2 = {
310 {"A", {1.0, 3.0, 3.0}},
311 {"B", {4.0, 4.0, 7.0}}
312 };
313 pandas::DataFrame df1(data1);
ge (pd_test_3_all.cpp:303)
293 }
294
295 std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303 std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305 std::map<std::string, std::vector<double>> data1 = {
306 {"A", {1.0, 2.0, 3.0}},
307 {"B", {4.0, 5.0, 6.0}}
308 };
309 std::map<std::string, std::vector<double>> data2 = {
310 {"A", {1.0, 3.0, 3.0}},
311 {"B", {4.0, 4.0, 7.0}}
312 };
313 pandas::DataFrame df1(data1);
ge (pd_test_3_all.cpp:303)
293 }
294
295 std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303 std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305 std::map<std::string, std::vector<double>> data1 = {
306 {"A", {1.0, 2.0, 3.0}},
307 {"B", {4.0, 5.0, 6.0}}
308 };
309 std::map<std::string, std::vector<double>> data2 = {
310 {"A", {1.0, 3.0, 3.0}},
311 {"B", {4.0, 4.0, 7.0}}
312 };
313 pandas::DataFrame df1(data1);
gen (pd_test_5_all.cpp:35852)
35842 double pc = pct_change_pc(a, b);
35843 double pd = pct_change_pd(a, b);
35844 pandas_tests::check(std::abs(pc - pd) < 1e-12,
35845 "case_12.formulas_within_ULP", local_fail);
35846 }
35847
35848 void bin_edge_412638_case_13_entropy_pct_change_invariance(int& local_fail) {
35849 // Generate prices via deterministic walk; compute returns by both
35850 // formulas; bin both; entropy should be IDENTICAL (bin assignments
35851 // not shifted by ULP-scale formula drift). Cycle-1 finding.
35852 std::mt19937_64 gen(42);
35853 std::normal_distribution<double> nd(0.0003, 0.02);
35854 std::vector<double> prices;
35855 prices.reserve(500);
35856 double s = 100.0;
35857 for (int i = 0; i < 500; ++i) {
35858 if (i > 0) s = s * std::exp(nd(gen));
35859 prices.push_back(s);
35860 }
35861 std::vector<double> r_pc, r_pd;
35862 for (size_t i = 1; i < prices.size(); ++i) {
gt (pd_test_3_all.cpp:344)
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348 throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349 }
350
351 // Test ge()
352 pandas::DataFrame ge_result = df1.ge(df2);
353 if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
gt (pd_test_3_all.cpp:344)
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348 throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349 }
350
351 // Test ge()
352 pandas::DataFrame ge_result = df1.ge(df2);
353 if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
gt (pd_test_3_all.cpp:344)
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348 throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349 }
350
351 // Test ge()
352 pandas::DataFrame ge_result = df1.ge(df2);
353 if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
levels (pd_test_2_all.cpp:9787)
9777 pandas::DataFrame df(data);
9778
9779 std::vector<std::string> hier_index = {
9780 "Final exam:History:January",
9781 "Final exam:Geography:February",
9782 "Coursework:History:March",
9783 "Coursework:Geography:April"
9784 };
9785 df.set_index(std::make_unique<pandas::Index<std::string>>(hier_index));
9786
9787 // Default: swap last two levels (i=-2, j=-1)
9788 pandas::DataFrame result = df.swaplevel();
9789
9790 std::string idx0 = result.index().get_value_str(0);
9791 std::string idx1 = result.index().get_value_str(1);
9792 std::string idx2 = result.index().get_value_str(2);
9793 std::string idx3 = result.index().get_value_str(3);
9794
9795 bool passed = (idx0 == "Final exam:January:History" &&
9796 idx1 == "Final exam:February:Geography" &&
9797 idx2 == "Coursework:March:History" &&
levels (pd_test_2_all.cpp:9787)
9777 pandas::DataFrame df(data);
9778
9779 std::vector<std::string> hier_index = {
9780 "Final exam:History:January",
9781 "Final exam:Geography:February",
9782 "Coursework:History:March",
9783 "Coursework:Geography:April"
9784 };
9785 df.set_index(std::make_unique<pandas::Index<std::string>>(hier_index));
9786
9787 // Default: swap last two levels (i=-2, j=-1)
9788 pandas::DataFrame result = df.swaplevel();
9789
9790 std::string idx0 = result.index().get_value_str(0);
9791 std::string idx1 = result.index().get_value_str(1);
9792 std::string idx2 = result.index().get_value_str(2);
9793 std::string idx3 = result.index().get_value_str(3);
9794
9795 bool passed = (idx0 == "Final exam:January:History" &&
9796 idx1 == "Final exam:February:Geography" &&
9797 idx2 == "Coursework:March:History" &&
lt (pd_test_3_all.cpp:330)
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313 pandas::DataFrame df1(data1);
314 pandas::DataFrame df2(data2);
315
316 // Test eq()
317 pandas::DataFrame eq_result = df1.eq(df2);
318 if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313 pandas::DataFrame df1(data1);
314 pandas::DataFrame df2(data2);
315
316 // Test eq()
317 pandas::DataFrame eq_result = df1.eq(df2);
318 if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313 pandas::DataFrame df1(data1);
314 pandas::DataFrame df2(data2);
315
316 // Test eq()
317 pandas::DataFrame eq_result = df1.eq(df2);
318 if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
rank (pd_test_1_all.cpp:6451)
6441 // =====================================================================
6442 // Test: Rank
6443 // =====================================================================
6444 void pd_test_dataframe_rank() {
6445 std::cout << "========= rank =============================";
6446
6447 // Test Series rank with default method (average)
6448 {
6449 std::vector<double> data = {3.0, 1.0, 4.0, 1.0, 5.0};
6450 pandas::Series<double> s(data, "test");
6451 auto ranked = s.rank();
6452
6453 // Values: 3, 1, 4, 1, 5 -> Sorted: 1, 1, 3, 4, 5
6454 // Ranks (average): 1.5, 1.5, 3, 4, 5
6455 // Original positions: 3->3, 1->1.5, 4->4, 1->1.5, 5->5
6456 double r0 = std::stod(ranked.get_value_str(0)); // 3.0 -> rank 3
6457 double r1 = std::stod(ranked.get_value_str(1)); // 1.0 -> rank 1.5
6458
6459 if (std::abs(r0 - 3.0) > 1e-10) {
6460 std::cout << " [FAIL] : in pd_test_dataframe_rank() : value 3.0 should have rank 3, got " << r0 << std::endl;
6461 throw std::runtime_error("pd_test_dataframe_rank failed: value 3.0 rank");
sort_index (pd_test_3_all.cpp:583)
573 // 10/2=5
574 if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577 }
578
579 std::cout << " -> tests passed" << std::endl;
580 }
581
582 void pd_test_3_all_series_sort_index() {
583 std::cout << "========= Series.sort_index() ========================";
584
585 // NOTE: Series.sort_index() has an implementation issue:
586 // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587 // This test verifies the function signature exists.
588 // When the implementation is fixed, this test should be updated.
589
590 std::vector<double> vals = {30.0, 10.0, 20.0};
591 pandas::Series<double> s(vals, "test");
592
593 // Verify the Series was created correctly
sort_index (pd_test_3_all.cpp:583)
573 // 10/2=5
574 if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577 }
578
579 std::cout << " -> tests passed" << std::endl;
580 }
581
582 void pd_test_3_all_series_sort_index() {
583 std::cout << "========= Series.sort_index() ========================";
584
585 // NOTE: Series.sort_index() has an implementation issue:
586 // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587 // This test verifies the function signature exists.
588 // When the implementation is fixed, this test should be updated.
589
590 std::vector<double> vals = {30.0, 10.0, 20.0};
591 pandas::Series<double> s(vals, "test");
592
593 // Verify the Series was created correctly
sort_index (pd_test_3_all.cpp:583)
573 // 10/2=5
574 if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577 }
578
579 std::cout << " -> tests passed" << std::endl;
580 }
581
582 void pd_test_3_all_series_sort_index() {
583 std::cout << "========= Series.sort_index() ========================";
584
585 // NOTE: Series.sort_index() has an implementation issue:
586 // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587 // This test verifies the function signature exists.
588 // When the implementation is fixed, this test should be updated.
589
590 std::vector<double> vals = {30.0, 10.0, 20.0};
591 pandas::Series<double> s(vals, "test");
592
593 // Verify the Series was created correctly
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
sort_values_by_transformed (pd_test_2_all.cpp:22232)
22222 }
22223 std::cout << "====================================== [OK] pd_test_series_dtype_inference test suite ========================== " << std::endl;
22224 return 0;
22225 }
22226
22227 } // namespace dataframe_tests
22228 // ------------------- pd_test_series_dtype_inference.cpp (end) -----------------------------
22229
22230 // ------------------- pd_test_sort_key.cpp (start) -----------------------------
22231 // pd_test_sort_key.cpp - Tests for sort_values key function support
22232 // Tests sort_values_by_transformed() and resolve_sort_columns_multiindex()
22233
22234 #include <iostream>
22235 #include <string>
22236 #include <vector>
22237 #include <cmath>
22238 #include <numeric>
22239
22240 #include "../pandas/pd_dataframe.h"
22241
22242 namespace dataframe_tests {
T (pd_test_1_all.cpp:128)
118 throw std::runtime_error("pd_test_boolean_array_kleene_and failed: NA & F");
119 }
120
121 std::cout << " -> tests passed" << std::endl;
122 }
123
124 void pd_test_boolean_array_kleene_or() {
125 std::cout << "========= BooleanArray: Kleene OR ======================= ";
126
127 // Kleene OR truth table:
128 // T | T = T, T | F = T, T | NA = T (True dominates)
129 // F | T = T, F | F = F, F | NA = NA
130 // NA | T = T, NA | F = NA, NA | NA = NA
131
132 pandas::BooleanArray t({std::optional<bool>(true)});
133 pandas::BooleanArray f({std::optional<bool>(false)});
134 pandas::BooleanArray na({std::nullopt});
135
136 // T | NA = T (True dominates)
137 auto tna = (t | na);
138 if (!tna[0].has_value() || !tna[0].value()) {
explode (pd_test_1_all.cpp:6868)
6858 }
6859 }
6860
6861 // Test explode
6862 {
6863 std::map<std::string, std::vector<std::string>> data;
6864 data["id"] = {"1", "2"};
6865 data["tags"] = {"a,b,c", "d,e"};
6866 pandas::DataFrame df(data);
6867
6868 auto exploded = df.explode("tags");
6869 if (exploded.nrows() != 5) { // 3 + 2 = 5 rows
6870 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : explode nrows != 5, got " << exploded.nrows() << std::endl;
6871 throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872 }
6873 }
6874
6875 // Test squeeze
6876 {
6877 std::map<std::string, std::vector<int>> data;
6878 data["A"] = {1};
melt (pd_test_1_all.cpp:6846)
6836 }
6837
6838 // Test melt
6839 {
6840 std::map<std::string, std::vector<int>> data;
6841 data["id"] = {1, 2};
6842 data["A"] = {10, 20};
6843 data["B"] = {30, 40};
6844 pandas::DataFrame df(data);
6845
6846 auto melted = df.melt({"id"});
6847 if (melted.nrows() != 4) { // 2 ids * 2 value columns
6848 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849 throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850 }
6851 if (!melted.has_column("variable")) {
6852 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853 throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854 }
6855 if (!melted.has_column("value")) {
6856 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
melt (pd_test_1_all.cpp:6846)
6836 }
6837
6838 // Test melt
6839 {
6840 std::map<std::string, std::vector<int>> data;
6841 data["id"] = {1, 2};
6842 data["A"] = {10, 20};
6843 data["B"] = {30, 40};
6844 pandas::DataFrame df(data);
6845
6846 auto melted = df.melt({"id"});
6847 if (melted.nrows() != 4) { // 2 ids * 2 value columns
6848 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849 throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850 }
6851 if (!melted.has_column("variable")) {
6852 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853 throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854 }
6855 if (!melted.has_column("value")) {
6856 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
melt (pd_test_1_all.cpp:6846)
6836 }
6837
6838 // Test melt
6839 {
6840 std::map<std::string, std::vector<int>> data;
6841 data["id"] = {1, 2};
6842 data["A"] = {10, 20};
6843 data["B"] = {30, 40};
6844 pandas::DataFrame df(data);
6845
6846 auto melted = df.melt({"id"});
6847 if (melted.nrows() != 4) { // 2 ids * 2 value columns
6848 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849 throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850 }
6851 if (!melted.has_column("variable")) {
6852 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853 throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854 }
6855 if (!melted.has_column("value")) {
6856 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
melt (pd_test_1_all.cpp:6846)
6836 }
6837
6838 // Test melt
6839 {
6840 std::map<std::string, std::vector<int>> data;
6841 data["id"] = {1, 2};
6842 data["A"] = {10, 20};
6843 data["B"] = {30, 40};
6844 pandas::DataFrame df(data);
6845
6846 auto melted = df.melt({"id"});
6847 if (melted.nrows() != 4) { // 2 ids * 2 value columns
6848 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849 throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850 }
6851 if (!melted.has_column("variable")) {
6852 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853 throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854 }
6855 if (!melted.has_column("value")) {
6856 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
pivot (pd_test_1_all.cpp:6827)
6817 std::cout << "========= reshaping ========================";
6818
6819 // Test pivot
6820 {
6821 std::map<std::string, std::vector<std::string>> data;
6822 data["date"] = {"2020-01", "2020-01", "2020-02", "2020-02"};
6823 data["city"] = {"NYC", "LA", "NYC", "LA"};
6824 data["temp"] = {"30", "65", "35", "70"};
6825 pandas::DataFrame df(data);
6826
6827 auto pivoted = df.pivot("date", "city", "temp");
6828 if (pivoted.ncols() != 2) { // LA, NYC (alphabetical)
6829 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : pivot ncols != 2, got " << pivoted.ncols() << std::endl;
6830 throw std::runtime_error("pd_test_dataframe_reshape failed: pivot ncols");
6831 }
6832 if (pivoted.nrows() != 2) { // 2020-01, 2020-02
6833 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : pivot nrows != 2, got " << pivoted.nrows() << std::endl;
6834 throw std::runtime_error("pd_test_dataframe_reshape failed: pivot nrows");
6835 }
6836 }
pivot_table (pd_test_1_all.cpp:25691)
25681 namespace dataframe_tests {
25682 namespace dataframe_tests_pivot_table {
25683
25684 bool approx_equal(double a, double b, double tol = 1e-9) {
25685 if (std::isnan(a) && std::isnan(b)) return true;
25686 if (std::isnan(a) || std::isnan(b)) return false;
25687 return std::abs(a - b) < tol;
25688 }
25689
25690 void pd_test_pivot_table_sum() {
25691 std::cout << "========= pivot_table (sum) ================================";
25692
25693 // Create test data: region, product, sales (numeric values for aggregation)
25694 pandas::DataFrame df;
25695 df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696 df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697 df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699 // Pivot with sum aggregation
25700 pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681 namespace dataframe_tests {
25682 namespace dataframe_tests_pivot_table {
25683
25684 bool approx_equal(double a, double b, double tol = 1e-9) {
25685 if (std::isnan(a) && std::isnan(b)) return true;
25686 if (std::isnan(a) || std::isnan(b)) return false;
25687 return std::abs(a - b) < tol;
25688 }
25689
25690 void pd_test_pivot_table_sum() {
25691 std::cout << "========= pivot_table (sum) ================================";
25692
25693 // Create test data: region, product, sales (numeric values for aggregation)
25694 pandas::DataFrame df;
25695 df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696 df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697 df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699 // Pivot with sum aggregation
25700 pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681 namespace dataframe_tests {
25682 namespace dataframe_tests_pivot_table {
25683
25684 bool approx_equal(double a, double b, double tol = 1e-9) {
25685 if (std::isnan(a) && std::isnan(b)) return true;
25686 if (std::isnan(a) || std::isnan(b)) return false;
25687 return std::abs(a - b) < tol;
25688 }
25689
25690 void pd_test_pivot_table_sum() {
25691 std::cout << "========= pivot_table (sum) ================================";
25692
25693 // Create test data: region, product, sales (numeric values for aggregation)
25694 pandas::DataFrame df;
25695 df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696 df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697 df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699 // Pivot with sum aggregation
25700 pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681 namespace dataframe_tests {
25682 namespace dataframe_tests_pivot_table {
25683
25684 bool approx_equal(double a, double b, double tol = 1e-9) {
25685 if (std::isnan(a) && std::isnan(b)) return true;
25686 if (std::isnan(a) || std::isnan(b)) return false;
25687 return std::abs(a - b) < tol;
25688 }
25689
25690 void pd_test_pivot_table_sum() {
25691 std::cout << "========= pivot_table (sum) ================================";
25692
25693 // Create test data: region, product, sales (numeric values for aggregation)
25694 pandas::DataFrame df;
25695 df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696 df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697 df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699 // Pivot with sum aggregation
25700 pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681 namespace dataframe_tests {
25682 namespace dataframe_tests_pivot_table {
25683
25684 bool approx_equal(double a, double b, double tol = 1e-9) {
25685 if (std::isnan(a) && std::isnan(b)) return true;
25686 if (std::isnan(a) || std::isnan(b)) return false;
25687 return std::abs(a - b) < tol;
25688 }
25689
25690 void pd_test_pivot_table_sum() {
25691 std::cout << "========= pivot_table (sum) ================================";
25692
25693 // Create test data: region, product, sales (numeric values for aggregation)
25694 pandas::DataFrame df;
25695 df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696 df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697 df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699 // Pivot with sum aggregation
25700 pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table_multi_agg (pd_test_2_all.cpp:21310)
21300 << dataframe_tests_misc_migration::g_fail << " failed)" << std::endl;
21301
21302 return dataframe_tests_misc_migration::g_fail;
21303 }
21304
21305 } // namespace dataframe_tests
21306 // ------------------- pd_test_misc_migration.cpp (end) -----------------------------
21307
21308 // ------------------- pd_test_pivot_ext.cpp (start) -----------------------------
21309 // pd_test_pivot_ext.cpp - Tests for pivot_table extensions
21310 // Tests pivot_table_multi_agg(), pivot_table_with_grouper(), auto-detect numeric columns
21311
21312 #include <iostream>
21313 #include <string>
21314 #include <vector>
21315 #include <cmath>
21316 #include <set>
21317
21318 #include "../pandas/pd_dataframe.h"
21319 #include "../pandas/pd_groupby.h"
pivot_table_with_margins (pd_test_3_all.cpp:7480)
7470 passed = (pivot_fill.nrows() == 2);
7471 if (!passed) {
7472 std::cout << " [FAIL] : in pd_test_3_all_pivot_table() : fill_value test failed" << std::endl;
7473 throw std::runtime_error("pd_test_3_all_pivot_table failed: fill_value");
7474 }
7475
7476 std::cout << " -> tests passed" << std::endl;
7477 }
7478
7479 void pd_test_3_all_pivot_table_margins() {
7480 std::cout << "========= DataFrame.pivot_table_with_margins() =======================";
7481
7482 pandas::DataFrame df;
7483 df.add_column<std::string>("region", {"East", "East", "West", "West"});
7484 df.add_column<std::string>("product", {"A", "B", "A", "B"});
7485 df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 250.0});
7486
7487 // Test without margins (should be same as regular pivot_table)
7488 pandas::DataFrame pivot_no_margins = df.pivot_table_with_margins(
7489 "sales", "region", "product", "sum",
7490 std::numeric_limits<double>::quiet_NaN(), false
pivot_table_with_margins (pd_test_3_all.cpp:7480)
7470 passed = (pivot_fill.nrows() == 2);
7471 if (!passed) {
7472 std::cout << " [FAIL] : in pd_test_3_all_pivot_table() : fill_value test failed" << std::endl;
7473 throw std::runtime_error("pd_test_3_all_pivot_table failed: fill_value");
7474 }
7475
7476 std::cout << " -> tests passed" << std::endl;
7477 }
7478
7479 void pd_test_3_all_pivot_table_margins() {
7480 std::cout << "========= DataFrame.pivot_table_with_margins() =======================";
7481
7482 pandas::DataFrame df;
7483 df.add_column<std::string>("region", {"East", "East", "West", "West"});
7484 df.add_column<std::string>("product", {"A", "B", "A", "B"});
7485 df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 250.0});
7486
7487 // Test without margins (should be same as regular pivot_table)
7488 pandas::DataFrame pivot_no_margins = df.pivot_table_with_margins(
7489 "sales", "region", "product", "sum",
7490 std::numeric_limits<double>::quiet_NaN(), false
squeeze (pd_test_1_all.cpp:6881)
6871 throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872 }
6873 }
6874
6875 // Test squeeze
6876 {
6877 std::map<std::string, std::vector<int>> data;
6878 data["A"] = {1};
6879 pandas::DataFrame df(data);
6880
6881 auto squeezed = df.squeeze();
6882 // Should return without error for 1x1 DataFrame
6883 }
6884
6885 // Test stack
6886 {
6887 std::map<std::string, std::vector<int>> data;
6888 data["A"] = {1, 2};
6889 data["B"] = {3, 4};
6890 pandas::DataFrame df(data);
stack (pd_test_1_all.cpp:6892)
6882 // Should return without error for 1x1 DataFrame
6883 }
6884
6885 // Test stack
6886 {
6887 std::map<std::string, std::vector<int>> data;
6888 data["A"] = {1, 2};
6889 data["B"] = {3, 4};
6890 pandas::DataFrame df(data);
6891
6892 auto stacked = df.stack();
6893 // Stack should produce 4 rows (2 rows * 2 columns)
6894 if (stacked.nrows() != 4) {
6895 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : stack nrows != 4, got " << stacked.nrows() << std::endl;
6896 throw std::runtime_error("pd_test_dataframe_reshape failed: stack nrows");
6897 }
6898 }
6899
6900 std::cout << " -> tests passed" << std::endl;
6901 }
stack_levels (pd_test_3_all.cpp:28695)
28685 };
28686 df.set_columns_levels(lvls, std::vector<std::string>{"l0", "l1"});
28687 return df;
28688 }
28689
28690 void pd_test_stack_single_level() {
28691 std::cout << " -- pd_test_stack_single_level --" << std::endl;
28692 int fail = 0;
28693 auto df = make_ml_df();
28694 // Default -> stack innermost level (l1 = x,y) -> remaining columns = A,B
28695 auto r = df.stack_levels({-1}, true);
28696 fail += spt_check(r.ncols() == 2, "ncols==2");
28697 fail += spt_check(r.nrows() == 4, "nrows==4 (2 orig rows * 2 stack vals)");
28698 fail += spt_check(r.has_multiindex(), "multiindex present");
28699 if (fail == 0) std::cout << " OK" << std::endl;
28700 if (fail != 0) throw std::runtime_error("pd_test_stack_single_level failed");
28701 }
28702
28703 void pd_test_stack_level_param() {
28704 std::cout << " -- pd_test_stack_level_param --" << std::endl;
28705 int fail = 0;
swapaxes (pd_test_3_all.cpp:2276)
2266 auto sorted_desc = arr.sort_values(false, "last");
2267 if (*sorted_desc[0] != "c" || *sorted_desc[1] != "b" ||
2268 *sorted_desc[2] != "a" || sorted_desc[3].has_value()) {
2269 throw std::runtime_error("sort_values descending failed");
2270 }
2271
2272 std::cout << " -> tests passed" << std::endl;
2273 }
2274
2275 void pd_test_3_all_categorical_swapaxes() {
2276 std::cout << "========= CategoricalArray.swapaxes() =================";
2277
2278 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2279 pandas::CategoricalArray arr(values);
2280
2281 auto result = arr.swapaxes(0, 0);
2282 if (result.size() != 3) {
2283 throw std::runtime_error("swapaxes failed");
2284 }
2285
2286 bool threw = false;
transpose (pd_test_1_all.cpp:16648)
16638 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640 }
16641
16642 passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643 if (!passed) {
16644 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646 }
16647
16648 // Test transpose() alias
16649 auto transposed2 = s.transpose();
16650 passed = transposed2.size() == s.size();
16651 if (!passed) {
16652 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653 throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654 }
16655
16656 std::cout << " -> tests passed" << std::endl;
16657 }
unstack (pd_test_3_all.cpp:1739)
1729 }
1730 if (s.size() != 3) {
1731 std::cout << " [FAIL] : in pd_test_3_all_chainable_mutators() : Case H size" << std::endl;
1732 throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case H size");
1733 }
1734
1735 std::cout << " -> tests passed" << std::endl;
1736 }
1737
1738 void pd_test_3_all_dataframe_unstack() {
1739 std::cout << "========= DataFrame.unstack() ========================";
1740
1741 std::map<std::string, std::vector<double>> data = {
1742 {"A", {1.0, 2.0, 3.0}},
1743 {"B", {4.0, 5.0, 6.0}}
1744 };
1745 pandas::DataFrame df(data);
1746
1747 // Without MultiIndex, unstack() returns self (matches pandas behavior)
1748 pandas::DataFrame result = df.unstack();
align (pd_test_1_all.cpp:14035)
14025 if (!approx_equal(a_s1, 2.0) || !approx_equal(a_o1, 99.0)) {
14026 passed = false;
14027 std::cout << " [FAIL] : in pd_test_joining_compare() : difference at row 1 not shown" << std::endl;
14028 throw std::runtime_error("pd_test_joining_compare failed: diff values");
14029 }
14030
14031 std::cout << " -> tests passed" << std::endl;
14032 }
14033
14034 // =====================================================================
14035 // align() Tests
14036 // =====================================================================
14037
14038 void pd_test_joining_align() {
14039 std::cout << "========= align =======================================";
14040
14041 std::map<std::string, std::vector<double>> left_data = {
14042 {"A", {1.0, 2.0}}
14043 };
14044 std::vector<std::string> left_idx = {"x", "y"};
14045 pandas::DataFrame left(left_data, std::make_unique<pandas::Index<std::string>>(left_idx));
align_series (pd_test_2_all.cpp:21152)
21142 std::cout << " -- test_align_df_series_outer --" << std::endl;
21143
21144 pandas::DataFrame df;
21145 df.add_column("A", std::vector<numpy::float64>{1.0, 2.0});
21146 df.add_column("B", std::vector<numpy::float64>{3.0, 4.0});
21147
21148 pandas::Series<numpy::float64> s({10.0, 20.0}, "vals");
21149 s.set_index(std::make_unique<pandas::Index<std::string>>(
21150 std::vector<std::string>{"B", "C"}));
21151
21152 auto [aligned_df, aligned_s] = df.align_series(s, "outer", 1);
21153
21154 // Outer: columns A, B, C
21155 check(aligned_df.ncols() == 3, "df_ncols_3");
21156 check(aligned_s.size() == 3, "s_size_3");
21157
21158 auto df_cols = aligned_df.columns().to_list();
21159 check(df_cols[0] == "A", "df_col_A");
21160 check(df_cols[1] == "B", "df_col_B");
21161 check(df_cols[2] == "C", "df_col_C");
combine (pd_test_2_all.cpp:1700)
1690 std::cout << "====================================== [OK] pd_test_between_time test suite ========================== " << std::endl;
1691 return 0;
1692 }
1693
1694 } // namespace dataframe_tests
1695 // ------------------- pd_test_between_time.cpp (end) -----------------------------
1696
1697 // ------------------- pd_test_combine.cpp (start) -----------------------------
1698 // dataframe_tests/pd_test_combine.cpp
1699 // Test for DataFrame.combine() - column-wise combine with another DataFrame
1700
1701 #include <iostream>
1702 #include <cmath>
1703 #include <stdexcept>
1704 #include "../pandas/pd_dataframe.h"
1705
1706 // CRITICAL: No using namespace directives
1707
1708 namespace dataframe_tests {
1709 namespace dataframe_tests_combine {
combine_first (pd_test_1_all.cpp:13889)
13879 if (!approx_equal(b1, 10.0) || !approx_equal(b2, 20.0)) {
13880 passed = false;
13881 std::cout << " [FAIL] : in pd_test_joining_join_index() : matched rows wrong" << std::endl;
13882 throw std::runtime_error("pd_test_joining_join_index failed: match values");
13883 }
13884
13885 std::cout << " -> tests passed" << std::endl;
13886 }
13887
13888 // =====================================================================
13889 // combine_first() Tests
13890 // =====================================================================
13891
13892 void pd_test_joining_combine_first() {
13893 std::cout << "========= combine_first ===============================";
13894
13895 std::map<std::string, std::vector<double>> left_data = {
13896 {"A", {1.0, std::nan(""), 3.0}},
13897 {"B", {std::nan(""), 5.0, std::nan("")}}
13898 };
13899 std::vector<std::string> left_idx = {"x", "y", "z"};
concat (pd_test_1_all.cpp:17717)
17707 }
17708
17709 void pd_test_period_index_concat() {
17710 std::cout << "========= concat factory ==============================";
17711
17712 std::vector<int64_t> ordinals1 = {0, 1};
17713 std::vector<int64_t> ordinals2 = {2, 3};
17714 pandas::PeriodIndex idx1(ordinals1, "D");
17715 pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717 pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719 bool passed = (concatenated.size() == 4);
17720 if (!passed) {
17721 std::cout << " [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722 throw std::runtime_error("pd_test_period_index_concat failed");
17723 }
17724
17725 std::cout << " -> tests passed" << std::endl;
17726 }
concat (pd_test_1_all.cpp:17717)
17707 }
17708
17709 void pd_test_period_index_concat() {
17710 std::cout << "========= concat factory ==============================";
17711
17712 std::vector<int64_t> ordinals1 = {0, 1};
17713 std::vector<int64_t> ordinals2 = {2, 3};
17714 pandas::PeriodIndex idx1(ordinals1, "D");
17715 pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717 pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719 bool passed = (concatenated.size() == 4);
17720 if (!passed) {
17721 std::cout << " [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722 throw std::runtime_error("pd_test_period_index_concat failed");
17723 }
17724
17725 std::cout << " -> tests passed" << std::endl;
17726 }
join (pd_test_1_all.cpp:12353)
12343 std::cout << " -> tests passed" << std::endl;
12344 }
12345
12346 void pd_test_index_join() {
12347 std::cout << "========= join ========================================";
12348
12349 pandas::Index<numpy::int64> idx1{1, 2, 3};
12350 pandas::Index<numpy::int64> idx2{2, 3, 4};
12351
12352 auto [inner_joined, left_idx, right_idx] = idx1.join(idx2, "inner");
12353 bool passed = (inner_joined.size() == 2); // {2, 3}
12354
12355 auto [outer_joined, ol_idx, or_idx] = idx1.join(idx2, "outer");
12356 passed = passed && (outer_joined.size() == 4); // {1, 2, 3, 4}
12357
12358 if (!passed) {
12359 std::cout << " [FAIL] : in pd_test_index_join() : join failed" << std::endl;
12360 throw std::runtime_error("pd_test_index_join failed");
12361 }
merge (pd_test_1_all.cpp:13639)
13629 namespace dataframe_tests {
13630 namespace dataframe_tests_joining {
13631
13632 // Helper to check approximate equality
13633 bool approx_equal(double a, double b, double tol = 1e-9) {
13634 if (std::isnan(a) && std::isnan(b)) return true;
13635 return std::abs(a - b) < tol;
13636 }
13637
13638 // =====================================================================
13639 // merge() Tests
13640 // =====================================================================
13641
13642 void pd_test_joining_merge_inner() {
13643 std::cout << "========= merge inner join ============================";
13644
13645 // Left DataFrame: id, value_left
13646 std::map<std::string, std::vector<double>> left_data = {
13647 {"id", {1.0, 2.0, 3.0, 4.0}},
13648 {"value_left", {10.0, 20.0, 30.0, 40.0}}
13649 };
merge (pd_test_1_all.cpp:13639)
13629 namespace dataframe_tests {
13630 namespace dataframe_tests_joining {
13631
13632 // Helper to check approximate equality
13633 bool approx_equal(double a, double b, double tol = 1e-9) {
13634 if (std::isnan(a) && std::isnan(b)) return true;
13635 return std::abs(a - b) < tol;
13636 }
13637
13638 // =====================================================================
13639 // merge() Tests
13640 // =====================================================================
13641
13642 void pd_test_joining_merge_inner() {
13643 std::cout << "========= merge inner join ============================";
13644
13645 // Left DataFrame: id, value_left
13646 std::map<std::string, std::vector<double>> left_data = {
13647 {"id", {1.0, 2.0, 3.0, 4.0}},
13648 {"value_left", {10.0, 20.0, 30.0, 40.0}}
13649 };
merge (pd_test_1_all.cpp:13639)
13629 namespace dataframe_tests {
13630 namespace dataframe_tests_joining {
13631
13632 // Helper to check approximate equality
13633 bool approx_equal(double a, double b, double tol = 1e-9) {
13634 if (std::isnan(a) && std::isnan(b)) return true;
13635 return std::abs(a - b) < tol;
13636 }
13637
13638 // =====================================================================
13639 // merge() Tests
13640 // =====================================================================
13641
13642 void pd_test_joining_merge_inner() {
13643 std::cout << "========= merge inner join ============================";
13644
13645 // Left DataFrame: id, value_left
13646 std::map<std::string, std::vector<double>> left_data = {
13647 {"id", {1.0, 2.0, 3.0, 4.0}},
13648 {"value_left", {10.0, 20.0, 30.0, 40.0}}
13649 };
asfreq (pd_test_1_all.cpp:2869)
2859 std::cout << "========= PeriodArray: asfreq ======================= ";
2860
2861 // Monthly to quarterly
2862 pandas::PeriodArray arr_m(std::vector<std::string>{
2863 "2024-01",
2864 "2024-04",
2865 "2024-07",
2866 "NaT"
2867 }, "M");
2868
2869 auto arr_q = arr_m.asfreq("Q");
2870 if (arr_q.size() != 4) {
2871 std::cout << " [FAIL] : asfreq size should be 4" << std::endl;
2872 throw std::runtime_error("pd_test_period_array_asfreq failed: size");
2873 }
2874 if (arr_q.freqstr() != "Q") {
2875 std::cout << " [FAIL] : asfreq freqstr should be 'Q'" << std::endl;
2876 throw std::runtime_error("pd_test_period_array_asfreq failed: freqstr");
2877 }
2878
2879 // Check NaT is preserved
asof (pd_test_2_all.cpp:366)
356 std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357 return 0;
358 }
359
360 } // namespace dataframe_tests
361 // ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363 // ------------------- pd_test_asof.cpp (start) -----------------------------
364 // dataframe_tests/pd_test_asof.cpp
365 // Test for DataFrame.asof() method
366
367 #include <iostream>
368 #include <cmath>
369 #include <stdexcept>
370 #include <limits>
371 #include "../pandas/pd_dataframe.h"
372
373 // CRITICAL: No using namespace directives
374
375 namespace dataframe_tests {
asof (pd_test_2_all.cpp:366)
356 std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357 return 0;
358 }
359
360 } // namespace dataframe_tests
361 // ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363 // ------------------- pd_test_asof.cpp (start) -----------------------------
364 // dataframe_tests/pd_test_asof.cpp
365 // Test for DataFrame.asof() method
366
367 #include <iostream>
368 #include <cmath>
369 #include <stdexcept>
370 #include <limits>
371 #include "../pandas/pd_dataframe.h"
372
373 // CRITICAL: No using namespace directives
374
375 namespace dataframe_tests {
between_time (pd_test_2_all.cpp:1154)
1144 std::cout << "====================================== [OK] pd_test_at_time test suite ========================== " << std::endl;
1145 return 0;
1146 }
1147
1148 } // namespace dataframe_tests
1149 // ------------------- pd_test_at_time.cpp (end) -----------------------------
1150
1151 // ------------------- pd_test_between_time.cpp (start) -----------------------------
1152 // dataframe_tests/pd_test_between_time.cpp
1153 // Tests for DataFrame.between_time() method (pandas 2.0+ API)
1154 // Selects values between particular times of day from datetime-indexed DataFrame
1155 #include <iostream>
1156 #include <stdexcept>
1157 #include <vector>
1158 #include <string>
1159 #include <map>
1160 #include "../pandas/pd_dataframe.h"
1161
1162 // CRITICAL: No using namespace directives
between_time (pd_test_2_all.cpp:1154)
1144 std::cout << "====================================== [OK] pd_test_at_time test suite ========================== " << std::endl;
1145 return 0;
1146 }
1147
1148 } // namespace dataframe_tests
1149 // ------------------- pd_test_at_time.cpp (end) -----------------------------
1150
1151 // ------------------- pd_test_between_time.cpp (start) -----------------------------
1152 // dataframe_tests/pd_test_between_time.cpp
1153 // Tests for DataFrame.between_time() method (pandas 2.0+ API)
1154 // Selects values between particular times of day from datetime-indexed DataFrame
1155 #include <iostream>
1156 #include <stdexcept>
1157 #include <vector>
1158 #include <string>
1159 #include <map>
1160 #include "../pandas/pd_dataframe.h"
1161
1162 // CRITICAL: No using namespace directives
diff (pd_test_1_all.cpp:5171)
5161 }
5162
5163 void pd_test_arithmetic_dataframe_diff_shift() {
5164 std::cout << "========= DataFrame diff/shift ==================";
5165
5166 std::map<std::string, std::vector<double>> data;
5167 data["A"] = {1.0, 3.0, 6.0, 10.0};
5168 pandas::DataFrame df(data);
5169
5170 // diff: [NaN, 2, 3, 4]
5171 auto d = df.diff();
5172 std::string val = d["A"].get_value_str(1);
5173 bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174 if (!passed) {
5175 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177 }
5178
5179 // First element should be NaN
5180 val = d["A"].get_value_str(0);
5181 passed = std::isnan(std::stod(val));
pct_change (pd_test_1_all.cpp:4621)
4611 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612 }
4613
4614 std::cout << " -> tests passed" << std::endl;
4615 }
4616
4617 void pd_test_aggregation_series_pct_change() {
4618 std::cout << "========= Series pct_change =====================";
4619
4620 pandas::Series<double> s({100.0, 110.0, 121.0});
4621 auto pct = s.pct_change();
4622
4623 // First element should be NaN
4624 bool passed = std::isnan(pct[0]);
4625 if (!passed) {
4626 std::cout << " [FAIL] : in pd_test_aggregation_series_pct_change() : first element should be NaN" << std::endl;
4627 throw std::runtime_error("pd_test_aggregation_series_pct_change failed: first element should be NaN");
4628 }
4629
4630 // Second element should be 0.1 (10% increase)
4631 passed = std::abs(pct[1] - 0.1) < 0.001;
shift (pd_test_1_all.cpp:5188)
5178 // First element should be NaN
5179 val = d["A"].get_value_str(0);
5180 passed = std::isnan(std::stod(val));
5181 if (!passed) {
5182 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5183 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5184 }
5185
5186 // shift: [NaN, 1, 3, 6]
5187 auto s = df.shift();
5188 val = s["A"].get_value_str(1);
5189 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5190 if (!passed) {
5191 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5192 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5193 }
5194
5195 std::cout << " -> tests passed" << std::endl;
5196 }
to_period (pd_test_2_all.cpp:14554)
14544 std::cout << "====================================== [OK] pd_test_to_parquet test suite ========================" << std::endl;
14545 return 0;
14546 }
14547
14548 } // namespace dataframe_tests
14549 // ------------------- pd_test_to_parquet.cpp (end) -----------------------------
14550
14551 // ------------------- pd_test_to_period.cpp (start) -----------------------------
14552 // dataframe_tests/pd_test_to_period.cpp
14553 // Test suite for DataFrame.to_period() method
14554
14555 #include <iostream>
14556 #include <stdexcept>
14557 #include <vector>
14558 #include <string>
14559 #include <map>
14560
14561 #include "../pandas/pd_dataframe.h"
14562
14563 // CRITICAL: No using namespace directives
to_timestamp (pd_test_1_all.cpp:2830)
2820 void pd_test_period_array_to_timestamp() {
2821 std::cout << "========= PeriodArray: to_timestamp ======================= ";
2822
2823 pandas::PeriodArray arr(std::vector<std::string>{
2824 "2024-01",
2825 "2024-06",
2826 "NaT"
2827 }, "M");
2828
2829 // to_timestamp with start
2830 auto ts_start = arr.to_timestamp("start");
2831 if (ts_start.size() != 3) {
2832 std::cout << " [FAIL] : to_timestamp size should be 3" << std::endl;
2833 throw std::runtime_error("pd_test_period_array_to_timestamp failed: size");
2834 }
2835
2836 auto ts0 = ts_start[0];
2837 if (!ts0.has_value()) {
2838 std::cout << " [FAIL] : ts_start[0] should have value" << std::endl;
2839 throw std::runtime_error("pd_test_period_array_to_timestamp failed: ts_start[0]");
2840 }
tz_convert (pd_test_2_all.cpp:17874)
17864 std::cout << "====================================== [OK] pd_test_transform test suite ========================== " << std::endl;
17865 return 0;
17866 }
17867
17868 } // namespace dataframe_tests
17869 // ------------------- pd_test_transform.cpp (end) -----------------------------
17870
17871 // ------------------- pd_test_tz_convert.cpp (start) -----------------------------
17872 // dataframe_tests/pd_test_tz_convert.cpp
17873 // Test for DataFrame.tz_convert() method
17874
17875 #include <iostream>
17876 #include <stdexcept>
17877 #include <cmath>
17878 #include "../pandas/pd_dataframe.h"
17879
17880 namespace dataframe_tests {
17881 namespace dataframe_tests_tz_convert {
17882
17883 void pd_test_tz_convert_basic() {
tz_localize (pd_test_1_all.cpp:1431)
1421 "2023-06-15"
1422 });
1423
1424 // Initially should be timezone-naive
1425 if (arr.is_tz_aware()) {
1426 std::cout << " [FAIL] : array should be timezone-naive initially" << std::endl;
1427 throw std::runtime_error("pd_test_datetime_array_timezone failed: naive");
1428 }
1429
1430 // Localize to UTC
1431 auto localized = arr.tz_localize("UTC");
1432 if (!localized.is_tz_aware()) {
1433 std::cout << " [FAIL] : localized array should be timezone-aware" << std::endl;
1434 throw std::runtime_error("pd_test_datetime_array_timezone failed: localize");
1435 }
1436
1437 // Verify timezone name in dtype
1438 auto dt = localized.dtype();
1439 if (!dt.is_tz_aware()) {
1440 std::cout << " [FAIL] : dtype should be timezone-aware" << std::endl;
1441 throw std::runtime_error("pd_test_datetime_array_timezone failed: dtype tz");
to_csv (pd_test_1_all.cpp:6967)
6957 void pd_test_dataframe_io() {
6958 std::cout << "========= I/O methods ======================";
6959
6960 std::map<std::string, std::vector<numpy::int64>> data;
6961 data["A"] = {1, 2, 3};
6962 data["B"] = {4, 5, 6};
6963
6964 pandas::DataFrame df(data);
6965
6966 // Test to_csv
6967 std::string csv = df.to_csv(false);
6968 if (csv.empty()) {
6969 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970 throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971 }
6972 if (csv.find("A") == std::string::npos) {
6973 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974 throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975 }
6976
6977 // Test to_json
to_gbq (pd_test_3_all.cpp:3012)
3002 throw std::runtime_error("to_xml() missing root element");
3003 }
3004 if (xml_output.find("<row>") == std::string::npos) {
3005 throw std::runtime_error("to_xml() missing row elements");
3006 }
3007
3008 std::cout << " -> tests passed" << std::endl;
3009 }
3010
3011 void pd_test_3_all_df_to_gbq() {
3012 std::cout << "========= DataFrame.to_gbq() =============================";
3013
3014 std::map<std::string, std::vector<double>> data = {
3015 {"A", {1.0, 2.0, 3.0}},
3016 {"B", {4.0, 5.0, 6.0}}
3017 };
3018 pandas::DataFrame df(data);
3019
3020 // Test to_gbq - should throw (not implemented)
3021 bool threw = false;
3022 try {
to_json (pd_test_1_all.cpp:6978)
6968 if (csv.empty()) {
6969 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970 throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971 }
6972 if (csv.find("A") == std::string::npos) {
6973 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974 throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975 }
6976
6977 // Test to_json
6978 std::string json = df.to_json("columns");
6979 if (json.empty()) {
6980 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_json empty" << std::endl;
6981 throw std::runtime_error("pd_test_dataframe_io failed: to_json empty");
6982 }
6983 if (json.find("{") == std::string::npos) {
6984 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_json not JSON" << std::endl;
6985 throw std::runtime_error("pd_test_dataframe_io failed: to_json not JSON");
6986 }
6987
6988 // Test to_string
to_xml (pd_test_3_all.cpp:2983)
2973 }
2974
2975 std::cout << " -> tests passed" << std::endl;
2976 }
2977
2978 // ============================================================================
2979 // Category 16: DataFrame Plan 2 - Export Functions
2980 // ============================================================================
2981
2982 void pd_test_3_all_df_to_xml() {
2983 std::cout << "========= DataFrame.to_xml() =============================";
2984
2985 std::map<std::string, std::vector<double>> data = {
2986 {"A", {1.0, 2.0, 3.0}},
2987 {"B", {4.0, 5.0, 6.0}}
2988 };
2989 pandas::DataFrame df(data);
2990
2991 // Test to_xml to string
2992 std::string xml_output = df.to_xml();
2993 if (xml_output.empty()) {
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
bool_ (pd_test_1_all.cpp:9120)
9110void pd_test_datetime_mixin_array_constructor() {
9111 std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113 // Create DatetimeArray with some values
9114 numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115 data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2001
9116 data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2017
9117 data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2020
9118
9119 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120 mask.setElementAt({0}, numpy::bool_(false));
9121 mask.setElementAt({1}, numpy::bool_(false));
9122 mask.setElementAt({2}, numpy::bool_(false));
9123
9124 pandas::DatetimeArray arr(data, mask);
9125 pandas::DatetimeTDMixin idx(arr, "timestamps");
9126
9127 bool passed = (idx.size() == 3 && !idx.empty() &&
9128 idx.name().has_value() && *idx.name() == "timestamps" &&
9129 idx.inferred_type() == "datetime");
9130 if (!passed) {
convert_dtypes (pd_test_1_all.cpp:27317)
27307 void pd_test_convert_dtypes_integer_strings() {
27308 std::cout << "========= convert_dtypes: integer strings ============";
27309
27310 // Create DataFrame with string column containing integers
27311 std::map<std::string, std::vector<std::string>> data;
27312 data["a"] = {"1", "2", "3", "4", "5"};
27313
27314 pandas::DataFrame df(data);
27315
27316 // Convert dtypes
27317 pandas::DataFrame converted = df.convert_dtypes();
27318
27319 // After conversion, should be int64
27320 pandas::Series<std::string> dtypes_after = converted.dtypes();
27321 std::string dtype_a = dtypes_after[static_cast<size_t>(0)];
27322
27323 // Verify the dtype was converted to Int64 (nullable integer, per pandas convert_dtypes behavior)
27324 bool passed = (dtype_a == "Int64" || dtype_a == "int64");
27325 if (!passed) {
27326 std::cout << " [FAIL] : in pd_test_convert_dtypes_integer_strings() : expected Int64, got " << dtype_a << std::endl;
27327 throw std::runtime_error("pd_test_convert_dtypes_integer_strings failed: dtype mismatch");
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793 std::cout << "========= copy ========================================";
5794
5795 pandas::CategoricalArray arr({"a", "b", "c"});
5796 pandas::CategoricalIndex idx(arr, "original");
5797
5798 pandas::CategoricalIndex copied = idx.copy();
5799
5800 bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801 copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802 if (!passed) {
5803 std::cout << " [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804 throw std::runtime_error("pd_test_categorical_index_copy failed");
5805 }
5806
5807 std::cout << " -> tests passed" << std::endl;
5808}
infer_objects (pd_test_1_all.cpp:27595)
27585 // Create DataFrame with string column containing integers
27586 std::map<std::string, std::vector<std::string>> data;
27587 data["A"] = {"1", "2", "3", "4", "5"};
27588
27589 pandas::DataFrame df(data);
27590
27591 // Before inference, dtype should be string/object
27592 std::string before_dtype = df["A"].dtype_name();
27593
27594 // Apply infer_objects
27595 pandas::DataFrame result = df.infer_objects();
27596
27597 // After inference, dtype should be int64
27598 std::string after_dtype = result["A"].dtype_name();
27599
27600 bool passed = (after_dtype == "int64");
27601 if (!passed) {
27602 std::cout << " [FAIL] : in pd_test_infer_objects_integer_column() : expected int64, got " << after_dtype << std::endl;
27603 throw std::runtime_error("pd_test_infer_objects_integer_column failed");
27604 }
begin (pd_test_1_all.cpp:457)
447 };
448 pandas::CategoricalArray arr(values);
449
450 const std::vector<std::string>& cats = arr.categories();
451 if (cats.size() != 3) {
452 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454 }
455
456 // Categories should be unique
457 std::set<std::string> unique_cats(cats.begin(), cats.end());
458 if (unique_cats.size() != cats.size()) {
459 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461 }
462
463 std::cout << " -> tests passed" << std::endl;
464 }
465
466 void pd_test_categorical_array_codes_property() {
467 std::cout << "========= CategoricalArray: codes property ======================= ";
begin (pd_test_1_all.cpp:457)
447 };
448 pandas::CategoricalArray arr(values);
449
450 const std::vector<std::string>& cats = arr.categories();
451 if (cats.size() != 3) {
452 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454 }
455
456 // Categories should be unique
457 std::set<std::string> unique_cats(cats.begin(), cats.end());
458 if (unique_cats.size() != cats.size()) {
459 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461 }
462
463 std::cout << " -> tests passed" << std::endl;
464 }
465
466 void pd_test_categorical_array_codes_property() {
467 std::cout << "========= CategoricalArray: codes property ======================= ";
end (pd_test_1_all.cpp:457)
447 };
448 pandas::CategoricalArray arr(values);
449
450 const std::vector<std::string>& cats = arr.categories();
451 if (cats.size() != 3) {
452 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454 }
455
456 // Categories should be unique
457 std::set<std::string> unique_cats(cats.begin(), cats.end());
458 if (unique_cats.size() != cats.size()) {
459 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461 }
462
463 std::cout << " -> tests passed" << std::endl;
464 }
465
466 void pd_test_categorical_array_codes_property() {
467 std::cout << "========= CategoricalArray: codes property ======================= ";
end (pd_test_1_all.cpp:457)
447 };
448 pandas::CategoricalArray arr(values);
449
450 const std::vector<std::string>& cats = arr.categories();
451 if (cats.size() != 3) {
452 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454 }
455
456 // Categories should be unique
457 std::set<std::string> unique_cats(cats.begin(), cats.end());
458 if (unique_cats.size() != cats.size()) {
459 std::cout << " [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460 throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461 }
462
463 std::cout << " -> tests passed" << std::endl;
464 }
465
466 void pd_test_categorical_array_codes_property() {
467 std::cout << "========= CategoricalArray: codes property ======================= ";
items (pd_test_1_all.cpp:16554)
16544 // =====================================================================
16545 // Iteration Tests (items, keys)
16546 // =====================================================================
16547
16548 void pd_test_ndframe_items_keys() {
16549 std::cout << "========= items/keys ===========================================" << std::endl;
16550
16551 pandas::Series<int> s({10, 20, 30});
16552
16553 // Test items()
16554 std::vector<std::string> collected_keys;
16555 std::vector<int> collected_values;
16556
16557 s.items([&](const std::string& key, int value) {
16558 collected_keys.push_back(key);
16559 collected_values.push_back(value);
16560 });
16561
16562 bool passed = collected_keys.size() == 3;
16563 if (!passed) {
iterrows (pd_test_2_all.cpp:6498)
6488 std::cout << " [FAIL] : in pd_test_iter_items_content_access() : dtype expected int, got "
6489 << dtype << std::endl;
6490 throw std::runtime_error("pd_test_iter_items_content_access failed: dtype");
6491 }
6492 }
6493
6494 std::cout << " -> tests passed" << std::endl;
6495 }
6496
6497 // =========================================================================
6498 // iterrows() tests
6499 // =========================================================================
6500
6501 void pd_test_iter_iterrows_basic() {
6502 std::cout << "========= iterrows() basic iteration =============";
6503
6504 // Create a simple DataFrame
6505 std::map<std::string, std::vector<std::string>> data = {
6506 {"A", {"a", "b", "c"}},
6507 {"B", {"1", "2", "3"}},
6508 {"C", {"x", "y", "z"}}
itertuples (pd_test_2_all.cpp:6774)
6764 passed = (it == end);
6765 if (!passed) {
6766 std::cout << " [FAIL] : in pd_test_iter_iterrows_iterator_methods() : didn't reach end" << std::endl;
6767 throw std::runtime_error("pd_test_iter_iterrows_iterator_methods failed: didn't reach end");
6768 }
6769
6770 std::cout << " -> tests passed" << std::endl;
6771 }
6772
6773 // =========================================================================
6774 // itertuples() tests
6775 // =========================================================================
6776
6777 void pd_test_iter_itertuples_basic() {
6778 std::cout << "========= itertuples() basic iteration ===========";
6779
6780 // Create a DataFrame similar to pandas example
6781 std::map<std::string, std::vector<int>> data = {
6782 {"num_legs", {4, 2}},
6783 {"num_wings", {0, 2}}
6784 };
keys (pd_test_1_all.cpp:16319)
16309 }
16310
16311 // Test default value
16312 passed = attrs.get<int>("missing", 99) == 99;
16313 if (!passed) {
16314 std::cout << " [FAIL] : in pd_test_ndframe_attrs() : default value" << std::endl;
16315 throw std::runtime_error("pd_test_ndframe_attrs failed: default value");
16316 }
16317
16318 // Test keys
16319 auto keys = attrs.keys();
16320 passed = keys.size() == 3;
16321 if (!passed) {
16322 std::cout << " [FAIL] : in pd_test_ndframe_attrs() : keys()" << std::endl;
16323 throw std::runtime_error("pd_test_ndframe_attrs failed: keys()");
16324 }
16325
16326 // Test remove
16327 passed = attrs.remove("count") && !attrs.contains("count");
16328 if (!passed) {
16329 std::cout << " [FAIL] : in pd_test_ndframe_attrs() : remove" << std::endl;
duplicated (pd_test_1_all.cpp:10583)
10573 std::cout << " -> tests passed" << std::endl;
10574}
10575
10576void pd_test_extension_index_duplicated() {
10577 std::cout << "========= duplicated =========================";
10578
10579 pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10580 pandas::CategoricalIndex idx(arr);
10581
10582 auto dup_mask = idx.duplicated("first");
10583
10584 bool passed = (dup_mask.getElementAt({0}) == false &&
10585 dup_mask.getElementAt({1}) == false &&
10586 dup_mask.getElementAt({2}) == true &&
10587 dup_mask.getElementAt({3}) == false &&
10588 dup_mask.getElementAt({4}) == true);
10589 if (!passed) {
10590 std::cout << " [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10591 throw std::runtime_error("pd_test_extension_index_duplicated failed");
10592 }
isin (pd_test_1_all.cpp:5938)
5928 std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932 std::cout << "========= inherited isin ==============================";
5933
5934 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935 pandas::CategoricalIndex idx(arr);
5936
5937 std::vector<std::string> values = {"a", "c"};
5938 numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940 bool passed = (mask.getSize() == 4 &&
5941 mask.getElementAt({0}) == true && // a
5942 mask.getElementAt({1}) == false && // b
5943 mask.getElementAt({2}) == true && // c
5944 mask.getElementAt({3}) == false); // d
5945 if (!passed) {
5946 std::cout << " [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947 throw std::runtime_error("pd_test_categorical_index_isin failed");
5948 }
isin (pd_test_1_all.cpp:5938)
5928 std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932 std::cout << "========= inherited isin ==============================";
5933
5934 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935 pandas::CategoricalIndex idx(arr);
5936
5937 std::vector<std::string> values = {"a", "c"};
5938 numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940 bool passed = (mask.getSize() == 4 &&
5941 mask.getElementAt({0}) == true && // a
5942 mask.getElementAt({1}) == false && // b
5943 mask.getElementAt({2}) == true && // c
5944 mask.getElementAt({3}) == false); // d
5945 if (!passed) {
5946 std::cout << " [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947 throw std::runtime_error("pd_test_categorical_index_isin failed");
5948 }
is_na_at (pd_test_5_all.cpp:35205)
35195 pandas::DataFrame df;
35196 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197 pandas_tests::check(df["X"].get_value_bool(0) == true, "case_3.idx0_true", local_fail);
35198 pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199 pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false", local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203 pandas::DataFrame df;
35204 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205 pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206 pandas_tests::check(df["X"].is_na_at(1) == true, "case_4.idx1_is_na", local_fail);
35207 pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
35208}
35209
35210void bool_nullable_826495_case_5_fillna_preserves_dtype(int& local_fail) {
35211 pandas::DataFrame df;
35212 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35213 pandas_tests::check(df["X"].dtype_name() == "boolean", "case_5.pre_dtype", local_fail);
35214 auto df_filled = df.fillna(1.0);
35215 pandas_tests::check(df_filled["X"].dtype_name() == "boolean",
is_numeric_dtype (pd_test_2_all.cpp:19939)
19929 check(labels2[0] == "33.3%", "pct_label_33_3");
19930}
19931
19932// =====================================================================
19933// Test: is_numeric_dtype covers all expected types
19934// =====================================================================
19935
19936void pd_test_describe_is_numeric_dtype() {
19937 std::cout << " -- pd_test_describe_is_numeric_dtype --" << std::endl;
19938
19939 check(pandas::DataFrame::is_numeric_dtype("int64"), "is_numeric_int64");
19940 check(pandas::DataFrame::is_numeric_dtype("Int64"), "is_numeric_Int64");
19941 check(pandas::DataFrame::is_numeric_dtype("float64"), "is_numeric_float64");
19942 check(pandas::DataFrame::is_numeric_dtype("Float64"), "is_numeric_Float64");
19943 check(pandas::DataFrame::is_numeric_dtype("uint8"), "is_numeric_uint8");
19944 check(pandas::DataFrame::is_numeric_dtype("UInt32"), "is_numeric_UInt32");
19945 check(!pandas::DataFrame::is_numeric_dtype("object"), "not_numeric_object");
19946 check(!pandas::DataFrame::is_numeric_dtype("string"), "not_numeric_string");
19947 check(!pandas::DataFrame::is_numeric_dtype("bool"), "not_numeric_bool");
19948 check(!pandas::DataFrame::is_numeric_dtype("datetime64[ns]"), "not_numeric_datetime");
19949}
abs (pd_test_1_all.cpp:283)
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
all (pd_test_1_all.cpp:247)
237 pandas::BooleanArray has_true({
238 std::optional<bool>(false),
239 std::optional<bool>(true)
240 });
241 any_result = has_true.any();
242 if (!any_result.has_value() || !any_result.value()) {
243 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244 throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245 }
246
247 // Test all()
248 pandas::BooleanArray all_true({
249 std::optional<bool>(true),
250 std::optional<bool>(true)
251 });
252 auto all_result = all_true.all();
253 if (!all_result.has_value() || !all_result.value()) {
254 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255 throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256 }
all_axis (pd_test_1_all.cpp:22302)
22292 std::cout << "====================================== [OK] pd_test_where_mask test suite ========================== " << std::endl;
22293 return 0;
22294 }
22295
22296} // namespace dataframe_tests
22297// ------------------- pd_test_where_mask.cpp (end) -----------------------------
22298
22299// ------------------- pd_test_all_any.cpp (start) -----------------------------
22300// dataframe_tests/pd_test_all_any.cpp
22301// Tests for DataFrame.all_axis() and DataFrame.any_axis() methods
22302
22303#include <iostream>
22304#include <stdexcept>
22305#include <cmath>
22306#include <limits>
22307#include "../pandas/pd_dataframe.h"
22308
22309// CRITICAL: No using namespace directives
22310
22311namespace dataframe_tests {
all_columns_timedelta (pd_test_5_all.cpp:64597)
64587 pandas_tests::check(!threw, "case_35_four_col.did_not_throw", local_fail);
64588 if (threw) return;
64589 pandas_tests::check(min_dtype == "int32",
64590 "case_35_four_col.min_dtype_is_int32_got_" + min_dtype, local_fail);
64591 pandas_tests::check(max_dtype == "int32",
64592 "case_35_four_col.max_dtype_is_int32_got_" + max_dtype, local_fail);
64593}
64594
64595void f_df_min_max_axis1_dtype_9_4287513_case_36_timedelta_only_regression(int& local_fail) {
64596 std::cout << "-- f_df_min_max_axis1_dtype_9_4287513_case_36_timedelta_only_regression\n";
64597 // The all_columns_timedelta() branch is preserved by plan_38 — must
64598 // remain identical pre-fix and post-fix.
64599 pandas::DataFrame df;
64600 df.add_column<pandas::Timedelta>("a", {
64601 pandas::Timedelta("1 days"),
64602 pandas::Timedelta("2 days"),
64603 pandas::Timedelta("3 days")});
64604 df.add_column<pandas::Timedelta>("b", {
64605 pandas::Timedelta("4 days"),
64606 pandas::Timedelta("5 days"),
64607 pandas::Timedelta("6 days")});
any (pd_test_1_all.cpp:226)
216 std::cout << " [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217 throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218 }
219
220 std::cout << " -> tests passed" << std::endl;
221 }
222
223 void pd_test_boolean_array_reductions() {
224 std::cout << "========= BooleanArray: reductions ======================= ";
225
226 // Test any()
227 pandas::BooleanArray all_false({
228 std::optional<bool>(false),
229 std::optional<bool>(false)
230 });
231 auto any_result = all_false.any();
232 if (!any_result.has_value() || any_result.value()) {
233 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234 throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235 }
any_axis (pd_test_1_all.cpp:22302)
22292 std::cout << "====================================== [OK] pd_test_where_mask test suite ========================== " << std::endl;
22293 return 0;
22294 }
22295
22296} // namespace dataframe_tests
22297// ------------------- pd_test_where_mask.cpp (end) -----------------------------
22298
22299// ------------------- pd_test_all_any.cpp (start) -----------------------------
22300// dataframe_tests/pd_test_all_any.cpp
22301// Tests for DataFrame.all_axis() and DataFrame.any_axis() methods
22302
22303#include <iostream>
22304#include <stdexcept>
22305#include <cmath>
22306#include <limits>
22307#include "../pandas/pd_dataframe.h"
22308
22309// CRITICAL: No using namespace directives
22310
22311namespace dataframe_tests {
arr (pd_test_1_all.cpp:45)
35 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36 throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37 }
38
39 std::cout << " -> tests passed" << std::endl;
40 }
41
42 void pd_test_boolean_array_na_handling() {
43 std::cout << "========= BooleanArray: NA handling ======================= ";
44
45 pandas::BooleanArray arr({
46 std::optional<bool>(true),
47 std::nullopt, // NA at index 1
48 std::optional<bool>(false)
49 });
50
51 if (!arr.is_na(1)) {
52 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54 }
assemble_multi_series_dataframe (pd_test_3_all.cpp:29020)
29010}
29011
29012void pd_test_df_apply_assembly_assemble_multi_series_df() {
29013 std::cout << " -- pd_test_df_apply_assembly_assemble_multi_series_df --" << std::endl;
29014 int fail = 0;
29015 auto df = make_numeric_df();
29016 std::vector<std::unique_ptr<pandas::NDFrameBase>> cols;
29017 cols.push_back(df.column_to_typed_series(0));
29018 cols.push_back(df.column_to_typed_series(1));
29019 cols.push_back(df.column_to_typed_series(2));
29020 auto out = pandas::DataFrame::assemble_multi_series_dataframe(
29021 std::move(cols), {"A", "B", "C"}, df.index().clone());
29022 fail += dfa_check(out.nrows() == 3, "nrows == 3");
29023 fail += dfa_check(out.ncols() == 3, "ncols == 3");
29024 fail += dfa_check(out.columns().get_value_str(0) == "A", "col 0 == A");
29025 fail += dfa_check(out.columns().get_value_str(2) == "C", "col 2 == C");
29026 fail += dfa_check(out["A"].get_value_double(1) == 2.0, "A[1] == 2.0");
29027 fail += dfa_check(out["C"].get_value_double(2) == 9.0, "C[2] == 9.0");
29028 if (fail == 0) std::cout << " OK" << std::endl;
29029 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_multi_series_df failed");
29030}
assemble_scalar_series (pd_test_3_all.cpp:28979)
28969 fail += dfa_check(ss->cat_ordered() == true, "cat_ordered preserved");
28970 }
28971 if (fail == 0) std::cout << " OK" << std::endl;
28972 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_column_to_typed_series_category failed");
28973}
28974
28975void pd_test_df_apply_assembly_assemble_scalar_series_int() {
28976 std::cout << " -- pd_test_df_apply_assembly_assemble_scalar_series_int --" << std::endl;
28977 int fail = 0;
28978 auto df = make_numeric_df();
28979 auto s = df.assemble_scalar_series({1.0, 2.0, 3.0}, {"A", "B", "C"});
28980 fail += dfa_check(s.size() == 3, "size == 3");
28981 fail += dfa_check(s.dtype_name() == "int64", "dtype int64 from whole-number detection");
28982 fail += dfa_check(s.index().size() == 3, "index size == 3");
28983 fail += dfa_check(s.index().get_value_str(0) == "A", "index[0] == A");
28984 if (fail == 0) std::cout << " OK" << std::endl;
28985 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_scalar_series_int failed");
28986}
28987
28988void pd_test_df_apply_assembly_assemble_scalar_series_float() {
28989 std::cout << " -- pd_test_df_apply_assembly_assemble_scalar_series_float --" << std::endl;
assemble_scalar_string_series (pd_test_3_all.cpp:29004)
28994 fail += dfa_check(s.dtype_name() == "float64", "dtype float64");
28995 fail += dfa_check(s.index().get_value_str(1) == "B", "index[1] == B");
28996 if (fail == 0) std::cout << " OK" << std::endl;
28997 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_scalar_series_float failed");
28998}
28999
29000void pd_test_df_apply_assembly_assemble_scalar_string_series() {
29001 std::cout << " -- pd_test_df_apply_assembly_assemble_scalar_string_series --" << std::endl;
29002 int fail = 0;
29003 auto df = make_numeric_df();
29004 auto s = df.assemble_scalar_string_series({"foo", "bar", "baz"}, {"A", "B", "C"});
29005 fail += dfa_check(s.size() == 3, "size == 3");
29006 fail += dfa_check(s[0] == "foo" && s[2] == "baz", "values");
29007 fail += dfa_check(s.index().get_value_str(2) == "C", "index[2] == C");
29008 if (fail == 0) std::cout << " OK" << std::endl;
29009 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_scalar_string_series failed");
29010}
29011
29012void pd_test_df_apply_assembly_assemble_multi_series_df() {
29013 std::cout << " -- pd_test_df_apply_assembly_assemble_multi_series_df --" << std::endl;
29014 int fail = 0;
axes (pd_test_1_all.cpp:16602)
16592 // =====================================================================
16593 // Axes Tests
16594 // =====================================================================
16595
16596 void pd_test_ndframe_axes() {
16597 std::cout << "========= axes =================================================" << std::endl;
16598
16599 pandas::Series<double> s({1.0, 2.0, 3.0});
16600
16601 auto axes = s.axes();
16602
16603 bool passed = axes.size() == 1;
16604 if (!passed) {
16605 std::cout << " [FAIL] : in pd_test_ndframe_axes() : axes count" << std::endl;
16606 throw std::runtime_error("pd_test_ndframe_axes failed: axes count");
16607 }
16608
16609 passed = axes[0]->size() == 3;
16610 if (!passed) {
16611 std::cout << " [FAIL] : in pd_test_ndframe_axes() : axis size" << std::endl;
broadcast_series_to_bool_dataframe (pd_test_3_all.cpp:28228)
28218 r.iat<double>(0, b) != -1.0 || r.iat<double>(1, b) != 4.0) {
28219 throw std::runtime_error("pd_test_where_df_condition failed");
28220 }
28221 std::cout << "PASSED" << std::endl;
28222 }
28223
28224 void pd_test_where_series_condition_axis0() {
28225 std::cout << " pd_test_where_series_condition_axis0... ";
28226 auto df = make_df_2x2();
28227 pandas::Series<numpy::float64> s({1.0, 0.0});
28228 auto cond = df.broadcast_series_to_bool_dataframe(s, 0);
28229 auto r = df.where(cond, -1.0);
28230 size_t a = df.get_column_index("A");
28231 size_t b = df.get_column_index("B");
28232 if (r.iat<double>(0, a) != 1.0 || r.iat<double>(1, a) != -1.0 ||
28233 r.iat<double>(0, b) != 3.0 || r.iat<double>(1, b) != -1.0) {
28234 throw std::runtime_error("pd_test_where_series_condition_axis0 failed");
28235 }
28236 std::cout << "PASSED" << std::endl;
28237 }
cbegin (pd_test_2_all.cpp:6185)
6175 std::cout << "========= const iteration ========================";
6176
6177 std::map<std::string, std::vector<int>> data = {
6178 {"Col1", {1}},
6179 {"Col2", {2}}
6180 };
6181 const pandas::DataFrame df(data);
6182
6183 // Test const iteration
6184 std::vector<std::string> cols;
6185 for (auto it = df.cbegin(); it != df.cend(); ++it) {
6186 cols.push_back(*it);
6187 }
6188
6189 bool passed = (cols.size() == 2);
6190 if (!passed) {
6191 std::cout << " [FAIL] : in pd_test_iter_const_iteration() : expected 2 columns" << std::endl;
6192 throw std::runtime_error("pd_test_iter_const_iteration failed: size mismatch");
6193 }
6194
6195 std::cout << " -> tests passed" << std::endl;
ceil (pd_test_1_all.cpp:4949)
4939 throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4940 }
4941
4942 auto f = a.floor();
4943 passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4944 if (!passed) {
4945 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4946 throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4947 }
4948
4949 auto c = a.ceil();
4950 passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4951 if (!passed) {
4952 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
4953 throw std::runtime_error("pd_test_arithmetic_series_round failed: ceil failed");
4954 }
4955
4956 // Round with decimals
4957 pandas::Series<double> b({1.234, 2.567, 3.891});
4958 auto r2 = b.round(2);
4959 passed = std::abs(r2[0] - 1.23) < 0.001 && std::abs(r2[1] - 2.57) < 0.001;
cend (pd_test_2_all.cpp:6185)
6175 std::cout << "========= const iteration ========================";
6176
6177 std::map<std::string, std::vector<int>> data = {
6178 {"Col1", {1}},
6179 {"Col2", {2}}
6180 };
6181 const pandas::DataFrame df(data);
6182
6183 // Test const iteration
6184 std::vector<std::string> cols;
6185 for (auto it = df.cbegin(); it != df.cend(); ++it) {
6186 cols.push_back(*it);
6187 }
6188
6189 bool passed = (cols.size() == 2);
6190 if (!passed) {
6191 std::cout << " [FAIL] : in pd_test_iter_const_iteration() : expected 2 columns" << std::endl;
6192 throw std::runtime_error("pd_test_iter_const_iteration failed: size mismatch");
6193 }
6194
6195 std::cout << " -> tests passed" << std::endl;
classify_column_access (pd_test_2_all.cpp:20218)
20208// =========================================================================
20209// Classification tests
20210// =========================================================================
20211
20212void pd_test_getitem_dispatch_classify_numeric() {
20213 std::cout << "pd_test_getitem_dispatch_classify_numeric" << std::endl;
20214 pandas::DataFrame df;
20215 std::vector<numpy::float64> vals = {1.0, 2.0, 3.0};
20216 df.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals, "A"), true);
20217
20218 auto t = df.classify_column_access("A");
20219 check(t == pandas::DataFrame::ColumnAccessType::NumericColumn, "float64 -> NumericColumn");
20220
20221 // int64 column
20222 pandas::DataFrame df2;
20223 std::vector<numpy::int64> ivals = {10, 20, 30};
20224 auto iseries = std::make_unique<pandas::Series<numpy::int64>>(ivals, "B");
20225 iseries->set_dtype_override("int64");
20226 df2.insert(0, "B", std::move(iseries), true);
20227 auto t2 = df2.classify_column_access("B");
20228 check(t2 == pandas::DataFrame::ColumnAccessType::NumericColumn, "int64 -> NumericColumn");
classify_column_dtypes (pd_test_2_all.cpp:19207)
19197 << (dataframe_tests_agg_dispatch::g_fail == 0 ? "OK" : "FAIL")
19198 << "] pd_test_agg_dispatch test suite ========================== " << std::endl;
19199 return dataframe_tests_agg_dispatch::g_fail;
19200}
19201
19202} // namespace dataframe_tests
19203// ------------------- pd_test_agg_dispatch.cpp (end) -----------------------------
19204
19205// ------------------- pd_test_agg_dtype.cpp (start) -----------------------------
19206// pd_test_agg_dtype.cpp - Tests for aggregation dtype resolution migration
19207// AggColumnInfo struct, classify_column_dtypes(), enhanced sum()
19208
19209#include <iostream>
19210#include <string>
19211#include <vector>
19212#include <cmath>
19213
19214#include "../pandas/pd_dataframe.h"
19215#include "../pandas/pd_series.h"
19216#include "../pandas/pd_index.h"
clip (pd_test_1_all.cpp:5099)
5089 throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs failed");
5090 }
5091
5092 val = a["A"].get_value_str(2);
5093 passed = std::abs(std::stod(val) - 3.0) < 0.001;
5094 if (!passed) {
5095 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : abs for -3 failed" << std::endl;
5096 throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs for -3 failed");
5097 }
5098
5099 auto c = df.clip(-2.0, 2.0);
5100 val = c["A"].get_value_str(2);
5101 passed = std::abs(std::stod(val) - (-2.0)) < 0.001; // -3 clipped to -2
5102 if (!passed) {
5103 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : clip lower failed" << std::endl;
5104 throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: clip lower failed");
5105 }
5106
5107 val = c["A"].get_value_str(3);
5108 passed = std::abs(std::stod(val) - 2.0) < 0.001; // 4 clipped to 2
5109 if (!passed) {
clone (pd_test_1_all.cpp:5776)
5767 std::cout << " -> tests passed" << std::endl;
5768}
5769
5770void pd_test_categorical_index_clone() {
5771 std::cout << "========= clone =======================================";
5772
5773 pandas::CategoricalArray arr({"p", "q", "r"});
5774 pandas::CategoricalIndex idx(arr, "original");
5775
5776 std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5777
5778 bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5779 cloned->name() == idx.name());
5780 if (!passed) {
5781 std::cout << " [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5782 throw std::runtime_error("pd_test_categorical_index_clone failed");
5783 }
5784
5785 std::cout << " -> tests passed" << std::endl;
5786}
col (pd_test_1_all.cpp:6625)
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
6634 std::map<std::string, std::vector<numpy::int64>> dup_data;
6635 dup_data["A"] = {1, 1, 2, 2};
col (pd_test_1_all.cpp:6625)
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
6634 std::map<std::string, std::vector<numpy::int64>> dup_data;
6635 dup_data["A"] = {1, 1, 2, 2};
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283 constexpr size_t N_ROWS = 64; // 4 * 2 * 4 * 2
5284 constexpr size_t N_COLS = 4;
5285
5286 std::vector<std::vector<int64_t>> col_data(N_COLS);
5287 for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289 for (size_t r = 0; r < N_ROWS; ++r) {
5290 // r decomposes as a*16 + b*8 + c*2 + d (strides for 4·2·4·2)
5291 size_t c_lvl = (r / 2) % 4; // 0..3 → C0..C3
5292 bool overridden = (c_lvl == 1 || c_lvl == 3);
5293 for (size_t c = 0; c < N_COLS; ++c) {
5294 int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295 col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296 }
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283 constexpr size_t N_ROWS = 64; // 4 * 2 * 4 * 2
5284 constexpr size_t N_COLS = 4;
5285
5286 std::vector<std::vector<int64_t>> col_data(N_COLS);
5287 for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289 for (size_t r = 0; r < N_ROWS; ++r) {
5290 // r decomposes as a*16 + b*8 + c*2 + d (strides for 4·2·4·2)
5291 size_t c_lvl = (r / 2) % 4; // 0..3 → C0..C3
5292 bool overridden = (c_lvl == 1 || c_lvl == 3);
5293 for (size_t c = 0; c < N_COLS; ++c) {
5294 int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295 col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296 }
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283 constexpr size_t N_ROWS = 64; // 4 * 2 * 4 * 2
5284 constexpr size_t N_COLS = 4;
5285
5286 std::vector<std::vector<int64_t>> col_data(N_COLS);
5287 for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289 for (size_t r = 0; r < N_ROWS; ++r) {
5290 // r decomposes as a*16 + b*8 + c*2 + d (strides for 4·2·4·2)
5291 size_t c_lvl = (r / 2) % 4; // 0..3 → C0..C3
5292 bool overridden = (c_lvl == 1 || c_lvl == 3);
5293 for (size_t c = 0; c < N_COLS; ++c) {
5294 int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295 col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296 }
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283 constexpr size_t N_ROWS = 64; // 4 * 2 * 4 * 2
5284 constexpr size_t N_COLS = 4;
5285
5286 std::vector<std::vector<int64_t>> col_data(N_COLS);
5287 for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289 for (size_t r = 0; r < N_ROWS; ++r) {
5290 // r decomposes as a*16 + b*8 + c*2 + d (strides for 4·2·4·2)
5291 size_t c_lvl = (r / 2) % 4; // 0..3 → C0..C3
5292 bool overridden = (c_lvl == 1 || c_lvl == 3);
5293 for (size_t c = 0; c < N_COLS; ++c) {
5294 int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295 col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296 }
column (pd_test_1_all.cpp:22039)
22029 std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030 std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031 std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033 if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034 passed = false;
22035 error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036 ", A[2]=" + a2 + ", A[3]=" + a3;
22037 }
22038
22039 // Check B column (all should be original)
22040 double b0 = result.iat<double>(0, col_b_idx);
22041 if (b0 != 5.0) {
22042 passed = false;
22043 error_msg = "B[0] should be 5, got " + std::to_string(b0);
22044 }
22045
22046 if (!passed) {
22047 std::cout << " [FAIL] : in pd_test_where_basic() : " << error_msg << std::endl;
22048 throw std::runtime_error("pd_test_where_basic failed: " + error_msg);
22049 }
column (pd_test_1_all.cpp:22039)
22029 std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030 std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031 std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033 if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034 passed = false;
22035 error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036 ", A[2]=" + a2 + ", A[3]=" + a3;
22037 }
22038
22039 // Check B column (all should be original)
22040 double b0 = result.iat<double>(0, col_b_idx);
22041 if (b0 != 5.0) {
22042 passed = false;
22043 error_msg = "B[0] should be 5, got " + std::to_string(b0);
22044 }
22045
22046 if (!passed) {
22047 std::cout << " [FAIL] : in pd_test_where_basic() : " << error_msg << std::endl;
22048 throw std::runtime_error("pd_test_where_basic failed: " + error_msg);
22049 }
column_at (pd_test_3_all.cpp:23814)
23804 std::map<std::string, std::vector<double>> data = {
23805 {"A", {1.0, 2.0, 3.0}}, {"B", {10.0, 20.0, 30.0}}
23806 };
23807 pandas::DataFrame df(data);
23808 pandas::DataFrame result = df.agg("sum", 1);
23809 // Row sums: 11, 22, 33
23810 if (result.nrows() != 3) {
23811 std::cout << " [FAIL] : in pd_test_agg_axis1_sum() : wrong nrows" << std::endl;
23812 throw std::runtime_error("pd_test_agg_axis1_sum failed: wrong nrows");
23813 }
23814 if (std::abs(result.column_at(0).get_value_double(0) - 11.0) > 1e-10 ||
23815 std::abs(result.column_at(0).get_value_double(1) - 22.0) > 1e-10 ||
23816 std::abs(result.column_at(0).get_value_double(2) - 33.0) > 1e-10) {
23817 std::cout << " [FAIL] : in pd_test_agg_axis1_sum() : wrong row sums" << std::endl;
23818 throw std::runtime_error("pd_test_agg_axis1_sum failed: wrong row sums");
23819 }
23820 std::cout << " -> tests passed" << std::endl;
23821}
23822
23823void pd_test_agg_axis1_mean() {
23824 std::cout << "========= DataFrame.agg('mean', axis=1) ===============";
column_dtype_override (pd_test_2_all.cpp:20590)
20581 std::vector<pandas::NamedAggSpec> specs;
20582 specs.push_back({"cnt", "val", "count"});
20583
20584 auto result = gb.agg_named(specs);
20585
20586 check(result.has_column("cnt"), "count_has_col");
20587 check(approx_eq(result["cnt"].get_value_double(0), 2.0), "count_a_eq_2");
20588 check(approx_eq(result["cnt"].get_value_double(1), 3.0), "count_b_eq_3");
20589 // count should always produce int64
20590 check(result.column_dtype_override("cnt") == "int64", "count_dtype_int64");
20591}
20592
20593// =====================================================================
20594// Test: agg_named() dtype propagation — int source + sum -> int64
20595// =====================================================================
20596
20597void pd_test_groupby_apply_named_agg_dtype_int_sum() {
20598 std::cout << " -- pd_test_groupby_apply_named_agg_dtype_int_sum --" << std::endl;
20599
20600 pandas::DataFrame df;
column_to_series_f64 (pd_test_2_all.cpp:19385)
19375 << dataframe_tests_agg_dtype::g_fail << " failed)" << std::endl;
19376
19377 return dataframe_tests_agg_dtype::g_fail;
19378}
19379
19380} // namespace dataframe_tests
19381// ------------------- pd_test_agg_dtype.cpp (end) -----------------------------
19382
19383// ------------------- pd_test_apply_extract.cpp (start) -----------------------------
19384// pd_test_apply_extract.cpp - Tests for DataFrame column/row extraction helpers
19385// column_to_series_f64(), column_to_series_str(), row_values_f64()
19386
19387#include <iostream>
19388#include <string>
19389#include <vector>
19390#include <cmath>
19391
19392#include "../pandas/pd_dataframe.h"
19393#include "../pandas/pd_series.h"
19394#include "../pandas/pd_index.h"
column_to_series_str (pd_test_2_all.cpp:19385)
19375 << dataframe_tests_agg_dtype::g_fail << " failed)" << std::endl;
19376
19377 return dataframe_tests_agg_dtype::g_fail;
19378}
19379
19380} // namespace dataframe_tests
19381// ------------------- pd_test_agg_dtype.cpp (end) -----------------------------
19382
19383// ------------------- pd_test_apply_extract.cpp (start) -----------------------------
19384// pd_test_apply_extract.cpp - Tests for DataFrame column/row extraction helpers
19385// column_to_series_f64(), column_to_series_str(), row_values_f64()
19386
19387#include <iostream>
19388#include <string>
19389#include <vector>
19390#include <cmath>
19391
19392#include "../pandas/pd_dataframe.h"
19393#include "../pandas/pd_series.h"
19394#include "../pandas/pd_index.h"
column_to_typed_series (pd_test_3_all.cpp:28921)
28911 fail += dfa_check(rd.numeric_vals[1] == 30.0, "numeric_vals[1]==30.0");
28912 fail += dfa_check(rd.numeric_vals[2] == 100.0, "numeric_vals[2]==100.0");
28913 if (fail == 0) std::cout << " OK" << std::endl;
28914 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_extract_row_typed_mixed failed");
28915}
28916
28917void pd_test_df_apply_assembly_column_to_typed_series_float() {
28918 std::cout << " -- pd_test_df_apply_assembly_column_to_typed_series_float --" << std::endl;
28919 int fail = 0;
28920 auto df = make_numeric_df();
28921 auto s = df.column_to_typed_series(0);
28922 fail += dfa_check(s != nullptr, "series not null");
28923 fail += dfa_check(s->size() == 3, "size == 3");
28924 auto* fs = dynamic_cast<pandas::Series<numpy::float64>*>(s.get());
28925 fail += dfa_check(fs != nullptr, "dynamic_cast<Series<f64>>");
28926 if (fs) {
28927 fail += dfa_check((*fs)[0] == 1.0 && (*fs)[2] == 3.0, "values correct");
28928 }
28929 if (fail == 0) std::cout << " OK" << std::endl;
28930 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_column_to_typed_series_float failed");
28931}
columns (pd_test_1_all.cpp:6220)
6210 throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211 }
6212
6213 // Test nbytes > 0 for non-empty
6214 if (df.nbytes() == 0) {
6215 std::cout << " [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216 throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217 }
6218
6219 // Test columns index
6220 if (df.columns().size() != 3) {
6221 std::cout << " [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222 throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223 }
6224
6225 // Test dtypes
6226 auto dtypes = df.dtypes();
6227 if (dtypes.size() != 3) {
6228 std::cout << " [FAIL] : in pd_test_dataframe_properties() : dtypes size != 3" << std::endl;
6229 throw std::runtime_error("pd_test_dataframe_properties failed: dtypes size != 3");
6230 }
columns_index_dtype (pd_test_3_all.cpp:23051)
23041 df2.add_column<double>("B", {3.0, 4.0});
23042 pandas::construct_helpers::apply_column_index_name(df2, "");
23043 // name should remain unset or empty
23044 if (df2.columns().name().has_value() && !df2.columns().name()->empty())
23045 throw std::runtime_error("test_apply_column_metadata: empty name should not set columns_name");
23046
23047 // apply_int_column_dtype with true
23048 pandas::DataFrame df3;
23049 df3.add_column<double>("C", {5.0, 6.0});
23050 pandas::construct_helpers::apply_int_column_dtype(df3, true);
23051 if (df3.columns_index_dtype() != "int64")
23052 throw std::runtime_error("test_apply_column_metadata: columns_index_dtype should be 'int64'");
23053
23054 // apply_int_column_dtype with false -> no change
23055 pandas::DataFrame df4;
23056 df4.add_column<double>("D", {7.0, 8.0});
23057 pandas::construct_helpers::apply_int_column_dtype(df4, false);
23058 if (df4.columns_index_dtype() == "int64")
23059 throw std::runtime_error("test_apply_column_metadata: false flag should not set int64 dtype");
23060
23061 std::cout << " -> test passed" << std::endl;
columns_level_names (pd_test_5_all.cpp:33080)
33070 // Flat: just dump columns_index_
33071 std::cout << tag << " flat_columns=[";
33072 for (size_t c = 0; c < df.ncols(); ++c) {
33073 if (c) std::cout << ", ";
33074 std::cout << "\"" << df.columns().get_value_str(c) << "\"";
33075 }
33076 std::cout << "]\n";
33077 return;
33078 }
33079 const auto& levels = df.columns_levels();
33080 const auto& lnames = df.columns_level_names();
33081 for (size_t lvl = 0; lvl < levels.size(); ++lvl) {
33082 std::cout << tag << " level[" << lvl << "]"
33083 << " name=\""
33084 << (lvl < lnames.size() ? lnames[lvl] : std::string("<?>"))
33085 << "\" values=[";
33086 for (size_t c = 0; c < levels[lvl].size(); ++c) {
33087 if (c) std::cout << ", ";
33088 std::cout << "\"" << levels[lvl][c] << "\"";
33089 }
33090 std::cout << "]\n";
columns_levels (pd_test_3_all.cpp:23276)
23266 // Create a df with 3 columns
23267 std::map<std::string, std::vector<double>> data = {
23268 {"a\tx", {1.0}}, {"a\ty", {2.0}}, {"b\tx", {3.0}}
23269 };
23270 pandas::DataFrame df(data);
23271
23272 pandas::construct_helpers::apply_multiindex_columns(df, mi);
23273
23274 // Verify columns_levels
23275 if (!df.has_columns_levels()) throw std::runtime_error("expected columns_levels");
23276 const auto& levels = df.columns_levels();
23277 if (levels.size() != 2) throw std::runtime_error("expected 2 levels, got " + std::to_string(levels.size()));
23278 if (levels[0].size() != 3) throw std::runtime_error("expected 3 entries in level 0");
23279 if (levels[0][0] != "a" || levels[0][2] != "b") throw std::runtime_error("wrong level 0 values");
23280 if (levels[1][0] != "x" || levels[1][1] != "y") throw std::runtime_error("wrong level 1 values");
23281
23282 std::cout << " -> test passed" << std::endl;
23283}
23284
23285void test_reconstruct_multiindex() {
23286 std::cout << "========= reconstruct_multiindex ======================";
columns_name (pd_test_3_all.cpp:10809)
10799 df.add_column<int64_t>("B", {4, 5, 6});
10800 auto s = df.shape_2d();
10801 if (s.first != 3 || s.second != 2) {
10802 std::cout << " [FAIL] : in pd_test_3_all_shape_2d() : wrong dimensions" << std::endl;
10803 throw std::runtime_error("pd_test_3_all_shape_2d failed");
10804 }
10805 std::cout << " -> tests passed" << std::endl;
10806}
10807
10808void pd_test_3_all_columns_name_setter() {
10809 std::cout << "========= DataFrame.columns_name(string) setter ========";
10810 pandas::DataFrame df;
10811 df.add_column<int64_t>("A", {1, 2, 3});
10812 df.columns_name("MyColumns");
10813 pandas::DataFrame df2;
10814 df2.add_column<int64_t>("A", {1, 2, 3});
10815 df2.set_columns_name("MyColumns");
10816 if (df.to_string() != df2.to_string()) {
10817 std::cout << " [FAIL] : in pd_test_3_all_columns_name_setter() : mismatch" << std::endl;
10818 throw std::runtime_error("pd_test_3_all_columns_name_setter failed");
10819 }
compute_string_column_stats (pd_test_2_all.cpp:19759)
19749 << "] pd_test_broadcasting test suite ==========================" << std::endl;
19750
19751 return dataframe_tests_broadcasting::g_fail;
19752}
19753
19754} // namespace dataframe_tests
19755// ------------------- pd_test_broadcasting.cpp (end) -----------------------------
19756
19757// ------------------- pd_test_describe.cpp (start) -----------------------------
19758// pd_test_describe.cpp - Tests for describe_full() migration
19759// StringColumnStats, compute_string_column_stats(), describe_full() modes
19760
19761#include <iostream>
19762#include <string>
19763#include <vector>
19764#include <cmath>
19765
19766#include "../pandas/pd_dataframe.h"
19767#include "../pandas/pd_series.h"
19768#include "../pandas/pd_index.h"
corr (pd_test_1_all.cpp:4655)
4645 }
4646
4647 void pd_test_aggregation_dataframe_corr() {
4648 std::cout << "========= DataFrame corr ========================";
4649
4650 std::map<std::string, std::vector<double>> data;
4651 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652 data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0}; // Perfect correlation
4653 pandas::DataFrame df(data);
4654
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
corrwith (pd_test_2_all.cpp:2013)
2004 std::cout << "====================================== [OK] pd_test_combine test suite ========================== " << std::endl;
2005 return 0;
2006 }
2007
2008} // namespace dataframe_tests
2009// ------------------- pd_test_combine.cpp (end) -----------------------------
2010
2011// ------------------- pd_test_corrwith.cpp (start) -----------------------------
2012// dataframe_tests/pd_test_corrwith.cpp
2013// Test suite for DataFrame::corrwith() method
2014
2015#include <iostream>
2016#include <stdexcept>
2017#include <cmath>
2018#include "../pandas/pd_dataframe.h"
2019
2020namespace dataframe_tests {
2021 namespace dataframe_tests_corrwith {
2022
2023 // Helper to check if two doubles are approximately equal
corrwith (pd_test_2_all.cpp:2013)
2004 std::cout << "====================================== [OK] pd_test_combine test suite ========================== " << std::endl;
2005 return 0;
2006 }
2007
2008} // namespace dataframe_tests
2009// ------------------- pd_test_combine.cpp (end) -----------------------------
2010
2011// ------------------- pd_test_corrwith.cpp (start) -----------------------------
2012// dataframe_tests/pd_test_corrwith.cpp
2013// Test suite for DataFrame::corrwith() method
2014
2015#include <iostream>
2016#include <stdexcept>
2017#include <cmath>
2018#include "../pandas/pd_dataframe.h"
2019
2020namespace dataframe_tests {
2021 namespace dataframe_tests_corrwith {
2022
2023 // Helper to check if two doubles are approximately equal
cov (pd_test_1_all.cpp:4690)
4680 std::cout << " -> tests passed" << std::endl;
4681 }
4682
4683 void pd_test_aggregation_dataframe_cov() {
4684 std::cout << "========= DataFrame cov =========================";
4685
4686 std::map<std::string, std::vector<double>> data;
4687 data["A"] = {1.0, 2.0, 3.0};
4688 pandas::DataFrame df(data);
4689
4690 auto cov_df = df.cov();
4691
4692 // Check dimensions
4693 bool passed = cov_df.nrows() == 1 && cov_df.ncols() == 1;
4694 if (!passed) {
4695 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_cov() : cov should be 1x1" << std::endl;
4696 throw std::runtime_error("pd_test_aggregation_dataframe_cov failed: cov should be 1x1");
4697 }
4698
4699 // Var(A) = 1.0 with ddof=1
4700 std::string aa = cov_df["A"].get_value_str(0);
dt (pd_test_3_all.cpp:18239)
18229 if (offset.freqstr() != "D") {
18230 std::cout << " [FAIL] : Day freqstr() failed" << std::endl;
18231 throw std::runtime_error("pd_test_day_offset: freqstr() failed");
18232 }
18233 if (offset.name() != "Day") {
18234 std::cout << " [FAIL] : Day name() failed" << std::endl;
18235 throw std::runtime_error("pd_test_day_offset: name() failed");
18236 }
18237
18238 // Test apply
18239 numpy::datetime64 dt("2020-01-15");
18240 auto result = offset.apply(dt);
18241 std::tm tm = result.toTm();
18242 if (tm.tm_mday != 20) {
18243 std::cout << " [FAIL] : Day apply() failed, got day " << tm.tm_mday << std::endl;
18244 throw std::runtime_error("pd_test_day_offset: apply() failed");
18245 }
18246
18247 std::cout << " -> tests passed" << std::endl;
18248}
dtype_name (pd_test_1_all.cpp:10104)
10094}
10095
10096void pd_test_extension_index_array_constructor() {
10097 std::cout << "========= array constructor =========================";
10098
10099 pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
10100 pandas::CategoricalIndex idx(arr, "fruits");
10101
10102 bool passed = (idx.size() == 4 && !idx.empty() &&
10103 idx.name().has_value() && *idx.name() == "fruits" &&
10104 idx.dtype_name() == "category");
10105 if (!passed) {
10106 std::cout << " [FAIL] : in pd_test_extension_index_array_constructor() : array constructor check failed" << std::endl;
10107 throw std::runtime_error("pd_test_extension_index_array_constructor failed");
10108 }
10109
10110 std::cout << " -> tests passed" << std::endl;
10111}
10112
10113void pd_test_extension_index_copy_constructor() {
10114 std::cout << "========= copy constructor =========================";
dtype_string (pd_test_5_all.cpp:93010)
93000 pandas_tests::check(threw_neg,
93001 "f_test_25_iat_resolved_out_of_range_throws_927551.neg_throws",
93002 local_fail);
93003}
93004
93005void case_27_propagate_dtype_nan_suppresses() {
93006 // has_nan_from_upcasting=true => no propagation (column dtype unchanged).
93007 std::cout << "-- case_27_propagate_dtype_nan_suppresses()\n";
93008 int local_fail = 0;
93009 auto df = make_1row_df();
93010 std::string col_before = df.dtype_string("x");
93011
93012 pandas::AlignmentResult ar;
93013 ar.kind = pandas::SetItemResult::PARTIAL_ALIGN;
93014 ar.has_nan_from_upcasting = true;
93015 ar.aligned_values = {std::numeric_limits<double>::quiet_NaN()};
93016
93017 pandas::propagate_aligned_dtype<std::int64_t>(df, std::string("x"), ar,
93018 std::string("int64"));
93019 std::string col_after = df.dtype_string("x");
93020 pandas_tests::check(col_before == col_after,
dtypes (pd_test_1_all.cpp:6226)
6216 throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217 }
6218
6219 // Test columns index
6220 if (df.columns().size() != 3) {
6221 std::cout << " [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222 throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223 }
6224
6225 // Test dtypes
6226 auto dtypes = df.dtypes();
6227 if (dtypes.size() != 3) {
6228 std::cout << " [FAIL] : in pd_test_dataframe_properties() : dtypes size != 3" << std::endl;
6229 throw std::runtime_error("pd_test_dataframe_properties failed: dtypes size != 3");
6230 }
6231
6232 std::cout << " -> tests passed" << std::endl;
6233 }
6234
6235 // =====================================================================
6236 // Test: Column Access
empty (pd_test_1_all.cpp:941)
932#include "../pandas/pd_config.h"
933
934namespace dataframe_tests {
935
936namespace dataframe_tests_config {
937
938 void pd_test_config_version() {
939 std::cout << "========= df_config: version info ======================= ";
940 const char* version = pandas::DataFrameInfo::version();
941 if (version == nullptr || std::string(version).empty()) {
942 std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
943 throw std::runtime_error("pd_test_config_version failed: version is null or empty");
944 }
945 std::cout << "-> tests passed" << std::endl;
946 }
947
948 void pd_test_config_na_repr() {
949 std::cout << "========= df_config: NA representation ======================= ";
950 const char* na_repr = pandas::DataFrameConfig::get_na_repr();
951 if (na_repr == nullptr) {
eval (pd_test_2_all.cpp:2722)
2713 std::cout << "====================================== [OK] pd_test_droplevel test suite ========================== " << std::endl;
2714 return 0;
2715 }
2716
2717} // namespace dataframe_tests
2718// ------------------- pd_test_droplevel.cpp (end) -----------------------------
2719
2720// ------------------- pd_test_eval.cpp (start) -----------------------------
2721// dataframe_tests/pd_test_eval.cpp
2722// Test for DataFrame.eval() method
2723
2724#include <iostream>
2725#include <stdexcept>
2726#include <cmath>
2727#include <vector>
2728#include <map>
2729#include <string>
2730#include <limits>
2731#include <variant>
2732#include "../pandas/pd_dataframe.h"
extract_column_as_numeric_series (pd_test_2_all.cpp:20328)
20318// =========================================================================
20319
20320void pd_test_getitem_dispatch_extract_numeric() {
20321 std::cout << "pd_test_getitem_dispatch_extract_numeric" << std::endl;
20322 pandas::DataFrame df;
20323 std::vector<numpy::float64> vals = {1.5, 2.5, 3.5};
20324 df.insert(0, "val", std::make_unique<pandas::Series<numpy::float64>>(vals, "val"), true);
20325 df.set_index(std::make_unique<pandas::Index<std::string>>(
20326 std::vector<std::string>{"a", "b", "c"}));
20327
20328 auto s = df.extract_column_as_numeric_series("val");
20329 check(s.size() == 3, "numeric size");
20330 check(std::abs(s.values().getElementAt({0}) - 1.5) < 1e-10, "numeric val[0]");
20331 check(std::abs(s.values().getElementAt({2}) - 3.5) < 1e-10, "numeric val[2]");
20332 check(s.name() == "val", "numeric name");
20333
20334 // int32 dtype override preserved
20335 pandas::DataFrame df2;
20336 std::vector<numpy::float64> ivals = {10.0, 20.0};
20337 auto is = std::make_unique<pandas::Series<numpy::float64>>(ivals, "ival");
20338 is->set_dtype_override("int32");
extract_row_typed (pd_test_3_all.cpp:28887)
28877 df.add_column("Name", std::vector<std::string>{"Alice", "Bob", "Carol"});
28878 df.add_column("Age", std::vector<numpy::float64>{30.0, 25.0, 40.0});
28879 df.add_column("Salary", std::vector<numpy::float64>{100.0, 200.0, 300.0});
28880 return df;
28881}
28882
28883void pd_test_df_apply_assembly_extract_row_typed_numeric() {
28884 std::cout << " -- pd_test_df_apply_assembly_extract_row_typed_numeric --" << std::endl;
28885 int fail = 0;
28886 auto df = make_numeric_df();
28887 auto rd = df.extract_row_typed(1);
28888 fail += dfa_check(rd.size() == 3, "row size == 3");
28889 fail += dfa_check(!rd.has_string_cols, "all numeric => !has_string_cols");
28890 fail += dfa_check(rd.col_names[0] == "A" && rd.col_names[2] == "C", "col_names");
28891 fail += dfa_check(!rd.is_string_col[0] && !rd.is_string_col[1] && !rd.is_string_col[2],
28892 "is_string_col all false");
28893 fail += dfa_check(rd.numeric_vals[0] == 2.0, "numeric_vals[0]==2.0");
28894 fail += dfa_check(rd.numeric_vals[1] == 5.0, "numeric_vals[1]==5.0");
28895 fail += dfa_check(rd.numeric_vals[2] == 8.0, "numeric_vals[2]==8.0");
28896 if (fail == 0) std::cout << " OK" << std::endl;
28897 if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_extract_row_typed_numeric failed");
file (pd_test_2_all.cpp:3463)
3453 {"C", {100, 200, 300, 400, 500}}
3454 };
3455
3456 pandas::DataFrame df(data);
3457
3458 // Export to Excel
3459 std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460 df.to_excel(filepath);
3461
3462 // Verify file was created
3463 std::ifstream file(filepath, std::ios::binary);
3464 if (!file.good()) {
3465 std::cout << " [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466 throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467 }
3468
3469 // Check file size is reasonable (valid XLSX should be > 1KB)
3470 file.seekg(0, std::ios::end);
3471 auto size = file.tellg();
3472 if (size < 1000) {
3473 std::cout << " [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
filter (pd_test_3_all.cpp:2805)
2795 threw = true;
2796 }
2797 if (!threw) {
2798 throw std::runtime_error("bool_() should throw for multi-element DataFrame");
2799 }
2800
2801 std::cout << " -> tests passed" << std::endl;
2802}
2803
2804void pd_test_3_all_df_filter() {
2805 std::cout << "========= DataFrame.filter() =============================";
2806
2807 std::map<std::string, std::vector<double>> data = {
2808 {"col_a", {1.0, 2.0, 3.0}},
2809 {"col_b", {4.0, 5.0, 6.0}},
2810 {"other", {7.0, 8.0, 9.0}}
2811 };
2812 pandas::DataFrame df(data);
2813
2814 // Test filter by items
2815 pandas::DataFrame filtered_items = df.filter({"col_a", "col_b"});
flags (pd_test_1_all.cpp:16397)
16387 // =====================================================================
16388 // Series Flags Integration Tests
16389 // =====================================================================
16390
16391 void pd_test_ndframe_series_flags() {
16392 std::cout << "========= series flags integration =============================" << std::endl;
16393
16394 pandas::Series<int> s({1, 2, 3});
16395
16396 // Test default flags
16397 bool passed = s.flags().allows_duplicate_labels == true;
16398 if (!passed) {
16399 std::cout << " [FAIL] : in pd_test_ndframe_series_flags() : default allows_duplicate_labels" << std::endl;
16400 throw std::runtime_error("pd_test_ndframe_series_flags failed: default allows_duplicate_labels");
16401 }
16402
16403 passed = s.flags().copy_on_write == false;
16404 if (!passed) {
16405 std::cout << " [FAIL] : in pd_test_ndframe_series_flags() : default copy_on_write" << std::endl;
16406 throw std::runtime_error("pd_test_ndframe_series_flags failed: default copy_on_write");
16407 }
floor (pd_test_1_all.cpp:4942)
4933 pandas::Series<double> a({1.4, 2.5, 3.6, -1.4, -2.5});
4934
4935 auto r = a.round();
4936 bool passed = std::abs(r[0] - 1.0) < 0.001 && std::abs(r[2] - 4.0) < 0.001;
4937 if (!passed) {
4938 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : round failed" << std::endl;
4939 throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4940 }
4941
4942 auto f = a.floor();
4943 passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4944 if (!passed) {
4945 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4946 throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4947 }
4948
4949 auto c = a.ceil();
4950 passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4951 if (!passed) {
4952 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
format_percentile_labels (pd_test_2_all.cpp:19921)
19911}
19912
19913// =====================================================================
19914// Test: percentile label formatting
19915// =====================================================================
19916
19917void pd_test_describe_percentile_labels() {
19918 std::cout << " -- pd_test_describe_percentile_labels --" << std::endl;
19919
19920 // Standard percentiles
19921 auto labels = pandas::DataFrame::format_percentile_labels({0.25, 0.5, 0.75});
19922 check(labels.size() == 3, "pct_labels_count_3");
19923 check(labels[0] == "25%", "pct_label_25");
19924 check(labels[1] == "50%", "pct_label_50");
19925 check(labels[2] == "75%", "pct_label_75");
19926
19927 // Non-integer percentile
19928 auto labels2 = pandas::DataFrame::format_percentile_labels({0.333});
19929 check(labels2[0] == "33.3%", "pct_label_33_3");
19930}
frame_dtype_override (pd_test_5_all.cpp:146164)
146154 };
146155}
146156
146157void case_100_apply_int_all_int_axis0(int& lf) {
146158 std::cout << "-- case_100_apply_int_all_int_axis0\n";
146159 auto df = mk_df_one_int64("a", {1.0, 2.0, 3.0});
146160 auto h = mk_hist_all_int();
146161 auto out = df.apply_resolved_typed(cb_int_sum(), h, 0);
146162 pandas_tests::check(out.column_dtype_override("a") == "int64",
146163 "C_26k_case_100_apply_int_all_int_axis0()_a", lf);
146164 pandas_tests::check(out.frame_dtype_override() == "int64",
146165 "C_26k_case_100_apply_int_all_int_axis0()_frame", lf);
146166}
146167
146168void case_101_apply_float_all_int_axis0(int& lf) {
146169 std::cout << "-- case_101_apply_float_all_int_axis0\n";
146170 auto df = mk_df_one_float64("a", {1.5, 2.5, 3.5});
146171 auto h = mk_hist_all_int();
146172 auto out = df.apply_resolved_typed(cb_int_sum(), h, 0);
146173 // plan_02: all-int returns infer int64 even on a float64 source
146174 // (pandas 2.2.2: df(float64).apply(int(sum),axis=0).dtype == int64).
has_column (pd_test_1_all.cpp:6256)
6246 pandas::DataFrame df(data);
6247
6248 // Test single column access
6249 const pandas::NDFrameBase& col_a = df["A"];
6250 if (col_a.size() != 3) {
6251 std::cout << " [FAIL] : in pd_test_dataframe_column_access() : column A size != 3" << std::endl;
6252 throw std::runtime_error("pd_test_dataframe_column_access failed: column A size != 3");
6253 }
6254
6255 // Test has_column
6256 if (!df.has_column("A")) {
6257 std::cout << " [FAIL] : in pd_test_dataframe_column_access() : has_column A should be true" << std::endl;
6258 throw std::runtime_error("pd_test_dataframe_column_access failed: has_column A should be true");
6259 }
6260 if (df.has_column("Z")) {
6261 std::cout << " [FAIL] : in pd_test_dataframe_column_access() : has_column Z should be false" << std::endl;
6262 throw std::runtime_error("pd_test_dataframe_column_access failed: has_column Z should be false");
6263 }
6264
6265 // Test multiple column access
6266 pandas::DataFrame subset = df[std::vector<std::string>{"A", "C"}];
has_columns_levels (pd_test_2_all.cpp:21382)
21372 auto cols = result.columns().to_list();
21373 bool has_sum = false, has_mean = false;
21374 for (const auto& c : cols) {
21375 if (c.find("sum") != std::string::npos) has_sum = true;
21376 if (c.find("mean") != std::string::npos) has_mean = true;
21377 }
21378 check(has_sum, "has_sum_cols");
21379 check(has_mean, "has_mean_cols");
21380
21381 // Check that it has multi-level columns
21382 check(result.has_columns_levels(), "has_columns_levels");
21383
21384 // Verify values: check that expected sum values exist somewhere in the result
21385 // Column names pattern: (sum, A), (sum, B), (mean, A), (mean, B)
21386 // or (sum, (sales, A)), etc. depending on single/multi value columns
21387 auto all_cols = result.columns().to_list();
21388 bool found_sum_col = false;
21389 for (const auto& c : all_cols) {
21390 if (c.find("sum") != std::string::npos) {
21391 const auto& col_ref = result[c];
21392 for (size_t r = 0; r < result.nrows(); ++r) {
has_multiindex (pd_test_1_all.cpp:27019)
27010 std::map<std::string, std::vector<std::string>> data = {
27011 {"A", {"a", "a", "b", "b"}},
27012 {"B", {"x", "x", "y", "y"}}
27013 };
27014 pandas::DataFrame df(data);
27015
27016 auto result = df.value_counts();
27017 auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27018
27019 if (!counts.has_multiindex()) {
27020 std::cout << " [FAIL] : expected MultiIndex" << std::endl;
27021 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27022 }
27023
27024 const auto& midx = counts.multiindex();
27025
27026 // Should have 2 levels
27027 if (midx.nlevels() != 2) {
27028 std::cout << " [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27029 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
hasnans (pd_test_1_all.cpp:5363)
5353void pd_test_categorical_index_from_codes() {
5354 std::cout << "========= from_codes =================================";
5355
5356 std::vector<numpy::int32> codes = {0, 1, 0, 2, -1}; // -1 = NA
5357 std::vector<std::string> categories = {"low", "medium", "high"};
5358
5359 pandas::CategoricalIndex idx = pandas::CategoricalIndex::from_codes(codes, categories, true, "level");
5360
5361 bool passed = (idx.size() == 5 && idx.num_categories() == 3 &&
5362 idx.ordered() && idx.name().has_value() && *idx.name() == "level" &&
5363 idx.hasnans()); // has NA from code -1
5364 if (!passed) {
5365 std::cout << " [FAIL] : in pd_test_categorical_index_from_codes()" << std::endl;
5366 throw std::runtime_error("pd_test_categorical_index_from_codes failed");
5367 }
5368
5369 std::cout << " -> tests passed" << std::endl;
5370}
5371
5372void pd_test_categorical_index_simple_new() {
5373 std::cout << "========= _simple_new =================================";
index (pd_test_1_all.cpp:6680)
6670 void pd_test_dataframe_index_ops() {
6671 std::cout << "========= index operations =================";
6672
6673 // Test set_axis (rows)
6674 {
6675 std::map<std::string, std::vector<int>> data;
6676 data["A"] = {1, 2, 3};
6677 pandas::DataFrame df(data);
6678
6679 auto renamed = df.set_axis({"x", "y", "z"}, 0);
6680 std::string idx0 = renamed.index().get_value_str(0);
6681 if (idx0 != "x") {
6682 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6683 throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
6684 }
6685 }
6686
6687 // Test set_axis (columns)
6688 {
6689 std::map<std::string, std::vector<int>> data;
6690 data["A"] = {1, 2};
index_name (pd_test_3_all.cpp:1290)
1280 }
1281
1282 pandas::DataFrame df3;
1283 df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284 df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285 if (df3.index().get_value_str(4) != "b") {
1286 throw std::runtime_error("set_index_from_strings: values wrong");
1287 }
1288 }
1289
1290 // E. set_index_name + index_name() getter/setter
1291 {
1292 pandas::DataFrame df;
1293 df.add_column<int64_t>("val", {1, 2, 3});
1294 df.set_index_from_list({"r0", "r1", "r2"});
1295
1296 df.set_index_name("City");
1297 if (!df.index_name().has_value() || df.index_name().value() != "City") {
1298 throw std::runtime_error("set_index_name: round-trip failed");
1299 }
index_name (pd_test_3_all.cpp:1290)
1280 }
1281
1282 pandas::DataFrame df3;
1283 df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284 df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285 if (df3.index().get_value_str(4) != "b") {
1286 throw std::runtime_error("set_index_from_strings: values wrong");
1287 }
1288 }
1289
1290 // E. set_index_name + index_name() getter/setter
1291 {
1292 pandas::DataFrame df;
1293 df.add_column<int64_t>("val", {1, 2, 3});
1294 df.set_index_from_list({"r0", "r1", "r2"});
1295
1296 df.set_index_name("City");
1297 if (!df.index_name().has_value() || df.index_name().value() != "City") {
1298 throw std::runtime_error("set_index_name: round-trip failed");
1299 }
isetitem (pd_test_3_all.cpp:2882)
2873 // Verify first element
2874 if (std::abs(arr.getElementAt({0, 0}) - 1.0) > 0.001) {
2875 throw std::runtime_error("to_numpy value failed");
2876 }
2877
2878 std::cout << " -> tests passed" << std::endl;
2879}
2880
2881void pd_test_3_all_df_isetitem() {
2882 std::cout << "========= DataFrame.isetitem() ===========================";
2883
2884 std::map<std::string, std::vector<double>> data = {
2885 {"A", {1.0, 2.0, 3.0}},
2886 {"B", {4.0, 5.0, 6.0}}
2887 };
2888 pandas::DataFrame df(data);
2889
2890 // Set first column to new values
2891 df.isetitem(static_cast<size_t>(0), std::vector<double>{10.0, 20.0, 30.0});
iss (pd_test_2_all.cpp:10348)
10338 try {
10339 df.to_clipboard(true, '\t', false); // index=false
10340 } catch (const std::exception& e) {
10341 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343 }
10344
10345#ifdef _WIN32
10346 std::string clipboard = get_clipboard_text_with_retry();
10347 // Without index, first line should be just "A"
10348 std::istringstream iss(clipboard);
10349 std::string first_line;
10350 std::getline(iss, first_line);
10351 passed = first_line == "A";
10352 if (!passed) {
10353 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355 }
10356#endif
10357
10358 std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338 try {
10339 df.to_clipboard(true, '\t', false); // index=false
10340 } catch (const std::exception& e) {
10341 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343 }
10344
10345#ifdef _WIN32
10346 std::string clipboard = get_clipboard_text_with_retry();
10347 // Without index, first line should be just "A"
10348 std::istringstream iss(clipboard);
10349 std::string first_line;
10350 std::getline(iss, first_line);
10351 passed = first_line == "A";
10352 if (!passed) {
10353 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355 }
10356#endif
10357
10358 std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338 try {
10339 df.to_clipboard(true, '\t', false); // index=false
10340 } catch (const std::exception& e) {
10341 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343 }
10344
10345#ifdef _WIN32
10346 std::string clipboard = get_clipboard_text_with_retry();
10347 // Without index, first line should be just "A"
10348 std::istringstream iss(clipboard);
10349 std::string first_line;
10350 std::getline(iss, first_line);
10351 passed = first_line == "A";
10352 if (!passed) {
10353 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355 }
10356#endif
10357
10358 std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338 try {
10339 df.to_clipboard(true, '\t', false); // index=false
10340 } catch (const std::exception& e) {
10341 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343 }
10344
10345#ifdef _WIN32
10346 std::string clipboard = get_clipboard_text_with_retry();
10347 // Without index, first line should be just "A"
10348 std::istringstream iss(clipboard);
10349 std::string first_line;
10350 std::getline(iss, first_line);
10351 passed = first_line == "A";
10352 if (!passed) {
10353 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355 }
10356#endif
10357
10358 std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338 try {
10339 df.to_clipboard(true, '\t', false); // index=false
10340 } catch (const std::exception& e) {
10341 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343 }
10344
10345#ifdef _WIN32
10346 std::string clipboard = get_clipboard_text_with_retry();
10347 // Without index, first line should be just "A"
10348 std::istringstream iss(clipboard);
10349 std::string first_line;
10350 std::getline(iss, first_line);
10351 passed = first_line == "A";
10352 if (!passed) {
10353 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355 }
10356#endif
10357
10358 std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338 try {
10339 df.to_clipboard(true, '\t', false); // index=false
10340 } catch (const std::exception& e) {
10341 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343 }
10344
10345#ifdef _WIN32
10346 std::string clipboard = get_clipboard_text_with_retry();
10347 // Without index, first line should be just "A"
10348 std::istringstream iss(clipboard);
10349 std::string first_line;
10350 std::getline(iss, first_line);
10351 passed = first_line == "A";
10352 if (!passed) {
10353 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355 }
10356#endif
10357
10358 std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338 try {
10339 df.to_clipboard(true, '\t', false); // index=false
10340 } catch (const std::exception& e) {
10341 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343 }
10344
10345#ifdef _WIN32
10346 std::string clipboard = get_clipboard_text_with_retry();
10347 // Without index, first line should be just "A"
10348 std::istringstream iss(clipboard);
10349 std::string first_line;
10350 std::getline(iss, first_line);
10351 passed = first_line == "A";
10352 if (!passed) {
10353 std::cout << " [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354 throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355 }
10356#endif
10357
10358 std::cout << " -> tests passed" << std::endl;
match_multiindex_columns (pd_test_3_all.cpp:29247)
29237 pandas::DataFrame df;
29238 df.add_column<numpy::float64>("c0", {1.0, 2.0});
29239 df.add_column<numpy::float64>("c1", {3.0, 4.0});
29240 df.add_column<numpy::float64>("c2", {5.0, 6.0});
29241 std::vector<std::vector<std::string>> levels = {
29242 {"bar", "bar", "foo"},
29243 {"one", "two", "one"}
29244 };
29245 df.set_columns_levels(levels, {"L0", "L1"});
29246
29247 auto exact = df.match_multiindex_columns({"bar", "one"});
29248 fail += sm_check(exact.exact_match, "exact match flag set");
29249 fail += sm_check(exact.matching_cols.size() == 1 && exact.matching_cols[0] == 0, "exact match col index");
29250
29251 auto partial = df.match_multiindex_columns({"bar"});
29252 fail += sm_check(!partial.exact_match, "partial match not exact");
29253 fail += sm_check(partial.matching_cols.size() == 2, "partial match 2 cols");
29254 fail += sm_check(partial.matching_cols[0] == 0 && partial.matching_cols[1] == 1, "partial col indices");
29255
29256 auto none = df.match_multiindex_columns({"baz"});
29257 fail += sm_check(none.matching_cols.empty(), "no match -> empty");
nbytes (pd_test_1_all.cpp:6214)
6204 }
6205
6206 // Test empty DataFrame
6207 pandas::DataFrame empty_df;
6208 if (!empty_df.empty()) {
6209 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210 throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211 }
6212
6213 // Test nbytes > 0 for non-empty
6214 if (df.nbytes() == 0) {
6215 std::cout << " [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216 throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217 }
6218
6219 // Test columns index
6220 if (df.columns().size() != 3) {
6221 std::cout << " [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222 throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223 }
ncols (pd_test_1_all.cpp:4658)
4648 std::cout << "========= DataFrame corr ========================";
4649
4650 std::map<std::string, std::vector<double>> data;
4651 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652 data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0}; // Perfect correlation
4653 pandas::DataFrame df(data);
4654
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
4666 passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667 if (!passed) {
4668 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
ndim (pd_test_1_all.cpp:6195)
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
6199
6200 // Test empty
6201 if (df.empty()) {
6202 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203 throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204 }
nrows (pd_test_1_all.cpp:4658)
4648 std::cout << "========= DataFrame corr ========================";
4649
4650 std::map<std::string, std::vector<double>> data;
4651 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652 data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0}; // Perfect correlation
4653 pandas::DataFrame df(data);
4654
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
4666 passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667 if (!passed) {
4668 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
resolve_column_level (pd_test_3_all.cpp:28773)
28763 fail += spt_check(r.has_multiindex(), "result has multiindex");
28764 fail += spt_check(r.multiindex().nlevels() == 2, "multiindex nlevels==2 (row + stacked)");
28765 if (fail == 0) std::cout << " OK" << std::endl;
28766 if (fail != 0) throw std::runtime_error("pd_test_stack_multiindex_result failed");
28767}
28768
28769void pd_test_resolve_column_level_int() {
28770 std::cout << " -- pd_test_resolve_column_level_int --" << std::endl;
28771 int fail = 0;
28772 auto df = make_ml_df();
28773 fail += spt_check(df.resolve_column_level(0) == 0, "0 -> 0");
28774 fail += spt_check(df.resolve_column_level(1) == 1, "1 -> 1");
28775 fail += spt_check(df.resolve_column_level(-1) == 1, "-1 -> 1");
28776 fail += spt_check(df.resolve_column_level(-2) == 0, "-2 -> 0");
28777 fail += spt_check(df.resolve_column_level(5) == -1, "5 -> -1 (out of range)");
28778 if (fail == 0) std::cout << " OK" << std::endl;
28779 if (fail != 0) throw std::runtime_error("pd_test_resolve_column_level_int failed");
28780}
28781
28782void pd_test_resolve_column_level_string() {
28783 std::cout << " -- pd_test_resolve_column_level_string --" << std::endl;
resolve_column_level (pd_test_3_all.cpp:28773)
28763 fail += spt_check(r.has_multiindex(), "result has multiindex");
28764 fail += spt_check(r.multiindex().nlevels() == 2, "multiindex nlevels==2 (row + stacked)");
28765 if (fail == 0) std::cout << " OK" << std::endl;
28766 if (fail != 0) throw std::runtime_error("pd_test_stack_multiindex_result failed");
28767}
28768
28769void pd_test_resolve_column_level_int() {
28770 std::cout << " -- pd_test_resolve_column_level_int --" << std::endl;
28771 int fail = 0;
28772 auto df = make_ml_df();
28773 fail += spt_check(df.resolve_column_level(0) == 0, "0 -> 0");
28774 fail += spt_check(df.resolve_column_level(1) == 1, "1 -> 1");
28775 fail += spt_check(df.resolve_column_level(-1) == 1, "-1 -> 1");
28776 fail += spt_check(df.resolve_column_level(-2) == 0, "-2 -> 0");
28777 fail += spt_check(df.resolve_column_level(5) == -1, "5 -> -1 (out of range)");
28778 if (fail == 0) std::cout << " OK" << std::endl;
28779 if (fail != 0) throw std::runtime_error("pd_test_resolve_column_level_int failed");
28780}
28781
28782void pd_test_resolve_column_level_string() {
28783 std::cout << " -- pd_test_resolve_column_level_string --" << std::endl;
resolve_sort_columns_multiindex (pd_test_2_all.cpp:22232)
22222 }
22223 std::cout << "====================================== [OK] pd_test_series_dtype_inference test suite ========================== " << std::endl;
22224 return 0;
22225}
22226
22227} // namespace dataframe_tests
22228// ------------------- pd_test_series_dtype_inference.cpp (end) -----------------------------
22229
22230// ------------------- pd_test_sort_key.cpp (start) -----------------------------
22231// pd_test_sort_key.cpp - Tests for sort_values key function support
22232// Tests sort_values_by_transformed() and resolve_sort_columns_multiindex()
22233
22234#include <iostream>
22235#include <string>
22236#include <vector>
22237#include <cmath>
22238#include <numeric>
22239
22240#include "../pandas/pd_dataframe.h"
22241
22242namespace dataframe_tests {
round (pd_test_1_all.cpp:1688)
1679 void pd_test_floating_array_rounding() {
1680 std::cout << "========= FloatingArray: rounding ======================= ";
1681
1682 pandas::FloatingArray<double> arr({
1683 std::optional<double>(1.234),
1684 std::optional<double>(2.567),
1685 std::nullopt
1686 });
1687
1688 auto rounded = arr.round(2);
1689 if (std::abs(rounded[0].value() - 1.23) > 0.001 ||
1690 std::abs(rounded[1].value() - 2.57) > 0.001) {
1691 std::cout << " [FAIL] : in pd_test_floating_array_rounding() : round(2)" << std::endl;
1692 throw std::runtime_error("pd_test_floating_array_rounding failed: round(2)");
1693 }
1694
1695 if (!rounded.is_na(2)) {
1696 std::cout << " [FAIL] : in pd_test_floating_array_rounding() : round should preserve NA" << std::endl;
1697 throw std::runtime_error("pd_test_floating_array_rounding failed: NA preservation");
1698 }
row_values_f64 (pd_test_2_all.cpp:19385)
19375 << dataframe_tests_agg_dtype::g_fail << " failed)" << std::endl;
19376
19377 return dataframe_tests_agg_dtype::g_fail;
19378}
19379
19380} // namespace dataframe_tests
19381// ------------------- pd_test_agg_dtype.cpp (end) -----------------------------
19382
19383// ------------------- pd_test_apply_extract.cpp (start) -----------------------------
19384// pd_test_apply_extract.cpp - Tests for DataFrame column/row extraction helpers
19385// column_to_series_f64(), column_to_series_str(), row_values_f64()
19386
19387#include <iostream>
19388#include <string>
19389#include <vector>
19390#include <cmath>
19391
19392#include "../pandas/pd_dataframe.h"
19393#include "../pandas/pd_series.h"
19394#include "../pandas/pd_index.h"
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
select_dtypes (pd_test_1_all.cpp:7044)
7034 std::cout << "========= select_dtypes =============================";
7035
7036 // Create DataFrame with mixed types using add_column
7037 pandas::DataFrame df;
7038 df.add_column<numpy::int64>("int_col", {1, 2, 3});
7039 df.add_column<numpy::float64>("float_col", {1.1, 2.2, 3.3});
7040 df.add_column<std::string>("str_col", {"a", "b", "c"});
7041 df.add_column<numpy::bool_>("bool_col", {true, false, true});
7042
7043 // Test include int
7044 pandas::DataFrame int_result = df.select_dtypes({"int"});
7045 if (int_result.ncols() != 1 || !int_result.has_column("int_col")) {
7046 std::cout << " [FAIL] : select_dtypes include int" << std::endl;
7047 throw std::runtime_error("pd_test_dataframe_select_dtypes failed: include int");
7048 }
7049
7050 // Test include number (numeric types)
7051 pandas::DataFrame num_result = df.select_dtypes({"number"});
7052 if (num_result.ncols() != 2 || !num_result.has_column("int_col") || !num_result.has_column("float_col")) {
7053 std::cout << " [FAIL] : select_dtypes include number" << std::endl;
7054 throw std::runtime_error("pd_test_dataframe_select_dtypes failed: include number");
select_multiindex_columns (pd_test_3_all.cpp:29220)
29210 pandas::DataFrame df;
29211 df.add_column<numpy::float64>("c0", {1.0, 2.0});
29212 df.add_column<numpy::float64>("c1", {3.0, 4.0});
29213 df.add_column<numpy::float64>("c2", {5.0, 6.0});
29214 df.add_column<numpy::float64>("c3", {7.0, 8.0});
29215 std::vector<std::vector<std::string>> levels = {
29216 {"bar", "bar", "foo", "baz"},
29217 {"one", "two", "one", "two"}
29218 };
29219 df.set_columns_levels(levels, {"L0", "L1"});
29220 auto sub = df.select_multiindex_columns({"bar", "foo"});
29221 fail += sm_check(sub.ncols() == 3, "3 cols selected (bar,bar,foo)");
29222 fail += sm_check(sub.has_columns_levels(), "MI preserved");
29223 if (sub.has_columns_levels()) {
29224 const auto& lv = sub.columns_levels();
29225 fail += sm_check(lv.size() == 2, "2 levels preserved");
29226 fail += sm_check(lv[0].size() == 3 && lv[0][0] == "bar" && lv[0][2] == "foo", "level 0 values");
29227 }
29228 auto empty = df.select_multiindex_columns({"nonexistent"});
29229 fail += sm_check(empty.ncols() == 0, "no match -> empty");
29230 if (fail == 0) std::cout << " OK" << std::endl;
set_column (pd_test_3_all.cpp:11118)
11108// ============================================================================
11109
11110void pd_test_3_all_set_column_aligned() {
11111 std::cout << "========= Fix 1: set_column aligned by index =============";
11112 pandas::DataFrame df;
11113 df.add_column<int64_t>("values", {1, 2, 3});
11114 df.set_index(std::make_unique<pandas::Index<std::string>>(
11115 std::vector<std::string>{"x", "y", "z"}));
11116
11117 pandas::Series<int> s1({10, 20, 30}, {"z", "x", "y"});
11118 df.set_column("aligned", s1);
11119
11120 // After alignment: x->20, y->30, z->10
11121 if (df.ncols() != 2) {
11122 std::cout << " [FAIL] : in pd_test_3_all_set_column_aligned() : expected 2 cols" << std::endl;
11123 throw std::runtime_error("set_column aligned failed");
11124 }
11125 std::cout << " -> tests passed" << std::endl;
11126}
11127
11128void pd_test_3_all_set_column_partial() {
set_column_cat_categories (pd_test_5_all.cpp:29010)
29000}
29001
29002static const std::vector<std::string>& DAYS() {
29003 static const std::vector<std::string> d = {
29004 "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
29005 return d;
29006}
29007
29008static pandas::DataFrame make_579_fixture_cat(bool ordered = true) {
29009 pandas::DataFrame df = make_579_fixture();
29010 df.set_column_cat_categories("day", DAYS());
29011 df.set_column_cat_ordered("day", ordered);
29012 return df;
29013}
29014
29015static std::vector<std::string> result_index_strs(const pandas::DataFrame& r) {
29016 std::vector<std::string> out;
29017 const auto& idx = r.index();
29018 size_t n = idx.size();
29019 out.reserve(n);
29020 for (size_t i = 0; i < n; ++i) out.push_back(idx.get_value_str(i));
set_column_cat_ordered (pd_test_5_all.cpp:29011)
29001
29002static const std::vector<std::string>& DAYS() {
29003 static const std::vector<std::string> d = {
29004 "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
29005 return d;
29006}
29007
29008static pandas::DataFrame make_579_fixture_cat(bool ordered = true) {
29009 pandas::DataFrame df = make_579_fixture();
29010 df.set_column_cat_categories("day", DAYS());
29011 df.set_column_cat_ordered("day", ordered);
29012 return df;
29013}
29014
29015static std::vector<std::string> result_index_strs(const pandas::DataFrame& r) {
29016 std::vector<std::string> out;
29017 const auto& idx = r.index();
29018 size_t n = idx.size();
29019 out.reserve(n);
29020 for (size_t i = 0; i < n; ++i) out.push_back(idx.get_value_str(i));
29021 return out;
set_column_datetime_ea (pd_test_extension_array.cpp:313)
303 numpy::datetime64(1609459200000000000LL, numpy::DateTimeUnit::Nanosecond),
304 };
305 std::map<std::string, std::vector<numpy::datetime64>> data;
306 data["d"] = dt_vec;
307 pandas::DataFrame df(data);
308 // Use the new C.1 helper to attach the EA.
309 std::vector<std::optional<pandas::Timestamp>> ts = {
310 pandas::Timestamp(1577836800000000000LL),
311 pandas::Timestamp(1609459200000000000LL),
312 };
313 df.set_column_datetime_ea("d", ts, /*uniform_tz=*/"");
314 auto& col = df["d"];
315 auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
316 check(col_dt != nullptr, "set_column_datetime_ea: col is Series<datetime64>");
317 if (col_dt) {
318 const auto& da_opt = col_dt->datetime_array();
319 check(da_opt.has_value() && *da_opt,
320 "set_column_datetime_ea: column EA populated");
321 if (da_opt.has_value() && *da_opt) {
322 check((*da_opt)->size() == 2,
323 "set_column_datetime_ea: EA size matches");
set_column_dtype (pd_test_4_all.cpp:2203)
2193// ============================================================================
2194void na_iloc_slice_with_na() {
2195 pandas::DataFrame df;
2196
2197 constexpr double NaN = std::numeric_limits<double>::quiet_NaN();
2198
2199 // int_na : Int64 nullable — row 1 NA, row 2 = 3.
2200 // Stored as double so NaN can act as the NA sentinel; the
2201 // dtype override drives the "<NA>" / integer rendering.
2202 df.add_column<double>("int_na", {NaN, 3.0});
2203 df.set_column_dtype("int_na", "Int64");
2204
2205 // float_na : plain float64 with NaN — no override needed.
2206 df.add_column<double>("float_na", {NaN, 3.0});
2207
2208 // bool_na : nullable boolean — row 1 NA, row 2 = False.
2209 // Stored as double with NaN sentinel; dtype override drives
2210 // the "<NA>" / "True" / "False" rendering.
2211 df.add_column<double>("bool_na", {NaN, 0.0});
2212 df.set_column_dtype("bool_na", "boolean");
set_column_dtype (pd_test_4_all.cpp:2203)
2193// ============================================================================
2194void na_iloc_slice_with_na() {
2195 pandas::DataFrame df;
2196
2197 constexpr double NaN = std::numeric_limits<double>::quiet_NaN();
2198
2199 // int_na : Int64 nullable — row 1 NA, row 2 = 3.
2200 // Stored as double so NaN can act as the NA sentinel; the
2201 // dtype override drives the "<NA>" / integer rendering.
2202 df.add_column<double>("int_na", {NaN, 3.0});
2203 df.set_column_dtype("int_na", "Int64");
2204
2205 // float_na : plain float64 with NaN — no override needed.
2206 df.add_column<double>("float_na", {NaN, 3.0});
2207
2208 // bool_na : nullable boolean — row 1 NA, row 2 = False.
2209 // Stored as double with NaN sentinel; dtype override drives
2210 // the "<NA>" / "True" / "False" rendering.
2211 df.add_column<double>("bool_na", {NaN, 0.0});
2212 df.set_column_dtype("bool_na", "boolean");
set_column_names (pd_test_3_all.cpp:1708)
1698 pandas::DataFrame f_df;
1699 f_df.add_column<std::int64_t>("a", {1, 2});
1700 f_df.add_column<std::int64_t>("b", {3, 4});
1701 std::map<std::string, std::string> f_mapper{{"a", "A"}, {"b", "B"}};
1702 auto f = f_df.rename_columns(f_mapper);
1703 if (f.columns().get_value_str(0) != "A") {
1704 std::cout << " [FAIL] : in pd_test_3_all_chainable_mutators() : Case F col[0]" << std::endl;
1705 throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case F col[0]");
1706 }
1707
1708 // --- Case G: set_column_names(vector) returns reference (chainable) ---
1709 pandas::DataFrame g_df;
1710 g_df.add_column<std::int64_t>("c1", {1});
1711 g_df.add_column<std::int64_t>("c2", {2});
1712 auto& g_ref = g_df.set_column_names(std::vector<std::string>{"x", "y"});
1713 if (&g_ref != &g_df) {
1714 std::cout << " [FAIL] : in pd_test_3_all_chainable_mutators() : Case G not self-ref" << std::endl;
1715 throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case G not self-ref");
1716 }
1717 if (g_df.columns().get_value_str(0) != "x") {
1718 std::cout << " [FAIL] : in pd_test_3_all_chainable_mutators() : Case G col[0]" << std::endl;
set_column_resolved (pd_test_5_all.cpp:94241)
94231 pandas::DtypeOverride ov = pandas::DtypeOverride::parse("Float64");
94232 auto df = pandas::from_records_resolved<pandas::DataFrame>(rows, cols, ov);
94233 pandas_tests::check(df.column_dtype_override("n") == "Float64",
94234 "C_26h_case_144_records_override_applies()_dtype", local_fail);
94235}
94236
94237void case_150_create_new_column(int& local_fail) {
94238 std::cout << "-- case_150_create_new_column\n";
94239 pandas::DataFrame df;
94240 KVec col = { kv_int(1), kv_int(2) };
94241 df.set_column_resolved("n", col);
94242 pandas_tests::check(df.has_column("n"),
94243 "C_26h_case_150_create_new_column()_has", local_fail);
94244 pandas_tests::check(df.column_dtype_override("n") == "int64",
94245 "C_26h_case_150_create_new_column()_dtype", local_fail);
94246}
94247
94248void case_151_replace_existing_int_to_float(int& local_fail) {
94249 std::cout << "-- case_151_replace_existing_int_to_float\n";
94250 pandas::DataFrame df;
94251 df.add_column<std::int64_t>("v", { 1, 2, 3 });
set_column_sparse_fill_value (pd_test_5_all.cpp:23673)
23663 df.to_string(), local_fail);
23664}
23665
23666void f_set_index_col_multiindex_display_6_843271_case_12_site1_sparse_all_nan_numeric(int& local_fail) {
23667 std::cout << "----- case_12_site1_sparse_all_nan_numeric -----\n";
23668 const double NaN = std::numeric_limits<double>::quiet_NaN();
23669 pandas::DataFrame df;
23670 df.add_column<double>("SNaN", {NaN, NaN, NaN});
23671 // Mark the column as sparse with NaN fill value so is_sparse=true and
23672 // the sparse-all-NaN bump path fires.
23673 df.set_column_sparse_fill_value("SNaN", NaN, "float64");
23674 apply_default_display(df);
23675 check_str("case_12.site1_sparse_all_nan_numeric",
23676 EXPECTED_case_12_site1_sparse_all_nan_numeric,
23677 df.to_string(), local_fail);
23678}
23679
23680} // namespace f_test_set_index_col_multiindex_display_6_ns
23681
23682void f_test_set_index_col_multiindex_display_6() {
23683 std::cout << "========= f_test_set_index_col_multiindex_display_6 =======";
set_columns_index_dtype (pd_test_4_all.cpp:4985)
4975// df.rename(columns={0: 10, 1: 20})
4976// Expected pd= block: temp/PandasPython.res:15487
4977// ============================================================================
4978void rename_int_cols() {
4979 std::map<std::string, std::vector<int64_t>> data = {
4980 {"0", {1, 2}},
4981 {"1", {3, 4}},
4982 {"2", {5, 6}},
4983 };
4984 pandas::DataFrame df(data);
4985 df.set_columns_index_dtype("int64");
4986
4987 pandas::DataFrame renamed = df.rename_columns(
4988 std::map<std::string, std::string>{{"0", "10"}, {"1", "20"}});
4989 renamed.set_columns_index_dtype("int64");
4990 apply_default_display(renamed);
4991
4992 // Expected verbatim from temp/PandasPython.res:15488-15490
4993 const std::string expected =
4994 " 10 20 2 \n"
4995 "0 1 3 5\n"
set_columns_levels (pd_test_2_all.cpp:20310)
20300 check(t == pandas::DataFrame::ColumnAccessType::DuplicateColumns, "duplicate -> DuplicateColumns");
20301}
20302
20303void pd_test_getitem_dispatch_classify_multiindex() {
20304 std::cout << "pd_test_getitem_dispatch_classify_multiindex" << std::endl;
20305 pandas::DataFrame df;
20306 std::vector<numpy::float64> v1 = {1.0, 2.0};
20307 std::vector<numpy::float64> v2 = {3.0, 4.0};
20308 df.insert(0, "A_x", std::make_unique<pandas::Series<numpy::float64>>(v1, "A_x"), true);
20309 df.insert(1, "A_y", std::make_unique<pandas::Series<numpy::float64>>(v2, "A_y"), true);
20310 df.set_columns_levels({{"A", "A"}, {"x", "y"}}, {"first", "second"});
20311
20312 auto t = df.classify_column_access("A");
20313 check(t == pandas::DataFrame::ColumnAccessType::MultiIndexGroup, "multiindex top -> MultiIndexGroup");
20314}
20315
20316// =========================================================================
20317// Extraction tests
20318// =========================================================================
20319
20320void pd_test_getitem_dispatch_extract_numeric() {
set_columns_name (pd_test_3_all.cpp:10815)
10805 std::cout << " -> tests passed" << std::endl;
10806}
10807
10808void pd_test_3_all_columns_name_setter() {
10809 std::cout << "========= DataFrame.columns_name(string) setter ========";
10810 pandas::DataFrame df;
10811 df.add_column<int64_t>("A", {1, 2, 3});
10812 df.columns_name("MyColumns");
10813 pandas::DataFrame df2;
10814 df2.add_column<int64_t>("A", {1, 2, 3});
10815 df2.set_columns_name("MyColumns");
10816 if (df.to_string() != df2.to_string()) {
10817 std::cout << " [FAIL] : in pd_test_3_all_columns_name_setter() : mismatch" << std::endl;
10818 throw std::runtime_error("pd_test_3_all_columns_name_setter failed");
10819 }
10820 std::cout << " -> tests passed" << std::endl;
10821}
10822
10823void pd_test_3_all_column_alias() {
10824 std::cout << "========= DataFrame.column<T>() alias ==================";
10825 pandas::DataFrame df;
set_datetime_index (pd_test_3_all.cpp:1322)
1312 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
1313 throw std::runtime_error("set_multi_index: shape wrong");
1314 }
1315 }
1316
1317 // G. set_datetime_index
1318 {
1319 pandas::DataFrame df;
1320 df.add_column<double>("price",
1321 {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0});
1322 df.set_datetime_index("2020-01-01", 10, "D");
1323 if (df.index().size() != 10) {
1324 throw std::runtime_error("set_datetime_index: size wrong");
1325 }
1326 }
1327
1328 // H. set_nan_marker
1329 {
1330 pandas::DataFrame df;
1331 std::vector<std::string> v_str = {"", "y", "z", ""};
1332 df.add_column<std::string>("v_str", v_str);
set_display_options (pd_test_4_all.cpp:118)
108}
109static std::string load_expected(const std::string& relpath) {
110 std::ifstream f(relpath, std::ios::binary);
111 if (!f) throw std::runtime_error("cannot open expected fixture: " + relpath);
112 std::string s((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
113 std::string out; out.reserve(s.size());
114 for (char ch : s) if (ch != '\r') out.push_back(ch);
115 return out;
116}
117static void apply_default_display(pandas::DataFrame& df) {
118 df.set_display_options(80, 50, 60, true);
119}
120
121void unstack_multiindex_compare_all() {
122 pandas::DataFrame df;
123 df.add_column<int64_t>("A_one", { 0, 2, 4});
124 df.add_column<int64_t>("A_two", { 1, 3, 5});
125 df.add_column<int64_t>("B_one", { 6, 8, 10});
126 df.add_column<int64_t>("B_two", { 7, 9, 11});
127
128 auto row_idx = std::make_unique<pandas::Index<std::string>>(
set_flags (pd_test_1_all.cpp:16410)
16400 throw std::runtime_error("pd_test_ndframe_series_flags failed: default allows_duplicate_labels");
16401 }
16402
16403 passed = s.flags().copy_on_write == false;
16404 if (!passed) {
16405 std::cout << " [FAIL] : in pd_test_ndframe_series_flags() : default copy_on_write" << std::endl;
16406 throw std::runtime_error("pd_test_ndframe_series_flags failed: default copy_on_write");
16407 }
16408
16409 // Test set_flags
16410 s.set_flags(pandas::Flags(false, true));
16411 passed = s.flags().allows_duplicate_labels == false;
16412 if (!passed) {
16413 std::cout << " [FAIL] : in pd_test_ndframe_series_flags() : set allows_duplicate_labels" << std::endl;
16414 throw std::runtime_error("pd_test_ndframe_series_flags failed: set allows_duplicate_labels");
16415 }
16416
16417 passed = s.flags().copy_on_write == true;
16418 if (!passed) {
16419 std::cout << " [FAIL] : in pd_test_ndframe_series_flags() : set copy_on_write" << std::endl;
16420 throw std::runtime_error("pd_test_ndframe_series_flags failed: set copy_on_write");
set_integer_index (pd_test_3_all.cpp:1277)
1267 {
1268 pandas::DataFrame df1;
1269 df1.add_column<double>("temp", {22.1, 23.5, 19.8, 25.0});
1270 df1.set_string_index({"Mon", "Tue", "Wed", "Thu"});
1271 if (df1.index().get_value_str(2) != "Wed") {
1272 throw std::runtime_error("set_string_index: values wrong");
1273 }
1274
1275 pandas::DataFrame df2;
1276 df2.add_column<int64_t>("val", {3, 4});
1277 df2.set_integer_index({2, 3});
1278 if (df2.index().size() != 2) {
1279 throw std::runtime_error("set_integer_index: size wrong");
1280 }
1281
1282 pandas::DataFrame df3;
1283 df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284 df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285 if (df3.index().get_value_str(4) != "b") {
1286 throw std::runtime_error("set_index_from_strings: values wrong");
1287 }
set_integer_index (pd_test_3_all.cpp:1277)
1267 {
1268 pandas::DataFrame df1;
1269 df1.add_column<double>("temp", {22.1, 23.5, 19.8, 25.0});
1270 df1.set_string_index({"Mon", "Tue", "Wed", "Thu"});
1271 if (df1.index().get_value_str(2) != "Wed") {
1272 throw std::runtime_error("set_string_index: values wrong");
1273 }
1274
1275 pandas::DataFrame df2;
1276 df2.add_column<int64_t>("val", {3, 4});
1277 df2.set_integer_index({2, 3});
1278 if (df2.index().size() != 2) {
1279 throw std::runtime_error("set_integer_index: size wrong");
1280 }
1281
1282 pandas::DataFrame df3;
1283 df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284 df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285 if (df3.index().get_value_str(4) != "b") {
1286 throw std::runtime_error("set_index_from_strings: values wrong");
1287 }
set_multi_index (pd_test_3_all.cpp:1311)
1301 df.index_name("year_end"); // setter form, used by 6 failing tests
1302 if (!df.index_name().has_value() || df.index_name().value() != "year_end") {
1303 throw std::runtime_error("index_name(setter): round-trip failed");
1304 }
1305 }
1306
1307 // F. set_multi_index -- two-level brace-init
1308 {
1309 pandas::DataFrame df;
1310 df.add_column<int64_t>("val", {10, 20, 30, 40});
1311 df.set_multi_index({{"A", "A", "B", "B"}, {"1", "2", "1", "2"}});
1312 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
1313 throw std::runtime_error("set_multi_index: shape wrong");
1314 }
1315 }
1316
1317 // G. set_datetime_index
1318 {
1319 pandas::DataFrame df;
1320 df.add_column<double>("price",
1321 {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0});
set_multi_index (pd_test_3_all.cpp:1311)
1301 df.index_name("year_end"); // setter form, used by 6 failing tests
1302 if (!df.index_name().has_value() || df.index_name().value() != "year_end") {
1303 throw std::runtime_error("index_name(setter): round-trip failed");
1304 }
1305 }
1306
1307 // F. set_multi_index -- two-level brace-init
1308 {
1309 pandas::DataFrame df;
1310 df.add_column<int64_t>("val", {10, 20, 30, 40});
1311 df.set_multi_index({{"A", "A", "B", "B"}, {"1", "2", "1", "2"}});
1312 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
1313 throw std::runtime_error("set_multi_index: shape wrong");
1314 }
1315 }
1316
1317 // G. set_datetime_index
1318 {
1319 pandas::DataFrame df;
1320 df.add_column<double>("price",
1321 {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0});
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex_names (pd_test_2_all.cpp:20775)
20765 // Simulate a 2-level groupby result with composite \x1f keys
20766 using std::string;
20767 string sep(1, '\x1f');
20768 std::vector<string> keys = {"A" + sep + "X", "A" + sep + "Y", "B" + sep + "X", "B" + sep + "Y"};
20769
20770 std::vector<numpy::float64> values = {1.0, 2.0, 3.0, 4.0};
20771 pandas::Series<std::string> by(keys);
20772 pandas::Series<numpy::float64> data(values);
20773
20774 auto sgb = data.groupby(by);
20775 sgb.set_multiindex_names({"level0", "level1"});
20776
20777 // Create a "result" series with composite index
20778 pandas::Series<numpy::float64> result(values);
20779 result.set_index(std::make_unique<pandas::Index<std::string>>(keys));
20780
20781 sgb.apply_result_index(result);
20782
20783 // Should now have a MultiIndex
20784 check(result.has_multiindex(), "has_multiindex");
20785 check(result.multiindex().nlevels() == 2, "nlevels_2");
set_nan_marker (pd_test_3_all.cpp:1334)
1324 throw std::runtime_error("set_datetime_index: size wrong");
1325 }
1326 }
1327
1328 // H. set_nan_marker
1329 {
1330 pandas::DataFrame df;
1331 std::vector<std::string> v_str = {"", "y", "z", ""};
1332 df.add_column<std::string>("v_str", v_str);
1333 df.add_column<int64_t>("v_num", {1, 2, 3, 4});
1334 df.set_nan_marker("v_str", {true, false, false, true});
1335 // exact mask-bit assertions depend on Series<string> mask API
1336 }
1337
1338 // I. set_index_from_column<T>
1339 {
1340 pandas::DataFrame df;
1341 df.add_column<int64_t>("val", {1, 3});
1342 df.add_column<int64_t>("count", {30, 70});
1343 df.set_index_from_column<std::string>("group", {"A", "B"});
1344 if (!df.index_name().has_value() || df.index_name().value() != "group") {
set_option (pd_test_3_all.cpp:10876)
10866 df.set_multiindex(mi);
10867 auto result = df.reorder_levels({"second", "first"});
10868 if (result.nrows() != 4) {
10869 std::cout << " [FAIL] : in pd_test_3_all_reorder_levels_initlist() : wrong nrows" << std::endl;
10870 throw std::runtime_error("pd_test_3_all_reorder_levels_initlist failed");
10871 }
10872 std::cout << " -> tests passed" << std::endl;
10873}
10874
10875void pd_test_3_all_set_option_instance() {
10876 std::cout << "========= DataFrame.set_option() instance ==============";
10877 pandas::DataFrame df;
10878 df.add_column<int64_t>("A", {1, 2, 3});
10879 df.set_option("display.multi_sparse", false);
10880 df.set_option("display.unknown_key", true); // should not throw
10881 std::cout << " -> tests passed" << std::endl;
10882}
10883
10884// ============================================================================
10885// Category 48: Indexing Fixes (Plan: plan_indexing_fixes)
10886// ============================================================================
set_string_index (pd_test_3_all.cpp:1270)
1260 df.set_index_from_list(labels);
1261 if (df.index().get_value_str(1) != "q") {
1262 throw std::runtime_error("set_index_from_list(vector): values wrong");
1263 }
1264 }
1265
1266 // D. Aliases: set_string_index / set_integer_index / set_index_from_strings
1267 {
1268 pandas::DataFrame df1;
1269 df1.add_column<double>("temp", {22.1, 23.5, 19.8, 25.0});
1270 df1.set_string_index({"Mon", "Tue", "Wed", "Thu"});
1271 if (df1.index().get_value_str(2) != "Wed") {
1272 throw std::runtime_error("set_string_index: values wrong");
1273 }
1274
1275 pandas::DataFrame df2;
1276 df2.add_column<int64_t>("val", {3, 4});
1277 df2.set_integer_index({2, 3});
1278 if (df2.index().size() != 2) {
1279 throw std::runtime_error("set_integer_index: size wrong");
1280 }
shape (pd_test_1_all.cpp:6188)
6178 std::cout << "========= properties =======================";
6179
6180 std::map<std::string, std::vector<numpy::float64>> data;
6181 data["A"] = {1.0, 2.0, 3.0, 4.0};
6182 data["B"] = {5.0, 6.0, 7.0, 8.0};
6183 data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
shape_2d (pd_test_3_all.cpp:10796)
10786 }
10787
10788 std::cout << " -> tests passed" << std::endl;
10789 }
10790
10791 // ============================================================================
10792 // Category 47: Quick Fixes & Aliases Tests
10793 // ============================================================================
10794
10795 void pd_test_3_all_shape_2d() {
10796 std::cout << "========= DataFrame.shape_2d() =========================";
10797 pandas::DataFrame df;
10798 df.add_column<int64_t>("A", {1, 2, 3});
10799 df.add_column<int64_t>("B", {4, 5, 6});
10800 auto s = df.shape_2d();
10801 if (s.first != 3 || s.second != 2) {
10802 std::cout << " [FAIL] : in pd_test_3_all_shape_2d() : wrong dimensions" << std::endl;
10803 throw std::runtime_error("pd_test_3_all_shape_2d failed");
10804 }
10805 std::cout << " -> tests passed" << std::endl;
10806 }
size (pd_test_1_all.cpp:22)
12 #include "../pandas/pd_boolean_array.h"
13
14 namespace dataframe_tests {
15
16 namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
sparse (pd_test_3_all.cpp:20627)
20617 #include <cmath>
20618
20619 #include "../pandas/pd_series.h"
20620
20621 // CRITICAL: No using namespace directives
20622
20623 namespace dataframe_tests {
20624 namespace dataframe_tests_sparse_accessor {
20625
20626 // ============================================================================
20627 // Test sparse().density() and sparse().npoints()
20628 // ============================================================================
20629
20630 void pd_test_sparse_density_npoints() {
20631 std::cout << "========= Series.sparse().density/npoints() =============";
20632
20633 // Create a series with some zeros (sparse values)
20634 pandas::Series<numpy::float64> s({0.0, 1.0, 0.0, 2.0, 0.0, 3.0});
20635
20636 auto sparse = s.sparse(0.0); // 0.0 is the fill value
ss (pd_test_3_all.cpp:27670)
27660 fail++;
27661 } else {
27662 auto cats = str_s->get_cat_categories();
27663 if (cats.size() != 3) {
27664 std::cout << " FAIL: expected 3 categories, got " << cats.size() << std::endl;
27665 fail++;
27666 }
27667 }
27668 }
27669
27670 pandas::Series<std::string> ss({"a", "b", "a", "c"}, "strs");
27671 auto result2 = ss.astype("category");
27672 auto* str_s2 = dynamic_cast<pandas::Series<std::string>*>(result2.get());
27673 if (!str_s2) {
27674 std::cout << " FAIL: expected Series<string> for string->category" << std::endl;
27675 fail++;
27676 } else {
27677 if (str_s2->dtype_name() != "category") {
27678 std::cout << " FAIL: dtype should be category" << std::endl;
27679 fail++;
27680 }
trim (pd_test_5_all.cpp:58873)
58863 auto trim = [](const std::string& s) {
58864 size_t a = s.find_first_not_of(" \t");
58865 size_t b = s.find_last_not_of(" \t\r");
58866 if (a == std::string::npos) return std::string();
58867 return s.substr(a, b - a + 1);
58868 };
58869 size_t pos = 0;
58870 while (pos < body.size()) {
58871 size_t nl = body.find('\n', pos);
58872 std::string raw = body.substr(pos, nl == std::string::npos ? std::string::npos : nl - pos);
58873 std::string t = trim(raw);
58874 if (!t.empty()) {
58875 // tokenise by whitespace
58876 std::vector<std::string> toks;
58877 std::string cur;
58878 for (char c : t) {
58879 if (c == ' ' || c == '\t') {
58880 if (!cur.empty()) { toks.push_back(cur); cur.clear(); }
58881 } else {
58882 cur.push_back(c);
58883 }
trim (pd_test_5_all.cpp:58873)
58863 auto trim = [](const std::string& s) {
58864 size_t a = s.find_first_not_of(" \t");
58865 size_t b = s.find_last_not_of(" \t\r");
58866 if (a == std::string::npos) return std::string();
58867 return s.substr(a, b - a + 1);
58868 };
58869 size_t pos = 0;
58870 while (pos < body.size()) {
58871 size_t nl = body.find('\n', pos);
58872 std::string raw = body.substr(pos, nl == std::string::npos ? std::string::npos : nl - pos);
58873 std::string t = trim(raw);
58874 if (!t.empty()) {
58875 // tokenise by whitespace
58876 std::vector<std::string> toks;
58877 std::string cur;
58878 for (char c : t) {
58879 if (c == ' ' || c == '\t') {
58880 if (!cur.empty()) { toks.push_back(cur); cur.clear(); }
58881 } else {
58882 cur.push_back(c);
58883 }
truncate (pd_test_1_all.cpp:20467)
20457 std::vector<std::string> dates = {
20458 "2020-01-01",
20459 "2020-01-02",
20460 "2020-01-03",
20461 "2020-01-04",
20462 "2020-01-05"
20463 };
20464 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20465
20466 // Truncate to keep only dates from 2020-01-02 to 2020-01-04
20467 pandas::DataFrame result = df.truncate("2020-01-02", "2020-01-04");
20468
20469 bool passed = (result.nrows() == 3);
20470
20471 if (!passed) {
20472 std::cout << " [FAIL] : in pd_test_timeseries_truncate() : expected 3 rows, got "
20473 << result.nrows() << std::endl;
20474 throw std::runtime_error("pd_test_timeseries_truncate failed");
20475 }
20476
20477 std::cout << " -> tests passed" << std::endl;
ts (pd_test_2_all.cpp:22590)
22580 void test_to_datetime_numeric_seconds() {
22581 std::cout << " -- test_to_datetime_numeric_seconds --" << std::endl;
22582 // 1490195805 seconds = 2017-03-22 15:16:45 UTC
22583 std::vector<double> vals = {1490195805.0};
22584 auto arr = pandas::to_datetime_numeric(vals, "s");
22585 check(arr.size() == 1, "size==1");
22586 auto v = arr[0];
22587 check(v.has_value(), "has_value");
22588 if (v.has_value()) {
22589 pandas::Timestamp ts(v->getValue());
22590 check(ts.year() == 2017, "year==2017");
22591 check(ts.month() == 3, "month==3");
22592 check(ts.day() == 22, "day==22");
22593 check(ts.hour() == 15, "hour==15");
22594 check(ts.minute() == 16, "min==16");
22595 check(ts.second() == 45, "sec==45");
22596 }
22597 }
22598
22599 void test_to_datetime_numeric_millis() {
values (pd_test_1_all.cpp:364)
354 pandas::CategoricalArray arr1;
355 if (arr1.size() != 0) {
356 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357 throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358 }
359 if (arr1.ordered()) {
360 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361 throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362 }
363
364 // Constructor from values (infer categories)
365 std::vector<std::optional<std::string>> values = {
366 std::optional<std::string>("a"),
367 std::optional<std::string>("b"),
368 std::optional<std::string>("a"),
369 std::optional<std::string>("c")
370 };
371 pandas::CategoricalArray arr2(values);
372 if (arr2.size() != 4) {
373 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374 throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");