Series#
-
class pandas::Series#
Core data container class in the pandas namespace.
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Create Series
Series<double> s({1.0, 2.0, 3.0}, "values");
// Statistics
double mean_val = s.mean().value_or(0.0);  // mean() returns an optional (the test-suite excerpts call value_or on it)
auto std_val = s.std();
Constructors#
| Signature | Location | Example |
|---|---|---|
| | pd_series.h:802 | |
| | pd_series.h:828 | |
| | pd_series.h:883 | |
| | pd_series.h:902 | |
| | pd_series.h:919 | |
| | pd_series.h:938 | |
| | pd_series.h:970 | |
Construction#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | std::unique_ptr<NDFrameBase> | pd_series.h:11189 | |
Indexing / Selection#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
T |
pd_series.h:1550 |
|
|
Series<T> |
pd_series.h:15902 |
|
|
Series<T> |
pd_series.h:12867 |
|
|
std::optional<size_t> |
pd_series.h:6786 |
|
|
T |
pd_series.h:12896 |
|
|
LabelLookup |
pd_series.h:2198 |
|
|
const std::vector<std::string>& |
pd_series.h:1248 |
|
|
std::string |
pd_series.h:1269 |
|
|
numpy::datetime64 |
pd_series.h:10454 |
|
|
std::optional<std::string> |
pd_series.h:10544 |
|
|
std::optional<T> |
pd_series.h:1300 |
|
|
Series<T> |
pd_series.h:2238 |
|
|
std::optional<double> |
pd_series.h:10535 |
|
|
std::string |
pd_series.h:10420 |
|
|
bool |
pd_series.h:2947 |
|
|
double |
pd_series.h:10290 |
|
|
std::string |
pd_series.h:9264 |
|
|
<< |
pd_series.h:10946 |
|
|
Series<T> |
pd_series.h:2494 |
|
|
T |
pd_series.h:1562 |
|
|
T |
pd_series.h:2035 |
|
|
std::string |
pd_series.h:3913 |
|
|
std::pair<bool, std::pair<int64_t, std::string>> |
pd_series.h:3921 |
|
|
std::string |
pd_series.h:3906 |
|
|
std::pair<bool, std::pair<int64_t, std::string>> |
pd_series.h:3934 |
|
|
Series<T> |
pd_series.h:2052 |
|
|
Series<T> |
pd_series.h:2115 |
|
|
Series<T> |
pd_series.h:12882 |
|
|
std::optional<size_t> |
pd_series.h:6806 |
|
|
T |
pd_series.h:2269 |
|
|
Series<T> |
pd_series.h:2282 |
|
|
Series<T> |
pd_series.h:1908 |
|
|
pandas::StringLookupResult |
pd_series.h:1602 |
|
|
pandas::LookupResult<T> |
pd_series.h:1786 |
|
|
pandas::LookupResult<T> |
pd_series.h:1749 |
|
|
const numpy::NDArray<numpy::bool_>& |
pd_series.h:1293 |
|
|
Series<T> |
pd_series.h:2458 |
|
|
bool |
pd_series.h:1295 |
|
|
Series<T> |
pd_series.h:13644 |
|
|
Series<T> |
pd_series.h:13700 |
|
|
Series<T> |
pd_series.h:15536 |
|
|
void |
pd_series.h:2997 |
|
|
void |
pd_series.h:2971 |
|
|
void |
pd_series.h:3011 |
|
|
bool |
pd_series.h:3041 |
|
|
Series<T> |
pd_series.h:2502 |
|
|
Series<T> |
pd_series.h:2512 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:11546 |
|
|
Series<T> |
pd_series.h:2375 |
|
|
Series<T> |
pd_series.h:2409 |
|
|
Result |
pd_series.h:16301 |
|
|
Series<T> |
pd_series.h:16340 |
|
|
Series<T> |
pd_series.h:15785 |
Data Manipulation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
Series<T> |
pd_series.h:12445 |
|
|
Series<T> |
pd_series.h:5712 |
|
|
Series<T> |
pd_series.h:12485 |
|
|
Series<T> |
pd_series.h:2754 |
|
|
T |
pd_series.h:12744 |
|
|
T |
pd_series.h:12805 |
|
|
Series<T> |
pd_series.h:13996 |
|
|
Result |
pd_series.h:14808 |
|
|
Series<T> |
pd_series.h:15389 |
|
|
Series<std::string> |
pd_series.h:14818 |
|
|
Series<std::string> |
pd_series.h:15097 |
|
|
Series<std::string> |
pd_series.h:15340 |
|
|
Result |
pd_series.h:16284 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:11555 |
|
|
Series<T> |
pd_series.h:8897 |
|
|
Series<T> |
pd_series.h:9020 |
|
|
Series<T> |
pd_series.h:15413 |
|
|
Series<T> |
pd_series.h:15460 |
|
|
Series<T> |
pd_series.h:15498 |
|
|
void |
pd_series.h:2922 |
|
|
Series<T>& |
pd_series.h:1439 |
|
|
void |
pd_series.h:1470 |
|
|
Series<T> |
pd_series.h:8994 |
|
|
void |
pd_series.h:1403 |
|
|
void |
pd_series.h:1417 |
|
|
Series<T> |
pd_series.h:15703 |
|
|
void |
pd_series.h:15754 |
Missing Data#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
Series<T> |
pd_series.h:11928 |
|
|
Series<T> |
pd_series.h:11842 |
|
|
Series<T> |
pd_series.h:11947 |
|
|
Series<T> |
pd_series.h:2653 |
|
|
void |
pd_series.h:2834 |
|
|
Result |
pd_series.h:16299 |
|
|
void |
pd_series.h:2896 |
|
|
Series<T> |
pd_series.h:16336 |
|
|
Series<T> |
pd_series.h:12950 |
|
|
numpy::NDArray<numpy::bool_> |
pd_series.h:2580 |
|
|
numpy::NDArray<numpy::bool_> |
pd_series.h:13520 |
|
|
numpy::NDArray<numpy::bool_> |
pd_series.h:2610 |
|
|
numpy::NDArray<numpy::bool_> |
pd_series.h:13527 |
|
|
Series<T> |
pd_series.h:12012 |
Statistics#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
size_t |
pd_series.h:2545 |
|
|
Series<T> |
pd_series.h:5107 |
|
|
Series<T> |
pd_series.h:5080 |
|
|
Series<T> |
pd_series.h:5020 |
|
|
Series<T> |
pd_series.h:4949 |
|
|
DataFrame |
pd_series.h:3742 |
|
|
Series<numpy::float64> |
pd_series.h:3754 |
|
|
std::optional<double> |
pd_series.h:3578 |
|
|
std::optional<double> |
pd_series.h:3620 |
|
|
std::optional<T> |
pd_series.h:3373 |
|
|
int |
pd_series.h:9092 |
|
|
std::optional<double> |
pd_series.h:3229 |
|
|
std::optional<T> |
pd_series.h:3469 |
|
|
std::optional<T> |
pd_series.h:3331 |
|
|
Series<T> |
pd_series.h:3629 |
|
|
size_t |
pd_series.h:5645 |
|
|
std::optional<T> |
pd_series.h:3416 |
|
|
std::optional<T> |
pd_series.h:13757 |
|
|
std::optional<double> |
pd_series.h:3696 |
|
|
Series<numpy::float64> |
pd_series.h:3764 |
|
|
std::optional<double> |
pd_series.h:3519 |
|
|
std::optional<double> |
pd_series.h:3538 |
|
|
std::optional<double> |
pd_series.h:3256 |
|
|
auto |
pd_series.h:3113 |
|
|
Series<numpy::int64> |
pd_series.h:5745 |
|
|
std::optional<double> |
pd_series.h:3270 |
Aggregation#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
auto |
pd_series.h:6343 |
|
|
std::optional<double> |
pd_series.h:6369 |
|
|
DataFrame |
pd_series.h:6423 |
|
|
Result |
pd_series.h:16297 |
|
|
auto |
pd_series.h:6431 |
|
|
std::optional<double> |
pd_series.h:6440 |
|
|
DataFrame |
pd_series.h:6452 |
|
|
auto |
pd_series.h:5867 |
|
|
auto |
pd_series.h:5909 |
|
|
Series<double> |
pd_series.h:5991 |
|
|
Result |
pd_series.h:5986 |
|
|
Series<T> |
pd_series.h:4351 |
|
|
static void |
pd_series.h:16328 |
|
|
Result |
pd_series.h:16293 |
|
|
pd_series.h:10682 |
||
|
pd_series.h:11058 |
||
|
auto |
pd_series.h:5942 |
|
|
EWM<T> |
pd_series.h:6537 |
|
|
EWM<T> |
pd_series.h:6576 |
|
|
EWM<T> |
pd_series.h:6558 |
|
|
Expanding<T> |
pd_series.h:6510 |
|
|
SeriesGroupBy<T, GroupT> |
pd_series.h:6620 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:4376 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6668 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6649 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6688 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6631 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6640 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6678 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6658 |
|
|
SeriesGroupBy<T, std::string> |
pd_series.h:6699 |
|
|
Series<U> |
pd_series.h:6038 |
|
|
auto |
pd_series.h:6230 |
|
|
Series<T> |
pd_series.h:6101 |
|
|
Result |
pd_series.h:16316 |
|
|
Series<T> |
pd_series.h:6134 |
|
|
Result |
pd_series.h:16319 |
|
|
Series<std::string> |
pd_series.h:6184 |
|
|
auto |
pd_series.h:6458 |
|
|
SeriesResampler<T> |
pd_series.h:6748 |
|
|
Rolling<T> |
pd_series.h:6486 |
|
|
Series<T> |
pd_series.h:6281 |
|
|
Series<T> |
pd_series.h:6318 |
|
|
DataFrame |
pd_series.h:6309 |
|
|
Result |
pd_series.h:16321 |
Arithmetic#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
Series<T> |
pd_series.h:4208 |
|
|
Series<T> |
pd_series.h:4255 |
|
|
Series<T> |
pd_series.h:4298 |
|
|
Series<T> |
pd_series.h:4340 |
|
|
Series<T> |
pd_series.h:4265 |
|
|
Series<T> |
pd_series.h:8941 |
|
|
Series<T> |
pd_series.h:8970 |
|
|
Series<numpy::float64> |
pd_series.h:4487 |
|
|
Series<numpy::float64> |
pd_series.h:4519 |
|
|
Series<numpy::float64> |
pd_series.h:4537 |
|
|
std::pair<Series<T>, Series<T>> |
pd_series.h:12352 |
|
|
std::pair<Series<T>, Series<T>> |
pd_series.h:12376 |
|
|
T |
pd_series.h:12410 |
|
|
T |
pd_series.h:12422 |
|
|
Series<T> |
pd_series.h:4567 |
|
|
Series<T> |
pd_series.h:4591 |
|
|
Series<T> |
pd_series.h:4615 |
|
|
Series<T> |
pd_series.h:4636 |
|
|
Series<T> |
pd_series.h:4435 |
|
|
Series<T> |
pd_series.h:4469 |
|
|
const MultiIndex& |
pd_series.h:1521 |
|
|
Series<T> |
pd_series.h:4475 |
|
|
Series<numpy::float64> |
pd_series.h:4660 |
|
|
Series<numpy::float64> |
pd_series.h:4676 |
|
|
Series<T> |
pd_series.h:13771 |
|
|
Series<T> |
pd_series.h:13777 |
|
|
Series<numpy::float64> |
pd_series.h:13840 |
|
|
std::pair<Series<T>, Series<T>> |
pd_series.h:13949 |
|
|
Series<T> |
pd_series.h:13876 |
|
|
Series<T> |
pd_series.h:13902 |
|
|
Series<T> |
pd_series.h:13819 |
|
|
Series<T> |
pd_series.h:13825 |
|
|
Series<numpy::float64> |
pd_series.h:13928 |
|
|
Series<T> |
pd_series.h:13792 |
|
|
Series<T> |
pd_series.h:13804 |
|
|
Series<numpy::float64> |
pd_series.h:13862 |
|
|
Series<T> |
pd_series.h:4387 |
|
|
Series<T> |
pd_series.h:4417 |
|
|
Series<T> |
pd_series.h:4326 |
|
|
Series<T> |
pd_series.h:4289 |
|
|
Series<T> |
pd_series.h:4423 |
|
|
Series<numpy::float64> |
pd_series.h:4549 |
|
|
Series<numpy::float64> |
pd_series.h:4556 |
Comparison#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
DataFrame |
pd_series.h:12194 |
|
|
Series<numpy::bool_> |
pd_series.h:12505 |
|
|
Series<numpy::bool_> |
pd_series.h:12513 |
|
|
bool |
pd_series.h:12637 |
|
|
Series<numpy::bool_> |
pd_series.h:12614 |
|
|
Series<numpy::bool_> |
pd_series.h:12622 |
|
|
std::mt19937 |
pd_series.h:15561 |
|
|
Series<numpy::bool_> |
pd_series.h:12593 |
|
|
Series<numpy::bool_> |
pd_series.h:12601 |
|
|
Series<numpy::bool_> |
pd_series.h:12568 |
|
|
Series<numpy::bool_> |
pd_series.h:12576 |
|
|
std::vector<std::vector<std::string>> |
pd_series.h:2069 |
|
|
std::vector<std::vector<std::string>> |
pd_series.h:2131 |
|
|
Series<numpy::bool_> |
pd_series.h:12547 |
|
|
Series<numpy::bool_> |
pd_series.h:12555 |
|
|
Series<numpy::bool_> |
pd_series.h:12526 |
|
|
Series<numpy::bool_> |
pd_series.h:12534 |
Sorting#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
numpy::NDArray<numpy::int64> |
pd_series.h:5202 |
|
|
Series<numpy::float64> |
pd_series.h:5480 |
|
|
static std::vector<double> |
pd_series.h:16005 |
|
|
size_t |
pd_series.h:15630 |
|
|
Series<T> |
pd_series.h:5348 |
|
|
Series<T> |
pd_series.h:5244 |
Reshaping#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
Series<T> |
pd_series.h:15746 |
|
|
Series<T> |
pd_series.h:8857 |
|
|
Series<T> |
pd_series.h:15680 |
|
|
Series<T> |
pd_series.h:15722 |
|
|
DataFrame |
pd_series.h:8240 |
|
|
Series<T> |
pd_series.h:15739 |
|
|
DataFrame |
pd_series.h:8881 |
Combining#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::pair<Series<T>, Series<T>> |
pd_series.h:11650 |
|
|
Series<numpy::float64> |
pd_series.h:16102 |
|
|
Series<numpy::float64> |
pd_series.h:16188 |
|
|
Series<T> |
pd_series.h:12086 |
|
|
Series<T> |
pd_series.h:12115 |
|
|
static Series<T> |
pd_series.h:11386 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:11215 |
Time Series#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
Series<T> |
pd_series.h:6771 |
|
|
std::optional<T> |
pd_series.h:11745 |
|
|
T |
pd_series.h:15832 |
|
|
Series<T> |
pd_series.h:15879 |
|
|
Series<T> |
pd_series.h:15929 |
|
|
Series<double> |
pd_series.h:5132 |
|
|
Series<numpy::float64> |
pd_series.h:3794 |
|
|
Series<T> |
pd_series.h:5158 |
|
|
Series<T> |
pd_series.h:8260 |
|
|
Series<T> |
pd_series.h:8281 |
|
|
Series<T> |
pd_series.h:15958 |
|
|
Series<T> |
pd_series.h:15984 |
I/O#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
void |
pd_series.h:8579 |
|
|
std::string |
pd_series.h:8082 |
|
|
std::map<std::string, T> |
pd_series.h:8221 |
|
|
void |
pd_series.h:8720 |
|
|
std::vector<uint8_t> |
pd_series.h:8817 |
|
|
void |
pd_series.h:8643 |
|
|
std::string |
pd_series.h:8146 |
|
|
std::string |
pd_series.h:8380 |
|
|
void |
pd_series.h:8441 |
|
|
file << |
pd_series.h:8446 |
|
|
std::vector<T> |
pd_series.h:8012 |
|
|
std::string |
pd_series.h:8460 |
|
|
void |
pd_series.h:8502 |
|
|
file << |
pd_series.h:8507 |
|
|
numpy::NDArray<U> |
pd_series.h:7956 |
|
|
numpy::NDArray<U> |
pd_series.h:7984 |
|
|
numpy::NDArray<U> |
pd_series.h:8003 |
|
|
std::vector<uint8_t> |
pd_series.h:8842 |
|
|
std::vector<uint8_t> |
pd_series.h:8757 |
|
|
void |
pd_series.h:8530 |
|
|
std::optional<int64_t> |
pd_series.h:8683 |
|
|
std::vector<uint8_t> |
pd_series.h:8786 |
|
|
std::string |
pd_series.h:10553 |
|
|
std::string |
pd_series.h:10989 |
|
|
std::string |
pd_series.h:10878 |
|
|
std::vector<std::string> |
pd_series.h:10371 |
|
|
DataArray<T> |
pd_series.h:8306 |
|
|
std::vector<T> |
pd_series.h:15732 |
Conversion#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
Series<U> |
pd_series.h:6890 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7332 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7091 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7012 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:11182 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:6963 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7271 |
|
|
Series<numpy::object_> |
pd_series.h:7624 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7262 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7146 |
|
|
bool |
pd_series.h:12024 |
|
|
static numpy::NDArray<numpy::bool_> |
pd_series.h:2442 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7659 |
|
|
Series<T> |
pd_series.h:6870 |
|
|
void |
pd_series.h:16261 |
|
|
void |
pd_series.h:3086 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:7787 |
|
|
Series<T> |
pd_series.h:15773 |
Iteration#
Set Operations#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | numpy::NDArray<numpy::bool_> | pd_series.h:5670 | |
| | Series<numpy::bool_> | pd_series.h:9047 | |
| | Series<T> | pd_series.h:5627 | |
Type Checking#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
static bool |
pd_series.h:621 |
|
|
static bool |
pd_series.h:666 |
|
|
static bool |
pd_series.h:647 |
|
|
bool |
pd_series.h:8023 |
|
|
static bool |
pd_series.h:629 |
|
|
bool |
pd_series.h:2807 |
|
|
bool |
pd_series.h:8043 |
Other Methods#
Signature |
Return Type |
Location |
Example |
|---|---|---|---|
|
std::vector<std::vector<double>> |
pd_series.h:13318 |
|
|
std::vector<std::vector<double>> |
pd_series.h:13359 |
|
|
std::vector<std::vector<double>> |
pd_series.h:13376 |
|
|
pd_series.h:8606 |
||
|
pd_series.h:8609 |
||
|
std::vector<std::vector<double>> |
pd_series.h:13336 |
|
|
Series<T> |
pd_series.h:4896 |
|
|
bool |
pd_series.h:9066 |
|
|
bool |
pd_series.h:16332 |
|
|
size_t |
pd_series.h:3875 |
|
|
size_t |
pd_series.h:3844 |
|
|
std::optional<double> |
pd_series.h:11780 |
|
|
std::vector<const IndexBase*> |
pd_series.h:1532 |
|
|
Series<numpy::bool_> |
pd_series.h:4863 |
|
|
size_t |
pd_series.h:11620 |
|
|
static std::string |
pd_series.h:1178 |
|
|
Series<T> |
pd_series.h:12047 |
|
|
CategoricalAccessor<Series<T>> |
pd_series.h:10522 |
|
|
bool |
pd_series.h:1262 |
|
|
Series<numpy::float64> |
pd_series.h:4724 |
|
|
pd_series.h:3961 |
||
|
pd_series.h:4043 |
||
|
void |
pd_series.h:4192 |
|
|
pd_series.h:4248 |
||
|
pd_series.h:16123 |
||
|
pd_series.h:16166 |
||
|
void |
pd_series.h:11605 |
|
|
void |
pd_series.h:1234 |
|
|
Series<T> |
pd_series.h:4923 |
|
|
std::unique_ptr<NDFrameBase> |
pd_series.h:11170 |
|
|
std::optional<double> |
pd_series.h:12209 |
|
|
std::optional<double> |
pd_series.h:12283 |
|
|
const std::optional<std::shared_ptr<pandas::DatetimeArray>>& |
pd_series.h:1277 |
|
|
static std::string |
pd_series.h:6922 |
|
|
numpy::datetime64 |
pd_series.h:4312 |
|
|
DatetimeProperties<Series<T>> |
pd_series.h:10497 |
|
|
std::string |
pd_series.h:1133 |
|
|
std::string |
pd_series.h:1191 |
|
|
const std::optional<std::string>& |
pd_series.h:1167 |
|
|
pd_series.h:8181 |
||
|
pd_series.h:8200 |
||
|
pd_series.h:8209 |
||
|
bool |
pd_series.h:1097 |
|
|
std::pair<numpy::NDArray<numpy::int64>, Series<T>> |
pd_series.h:12669 |
|
|
std::ofstream |
pd_series.h:8442 |
|
|
std::ofstream |
pd_series.h:8503 |
|
|
std::ofstream |
pd_series.h:8535 |
|
|
Series<T> |
pd_series.h:12823 |
|
|
Series<T> |
pd_series.h:1706 |
|
|
Series<T> |
pd_series.h:1726 |
|
|
Series<numpy::float64> |
pd_series.h:4710 |
|
|
oss << fmt_cat(0) << sep << |
pd_series.h:10855 |
|
|
oss << fmt_cat(2) << sep << |
pd_series.h:10857 |
|
|
oss << fmt_cat(last_start) << sep << |
pd_series.h:10860 |
|
|
oss << |
pd_series.h:10861 |
|
|
oss << |
pd_series.h:10862 |
|
|
oss << |
pd_series.h:10866 |
|
|
std::string |
pd_series.h:1353 |
|
|
pd_series.h:13565 |
||
|
bool |
pd_series.h:11609 |
|
|
bool |
pd_series.h:1241 |
|
|
bool |
pd_series.h:1291 |
|
|
bool |
pd_series.h:1514 |
|
|
bool |
pd_series.h:6905 |
|
|
bool |
pd_series.h:2524 |
|
|
std::string |
pd_series.h:10851 |
|
|
const IndexBase& |
pd_series.h:1389 |
|
|
IndexBase& |
pd_series.h:1396 |
|
|
static std::string |
pd_series.h:681 |
|
|
void |
pd_series.h:12918 |
|
|
void |
pd_series.h:16024 |
|
|
T |
pd_series.h:13535 |
|
|
std::unique_ptr<IndexBase> |
pd_series.h:16047 |
|
|
pd_series.h:8605 |
||
|
size_t |
pd_series.h:13608 |
|
|
const std::optional<std::shared_ptr<pandas::MixedTzDatetimeArray>>& |
pd_series.h:1282 |
|
|
std::optional<std::string> |
pd_series.h:1308 |
|
|
const std::string& |
pd_series.h:1344 |
|
|
double |
pd_series.h:1342 |
|
|
double |
pd_series.h:1343 |
|
|
bool |
pd_series.h:1328 |
|
|
bool |
pd_series.h:1335 |
|
|
bool |
pd_series.h:1366 |
|
|
bool |
pd_series.h:1346 |
|
|
int64_t |
pd_series.h:1372 |
|
|
const std::string& |
pd_series.h:1373 |
|
|
const std::vector<std::string>& |
pd_series.h:1350 |
|
|
size_t |
pd_series.h:1118 |
|
|
size_t |
pd_series.h:1111 |
|
|
static std::optional<std::string> |
pd_series.h:4118 |
|
|
static std::optional<std::string> |
pd_series.h:4165 |
|
|
std::optional<std::string> |
pd_series.h:4183 |
|
|
std::optional<std::string> |
pd_series.h:4140 |
|
|
pd_series.h:3964 |
||
|
pd_series.h:3984 |
||
|
pd_series.h:4012 |
||
|
pd_series.h:4032 |
||
|
pd_series.h:4251 |
||
|
pd_series.h:4413 |
||
|
pd_series.h:4465 |
||
|
pd_series.h:4587 |
||
|
pd_series.h:4632 |
||
|
pd_series.h:4672 |
||
|
void |
pd_series.h:16056 |
|
|
void |
pd_series.h:16070 |
|
|
numpy::NDArray<T> |
pd_series.h:13974 |
|
|
Series<T> |
pd_series.h:15432 |
|
|
std::string |
pd_series.h:11133 |
|
|
size_t |
pd_series.h:6705 |
|
|
size_t |
pd_series.h:6716 |
|
|
size_t |
pd_series.h:1832 |
|
|
pd_series.h:1942 |
||
|
pd_series.h:1951 |
||
|
std::string |
pd_series.h:9454 |
|
|
std::string |
pd_series.h:9599 |
|
|
Series<numpy::float64> |
pd_series.h:4695 |
|
|
std::string |
pd_series.h:9117 |
|
|
std::string |
pd_series.h:9178 |
|
|
std::string |
pd_series.h:9233 |
|
|
std::string |
pd_series.h:9738 |
|
|
std::string |
pd_series.h:9778 |
|
|
std::string |
pd_series.h:14891 |
|
|
std::string |
pd_series.h:15230 |
|
|
std::string |
pd_series.h:15281 |
|
|
void |
pd_series.h:1255 |
|
|
void |
pd_series.h:1270 |
|
|
void |
pd_series.h:1267 |
|
|
void |
pd_series.h:1278 |
|
|
void |
pd_series.h:1211 |
|
|
void |
pd_series.h:10537 |
|
|
void |
pd_series.h:1285 |
|
|
void |
pd_series.h:1283 |
|
|
void |
pd_series.h:1501 |
|
|
void |
pd_series.h:1315 |
|
|
void |
pd_series.h:1337 |
|
|
void |
pd_series.h:1333 |
|
|
void |
pd_series.h:1336 |
|
|
void |
pd_series.h:1367 |
|
|
void |
pd_series.h:1347 |
|
|
void |
pd_series.h:1368 |
|
|
void |
pd_series.h:1348 |
|
|
void |
pd_series.h:1349 |
|
|
void |
pd_series.h:10534 |
|
|
std::vector<size_t> |
pd_series.h:1104 |
|
|
size_t |
pd_series.h:1090 |
|
|
propagate_index_to(result, |
pd_series.h:4046 |
|
|
propagate_index_to(result, |
pd_series.h:4058 |
|
|
propagate_index_to(result, |
pd_series.h:4078 |
|
|
propagate_index_to(result, |
pd_series.h:4092 |
|
|
propagate_index_to(result, |
pd_series.h:4109 |
|
|
propagate_index_to(result, |
pd_series.h:4282 |
|
|
propagate_index_to(result, |
pd_series.h:4318 |
|
|
propagate_index_to(result, |
pd_series.h:4364 |
|
|
propagate_index_to(res, |
pd_series.h:4604 |
|
|
propagate_index_to(res, |
pd_series.h:4649 |
|
|
propagate_index_to(res, |
pd_series.h:4684 |
|
|
propagate_index_to(res, |
pd_series.h:4703 |
|
|
propagate_index_to(res, |
pd_series.h:4912 |
|
|
propagate_index_to(q, |
pd_series.h:12371 |
|
|
propagate_index_to(r, |
pd_series.h:12372 |
|
|
propagate_index_to(q, |
pd_series.h:12400 |
|
|
propagate_index_to(r, |
pd_series.h:12401 |
|
|
propagate_index_to(result, |
pd_series.h:13800 |
|
|
propagate_index_to(res, |
pd_series.h:13849 |
|
|
propagate_index_to(res, |
pd_series.h:13889 |
|
|
propagate_index_to(res, |
pd_series.h:13915 |
|
|
propagate_index_to(res, |
pd_series.h:13936 |
|
|
propagate_index_to(q, |
pd_series.h:13966 |
|
|
propagate_index_to(r, |
pd_series.h:13967 |
|
|
SparseAccessor<Series<T>> |
pd_series.h:10546 |
|
|
StringMethods<Series<T>> |
pd_series.h:10442 |
|
|
pd_series.h:10681 |
||
|
pd_series.h:11057 |
||
|
Series<T> |
pd_series.h:6830 |
|
|
void |
pd_series.h:16039 |
|
|
std::unordered_set<T, std::hash<T>> |
pd_series.h:9048 |
|
|
const numpy::NDArray<T>& |
pd_series.h:1382 |
Code Examples#
The following examples are extracted from the test suite.
Series (pd_test_1_all.cpp:11161)
11151 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153 // Pipe applies function to entire Series
11154 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155 auto mean_val = ser.mean();
11156 std::vector<double> result;
11157 for (size_t i = 0; i < ser.size(); ++i) {
11158 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159 }
11160 return pandas::Series<double>(result, ser.name());
11161 };
11162
11163 auto result = s.pipe(add_mean, 10.0);
11164
11165 bool passed = true;
11166 // mean is 2.5, offset is 10.0, so each value + 12.5
11167 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168 for (size_t i = 0; i < result.size(); ++i) {
11169 if (!approx_equal(result[i], expected[i])) {
11170 passed = false;
Series (pd_test_1_all.cpp:11161)
11151 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153 // Pipe applies function to entire Series
11154 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155 auto mean_val = ser.mean();
11156 std::vector<double> result;
11157 for (size_t i = 0; i < ser.size(); ++i) {
11158 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159 }
11160 return pandas::Series<double>(result, ser.name());
11161 };
11162
11163 auto result = s.pipe(add_mean, 10.0);
11164
11165 bool passed = true;
11166 // mean is 2.5, offset is 10.0, so each value + 12.5
11167 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168 for (size_t i = 0; i < result.size(); ++i) {
11169 if (!approx_equal(result[i], expected[i])) {
11170 passed = false;
Series (pd_test_1_all.cpp:11161)
11151 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153 // Pipe applies function to entire Series
11154 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155 auto mean_val = ser.mean();
11156 std::vector<double> result;
11157 for (size_t i = 0; i < ser.size(); ++i) {
11158 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159 }
11160 return pandas::Series<double>(result, ser.name());
11161 };
11162
11163 auto result = s.pipe(add_mean, 10.0);
11164
11165 bool passed = true;
11166 // mean is 2.5, offset is 10.0, so each value + 12.5
11167 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168 for (size_t i = 0; i < result.size(); ++i) {
11169 if (!approx_equal(result[i], expected[i])) {
11170 passed = false;
Series (pd_test_1_all.cpp:11161)
11151 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153 // Pipe applies function to entire Series
11154 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155 auto mean_val = ser.mean();
11156 std::vector<double> result;
11157 for (size_t i = 0; i < ser.size(); ++i) {
11158 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159 }
11160 return pandas::Series<double>(result, ser.name());
11161 };
11162
11163 auto result = s.pipe(add_mean, 10.0);
11164
11165 bool passed = true;
11166 // mean is 2.5, offset is 10.0, so each value + 12.5
11167 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168 for (size_t i = 0; i < result.size(); ++i) {
11169 if (!approx_equal(result[i], expected[i])) {
11170 passed = false;
Series (pd_test_1_all.cpp:11161)
11151 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153 // Pipe applies function to entire Series
11154 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155 auto mean_val = ser.mean();
11156 std::vector<double> result;
11157 for (size_t i = 0; i < ser.size(); ++i) {
11158 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159 }
11160 return pandas::Series<double>(result, ser.name());
11161 };
11162
11163 auto result = s.pipe(add_mean, 10.0);
11164
11165 bool passed = true;
11166 // mean is 2.5, offset is 10.0, so each value + 12.5
11167 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168 for (size_t i = 0; i < result.size(); ++i) {
11169 if (!approx_equal(result[i], expected[i])) {
11170 passed = false;
Series (pd_test_1_all.cpp:11161)
11151 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153 // Pipe applies function to entire Series
11154 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155 auto mean_val = ser.mean();
11156 std::vector<double> result;
11157 for (size_t i = 0; i < ser.size(); ++i) {
11158 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159 }
11160 return pandas::Series<double>(result, ser.name());
11161 };
11162
11163 auto result = s.pipe(add_mean, 10.0);
11164
11165 bool passed = true;
11166 // mean is 2.5, offset is 10.0, so each value + 12.5
11167 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168 for (size_t i = 0; i < result.size(); ++i) {
11169 if (!approx_equal(result[i], expected[i])) {
11170 passed = false;
Series (pd_test_1_all.cpp:11161)
11151 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153 // Pipe applies function to entire Series
11154 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155 auto mean_val = ser.mean();
11156 std::vector<double> result;
11157 for (size_t i = 0; i < ser.size(); ++i) {
11158 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159 }
11160 return pandas::Series<double>(result, ser.name());
11161 };
11162
11163 auto result = s.pipe(add_mean, 10.0);
11164
11165 bool passed = true;
11166 // mean is 2.5, offset is 10.0, so each value + 12.5
11167 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168 for (size_t i = 0; i < result.size(); ++i) {
11169 if (!approx_equal(result[i], expected[i])) {
11170 passed = false;
at (pd_test_1_all.cpp:6581)
6571 // Test isna/notna with float data
6572 {
6573 std::map<std::string, std::vector<numpy::float64>> float_data;
6574 float_data["X"] = {1.0, std::nan(""), 3.0};
6575 float_data["Y"] = {4.0, 5.0, std::nan("")};
6576 pandas::DataFrame df_na(float_data);
6577
6578 auto na_mask = df_na.isna();
6579 // Row 1, col 0 (X) should be NA
6580 if (!na_mask.getElementAt({1, 0})) {
6581 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583 }
6584 // Row 2, col 1 (Y) should be NA
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
at_time (pd_test_2_all.cpp:728)
718 std::cout << "====================================== [OK] pd_test_asof test suite ========================== " << std::endl;
719 return 0;
720 }
721
722 } // namespace dataframe_tests
723 // ------------------- pd_test_asof.cpp (end) -----------------------------
724
725 // ------------------- pd_test_at_time.cpp (start) -----------------------------
726 // dataframe_tests/pd_test_at_time.cpp
727 // Tests for DataFrame.at_time() method (pandas 2.0+ API)
728 // Selects values at particular time of day from datetime-indexed DataFrame
729 #include <iostream>
730 #include <stdexcept>
731 #include <vector>
732 #include <string>
733 #include <map>
734 #include "../pandas/pd_dataframe.h"
735
736 // CRITICAL: No using namespace directives
first (pd_test_1_all.cpp:11616)
11606 void pd_test_groupby_first_last() {
11607 std::cout << "========= GroupBy first/last ====================";
11608
11609 std::map<std::string, std::vector<double>> data = {
11610 {"category", {1.0, 1.0, 2.0, 2.0}},
11611 {"value", {10.0, 20.0, 30.0, 40.0}}
11612 };
11613 pandas::DataFrame df(data);
11614
11615 auto first_result = df.groupby("category").first();
11616 auto last_result = df.groupby("category").last();
11617
11618 // First for group 1: 10, group 2: 30
11619 // Last for group 1: 20, group 2: 40
11620 double first1 = std::stod(first_result["value"].get_value_str(0));
11621 double first2 = std::stod(first_result["value"].get_value_str(1));
11622
11623 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11624 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11625 if (!passed) {
first_valid_index (pd_test_1_all.cpp:20555)
20545 std::vector<double> values = {
20546 std::numeric_limits<double>::quiet_NaN(),
20547 std::numeric_limits<double>::quiet_NaN(),
20548 3.0,
20549 4.0,
20550 5.0
20551 };
20552 pandas::Series<double> s(values, "test");
20553
20554 auto first_idx = s.first_valid_index();
20555
20556 bool passed = first_idx.has_value() && first_idx.value() == 2;
20557
20558 if (!passed) {
20559 std::cout << " [FAIL] : in pd_test_timeseries_first_valid_index() : expected index 2" << std::endl;
20560 throw std::runtime_error("pd_test_timeseries_first_valid_index failed");
20561 }
20562
20563 std::cout << " -> tests passed" << std::endl;
20564 }
get (pd_test_1_all.cpp:10290)
10280 void pd_test_extension_index_get_loc_unique() {
10281 std::cout << "========= get_loc (unique) =========================";
10282
10283 pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284 pandas::CategoricalIndex idx(arr);
10285
10286 auto loc_apple = idx.get_loc("apple");
10287 auto loc_banana = idx.get_loc("banana");
10288 auto loc_cherry = idx.get_loc("cherry");
10289
10290 bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291 std::get<size_t>(loc_banana) == 1 &&
10292 std::get<size_t>(loc_cherry) == 2);
10293 if (!passed) {
10294 std::cout << " [FAIL] : in pd_test_extension_index_get_loc_unique() : get_loc check failed" << std::endl;
10295 throw std::runtime_error("pd_test_extension_index_get_loc_unique failed");
10296 }
10297
10298 std::cout << " -> tests passed" << std::endl;
10299 }
get_by_label_duplicates (pd_test_3_all.cpp:28133)
28123 if (fail) throw std::runtime_error("pd_test_getitem_timedelta_str_not_found failed");
28124 }
28125
28126 void pd_test_getitem_duplicate_labels() {
28127 std::cout << " -- pd_test_getitem_duplicate_labels --" << std::endl;
28128 int fail = 0;
28129 std::vector<numpy::float64> vals{1.0, 2.0, 3.0, 4.0};
28130 pandas::Series<numpy::float64> s(vals, "x");
28131 pandas::Index<std::string> idx(std::vector<std::string>{"a", "x", "x", "x"});
28132 s.set_index(idx);
28133 auto lookup = s.get_by_label_duplicates("x");
28134 if (lookup.count != 3) { std::cout << " FAIL: expected count=3, got " << lookup.count << std::endl; fail++; }
28135 if (!lookup.sub_series.has_value()) { std::cout << " FAIL: sub_series missing" << std::endl; fail++; }
28136 else if (lookup.sub_series->size() != 3) { std::cout << " FAIL: sub size " << lookup.sub_series->size() << std::endl; fail++; }
28137 auto lookup_one = s.get_by_label_duplicates("a");
28138 if (lookup_one.count != 1) { std::cout << " FAIL: expected count=1" << std::endl; fail++; }
28139 if (lookup_one.single_index != 0) { std::cout << " FAIL: expected single_index=0" << std::endl; fail++; }
28140 auto lookup_zero = s.get_by_label_duplicates("z");
28141 if (lookup_zero.count != 0) { std::cout << " FAIL: expected count=0" << std::endl; fail++; }
28142 if (fail == 0) std::cout << " OK" << std::endl;
28143 if (fail) throw std::runtime_error("pd_test_getitem_duplicate_labels failed");
get_cat_categories (pd_test_2_all.cpp:20374)
20364 auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365 cs->set_dtype_override("category");
20366 cs->set_cat_categories({"a", "b", "c"});
20367 cs->set_cat_ordered(true);
20368 df.insert(0, "cat", std::move(cs), true);
20369
20370 auto s = df.get_column_as_string_series("cat");
20371 check(s.dtype_name() == "category", "cat dtype");
20372 check(s.has_cat_categories(), "cat has_categories");
20373 check(s.cat_ordered() == true, "cat ordered");
20374 auto cats = s.get_cat_categories();
20375 check(cats.size() == 3, "cat categories size");
20376 std::set<std::string> cat_set(cats.begin(), cats.end());
20377 check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381 std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
20382
20383 // Test DatetimeIndex freq propagation
20384 pandas::DataFrame df;
get_freq (pd_test_2_all.cpp:20397)
20387 std::vector<numpy::datetime64> ts = {
20388 numpy::datetime64(0LL, numpy::DateTimeUnit::Day),
20389 numpy::datetime64(1LL, numpy::DateTimeUnit::Day),
20390 numpy::datetime64(2LL, numpy::DateTimeUnit::Day)
20391 };
20392 auto dt_idx = std::make_unique<pandas::DatetimeIndex>(ts);
20393 dt_idx->set_freq(std::string("D"));
20394 df.set_index(std::move(dt_idx));
20395
20396 auto s = df.extract_column_as_numeric_series("val");
20397 check(s.get_freq().has_value(), "freq propagated");
20398 if (s.get_freq().has_value()) {
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
get_optional (pd_test_1_all.cpp:6741)
6731 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex cols");
6732 }
6733 }
6734
6735 // Test get_optional
6736 {
6737 std::map<std::string, std::vector<int>> data;
6738 data["A"] = {1, 2, 3};
6739 pandas::DataFrame df(data);
6740
6741 auto col_opt = df.get_optional("A");
6742 if (!col_opt.has_value()) {
6743 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : get_optional A should exist" << std::endl;
6744 throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional A");
6745 }
6746
6747 auto missing = df.get_optional("Z");
6748 if (missing.has_value()) {
6749 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : get_optional Z should not exist" << std::endl;
6750 throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751 }
get_slice_as_series (pd_test_3_all.cpp:28169)
28159 }
28160 if (fail == 0) std::cout << " OK" << std::endl;
28161 if (fail) throw std::runtime_error("pd_test_getitem_duplicate_preserves_dtype failed");
28162}
28163
28164void pd_test_getitem_period_sub_series() {
28165 std::cout << " -- pd_test_getitem_period_sub_series --" << std::endl;
28166 int fail = 0;
28167 std::vector<numpy::float64> vals{10.0, 20.0, 30.0, 40.0, 50.0};
28168 pandas::Series<numpy::float64> s(vals, "v");
28169 auto sub = s.get_slice_as_series(1, 4);
28170 if (sub.size() != 3) { std::cout << " FAIL: size " << sub.size() << std::endl; fail++; }
28171 if (sub[0] != 20.0) { std::cout << " FAIL: [0] " << sub[0] << std::endl; fail++; }
28172 if (sub[2] != 40.0) { std::cout << " FAIL: [2] " << sub[2] << std::endl; fail++; }
28173 if (fail == 0) std::cout << " OK" << std::endl;
28174 if (fail) throw std::runtime_error("pd_test_getitem_period_sub_series failed");
28175}
28176
28177int pd_test_getitem_edge_main_impl() {
28178 std::cout << "====================================== pd_test_getitem_edge ==============================" << std::endl;
28179 int fail = 0;
get_string (pd_test_3_all.cpp:27746)
27736 }
27737 }
27738
27739 pandas::Series<numpy::int64> si({10, 20, 30}, "ints");
27740 auto result2 = si.astype("str");
27741 auto* str_s2 = dynamic_cast<pandas::Series<std::string>*>(result2.get());
27742 if (!str_s2) {
27743 std::cout << " FAIL: expected Series<string> from int" << std::endl;
27744 fail++;
27745 } else {
27746 if (str_s2->get_string(0) != "10") {
27747 std::cout << " FAIL: expected '10', got '" << str_s2->get_string(0) << "'" << std::endl;
27748 fail++;
27749 }
27750 }
27751
27752 if (fail == 0) std::cout << " OK" << std::endl;
27753}
27754
27755void pd_test_astype_datetime_to_string() {
27756 std::cout << " -- pd_test_astype_datetime_to_string --" << std::endl;
get_value_bool (pd_test_5_all.cpp:35197)
35187 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35188 pandas_tests::check(df["X"].get_value_double(0) == 1.0, "case_2.idx0_one", local_fail);
35189 pandas_tests::check(std::isnan(df["X"].get_value_double(1)),
35190 "case_2.idx1_nan", local_fail);
35191 pandas_tests::check(df["X"].get_value_double(2) == 0.0, "case_2.idx2_zero", local_fail);
35192}
35193
35194void bool_nullable_826495_case_3_get_value_bool_mask_aware(int& local_fail) {
35195 pandas::DataFrame df;
35196 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197 pandas_tests::check(df["X"].get_value_bool(0) == true, "case_3.idx0_true", local_fail);
35198 pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199 pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false", local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203 pandas::DataFrame df;
35204 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205 pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206 pandas_tests::check(df["X"].is_na_at(1) == true, "case_4.idx1_is_na", local_fail);
35207 pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
get_value_double (pd_test_2_all.cpp:19160)
19150 std::map<std::string, std::string> col_funcs;
19151 col_funcs["a"] = "sum";
19152 col_funcs["b"] = "mean";
19153
19154 pandas::Series<numpy::float64> result = df.agg_to_series(col_funcs);
19155
19156 // a.sum() = 10.0, b.mean() = 25.0
19157 check(result.size() == 2, "result_size_2");
19158
19159 // std::map iterates in alphabetical order: a, b
19160 check(std::abs(result.get_value_double(0) - 10.0) < 1e-9, "a_sum_10");
19161 check(std::abs(result.get_value_double(1) - 25.0) < 1e-9, "b_mean_25");
19162
19163 // Check index labels
19164 check(result.index().get_value_str(0) == "a", "index_0_a");
19165 check(result.index().get_value_str(1) == "b", "index_1_b");
19166}
19167
19168void pd_test_agg_dispatch_dict_simple_single_col() {
19169 std::cout << " -- pd_test_agg_dispatch_dict_simple_single_col --" << std::endl;
get_value_str (pd_test_1_all.cpp:4665)
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
4666 passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667 if (!passed) {
4668 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670 }
4671
4672 // A-B correlation should be 1.0 (perfect correlation)
4673 std::string ab = corr_df["B"].get_value_str(0);
4674 passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675 if (!passed) {
get_value_str (pd_test_1_all.cpp:4665)
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
4666 passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667 if (!passed) {
4668 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670 }
4671
4672 // A-B correlation should be 1.0 (perfect correlation)
4673 std::string ab = corr_df["B"].get_value_str(0);
4674 passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675 if (!passed) {
head (pd_test_1_all.cpp:6301)
6291 void pd_test_dataframe_indexing() {
6292 std::cout << "========= indexing (loc/iloc) ==============";
6293
6294 std::map<std::string, std::vector<numpy::float64>> data;
6295 data["A"] = {10.0, 20.0, 30.0, 40.0, 50.0};
6296 data["B"] = {1.0, 2.0, 3.0, 4.0, 5.0};
6297
6298 pandas::DataFrame df(data);
6299
6300 // Test head
6301 auto head_df = df.head(3);
6302 if (head_df.nrows() != 3) {
6303 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304 throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305 }
6306
6307 // Test tail
6308 auto tail_df = df.tail(2);
6309 if (tail_df.nrows() != 2) {
6310 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311 throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
iat (pd_test_1_all.cpp:22028)
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
22029 std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030 std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031 std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033 if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034 passed = false;
22035 error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036 ", A[2]=" + a2 + ", A[3]=" + a3;
22037 }
iat_resolved (pd_test_5_all.cpp:92945)
92935 pandas_tests::check(result.iat(0) == 1.0,
92936 "f_test_25_loc_list_positions_happy_553011.iat0_eq_1", local_fail);
92937 pandas_tests::check(result.iat(2) == 5.0,
92938 "f_test_25_loc_list_positions_happy_553011.iat2_eq_5", local_fail);
92939}
92940
92941void case_23_iat_resolved_positive() {
92942 std::cout << "-- case_23_iat_resolved_positive()\n";
92943 int local_fail = 0;
92944 auto s = make_f64_series_5();
92945 pandas_tests::check(s.iat_resolved(0) == 1.0,
92946 "f_test_25_iat_resolved_positive_446201.idx0_eq_1", local_fail);
92947 pandas_tests::check(s.iat_resolved(2) == 3.0,
92948 "f_test_25_iat_resolved_positive_446201.idx2_eq_3", local_fail);
92949 pandas_tests::check(s.iat_resolved(4) == 5.0,
92950 "f_test_25_iat_resolved_positive_446201.idx4_eq_5", local_fail);
92951}
92952
92953void case_24_iat_resolved_negative_one() {
92954 std::cout << "-- case_24_iat_resolved_negative_one()\n";
92955 int local_fail = 0;
idxmax (pd_test_1_all.cpp:23956)
23947 std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23948 return 0;
23949 }
23950
23951} // namespace dataframe_tests
23952// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23953
23954// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23955// dataframe_tests/pd_test_idxmax_idxmin.cpp
23956// Test for DataFrame.idxmax() and idxmin() methods
23957
23958#include <iostream>
23959#include <stdexcept>
23960#include <cmath>
23961#include <limits>
23962#include "../pandas/pd_dataframe.h"
23963
23964// CRITICAL: No using namespace directives
23965
23966namespace dataframe_tests {
idxmax_typed (pd_test_3_all.cpp:26151)
26141// ------------------- pd_test_where_series (end) ---------------------
26142
26143// ------------------- pd_test_idxmax_min_typed (begin) ---------------
26144#include "../pandas/pd_datetime_index.h"
26145
26146namespace dataframe_tests_idxmax_min_typed {
26147
26148void pd_test_idxmax_min_typed_regular_max() {
26149 std::cout << " pd_test_idxmax_min_typed_regular_max: ";
26150 ::pandas::Series<::numpy::float64> s({1.0, 3.0, 2.0, 5.0, 4.0});
26151 auto [is_dt, data] = s.idxmax_typed();
26152 if (is_dt) throw std::runtime_error("Expected non-datetime result");
26153 if (data.second != "3") throw std::runtime_error("Expected label '3', got '" + data.second + "'");
26154 std::cout << "PASSED" << std::endl;
26155}
26156
26157void pd_test_idxmax_min_typed_regular_min() {
26158 std::cout << " pd_test_idxmax_min_typed_regular_min: ";
26159 ::pandas::Series<::numpy::float64> s({3.0, 1.0, 2.0, 5.0, 4.0});
26160 auto [is_dt, data] = s.idxmin_typed();
26161 if (is_dt) throw std::runtime_error("Expected non-datetime result");
idxmin (pd_test_1_all.cpp:23956)
23947 std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23948 return 0;
23949 }
23950
23951} // namespace dataframe_tests
23952// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23953
23954// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23955// dataframe_tests/pd_test_idxmax_idxmin.cpp
23956// Test for DataFrame.idxmax() and idxmin() methods
23957
23958#include <iostream>
23959#include <stdexcept>
23960#include <cmath>
23961#include <limits>
23962#include "../pandas/pd_dataframe.h"
23963
23964// CRITICAL: No using namespace directives
23965
23966namespace dataframe_tests {
idxmin_typed (pd_test_3_all.cpp:26160)
26150 ::pandas::Series<::numpy::float64> s({1.0, 3.0, 2.0, 5.0, 4.0});
26151 auto [is_dt, data] = s.idxmax_typed();
26152 if (is_dt) throw std::runtime_error("Expected non-datetime result");
26153 if (data.second != "3") throw std::runtime_error("Expected label '3', got '" + data.second + "'");
26154 std::cout << "PASSED" << std::endl;
26155}
26156
26157void pd_test_idxmax_min_typed_regular_min() {
26158 std::cout << " pd_test_idxmax_min_typed_regular_min: ";
26159 ::pandas::Series<::numpy::float64> s({3.0, 1.0, 2.0, 5.0, 4.0});
26160 auto [is_dt, data] = s.idxmin_typed();
26161 if (is_dt) throw std::runtime_error("Expected non-datetime result");
26162 if (data.second != "1") throw std::runtime_error("Expected label '1', got '" + data.second + "'");
26163 std::cout << "PASSED" << std::endl;
26164}
26165
26166void pd_test_idxmax_min_typed_datetime_max() {
26167 std::cout << " pd_test_idxmax_min_typed_datetime_max: ";
26168 // Create DatetimeIndex with 3 timestamps, values [1,3,2] => max at pos 1
26169 ::pandas::Series<::numpy::float64> s({1.0, 3.0, 2.0});
26170 std::vector<::numpy::datetime64> dates;
iloc (pd_test_1_all.cpp:19149)
19139 pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141 // Positional indexing
19142 bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143 if (!passed) {
19144 std::cout << " [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145 throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146 }
19147
19148 // iloc slice
19149 auto slice = s.iloc(1, 4);
19150 passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151 if (!passed) {
19152 std::cout << " [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153 throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154 }
19155
19156 // iloc with indices
19157 auto selected = s.iloc({0, 2, 4});
19158 passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159 if (!passed) {
iloc (pd_test_1_all.cpp:19149)
19139 pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141 // Positional indexing
19142 bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143 if (!passed) {
19144 std::cout << " [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145 throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146 }
19147
19148 // iloc slice
19149 auto slice = s.iloc(1, 4);
19150 passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151 if (!passed) {
19152 std::cout << " [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153 throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154 }
19155
19156 // iloc with indices
19157 auto selected = s.iloc({0, 2, 4});
19158 passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159 if (!passed) {
last (pd_test_1_all.cpp:11617)
11607 void pd_test_groupby_first_last() {
11608 std::cout << "========= GroupBy first/last ====================";
11609
11610 std::map<std::string, std::vector<double>> data = {
11611 {"category", {1.0, 1.0, 2.0, 2.0}},
11612 {"value", {10.0, 20.0, 30.0, 40.0}}
11613 };
11614 pandas::DataFrame df(data);
11615
11616 auto first_result = df.groupby("category").first();
11617 auto last_result = df.groupby("category").last();
11618
11619 // First for group 1: 10, group 2: 30
11620 // Last for group 1: 20, group 2: 40
11621 double first1 = std::stod(first_result["value"].get_value_str(0));
11622 double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624 bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625 (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626 if (!passed) {
11627 std::cout << " [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
last_valid_index (pd_test_1_all.cpp:20579)
20570 std::vector<double> values = {
20571 1.0,
20572 2.0,
20573 3.0,
20574 std::numeric_limits<double>::quiet_NaN(),
20575 std::numeric_limits<double>::quiet_NaN()
20576 };
20577 pandas::Series<double> s(values, "test");
20578
20579 auto last_idx = s.last_valid_index();
20580
20581 bool passed = last_idx.has_value() && last_idx.value() == 2;
20582
20583 if (!passed) {
20584 std::cout << " [FAIL] : in pd_test_timeseries_last_valid_index() : expected index 2" << std::endl;
20585 throw std::runtime_error("pd_test_timeseries_last_valid_index failed");
20586 }
20587
20588 std::cout << " -> tests passed" << std::endl;
20589 }
loc (pd_test_3_all.cpp:10916)
10906 {{"A","A","B"}, {"x","y","x"}});
10907 df.set_index(mi);
10908 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909 std::cout << " [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910 throw std::runtime_error("set_index MultiIndex failed");
10911 }
10912 std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916 std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917 pandas::DataFrame df;
10918 df.add_column<int64_t>("val", {10, 20, 30, 40});
10919 auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920 {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921 df.set_multiindex(mi);
10922 pandas::DataFrame result = df.loc("London");
10923 if (result.nrows() != 2) {
10924 std::cout << " [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925 throw std::runtime_error("loc single-arg failed");
10926 }
loc (pd_test_3_all.cpp:10916)
10906 {{"A","A","B"}, {"x","y","x"}});
10907 df.set_index(mi);
10908 if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909 std::cout << " [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910 throw std::runtime_error("set_index MultiIndex failed");
10911 }
10912 std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916 std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917 pandas::DataFrame df;
10918 df.add_column<int64_t>("val", {10, 20, 30, 40});
10919 auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920 {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921 df.set_multiindex(mi);
10922 pandas::DataFrame result = df.loc("London");
10923 if (result.nrows() != 2) {
10924 std::cout << " [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925 throw std::runtime_error("loc single-arg failed");
10926 }
loc_slice (pd_test_5_all.cpp:92767)
92758void case_13_loc_slice_none_bounds() {
92759 // start=None, stop=None, step=+1: full copy.
92760 std::cout << "-- case_13_loc_slice_none_bounds()\n";
92761 int local_fail = 0;
92762 auto s = make_f64_series_5();
92763 pandas::SliceSpec spec;
92764 spec.start = std::monostate{};
92765 spec.stop = std::monostate{};
92766 spec.step = 1;
92767 auto result = s.loc_slice(spec);
92768 pandas_tests::check(result.size() == 5,
92769 "f_test_25_loc_slice_none_bounds_310099.size_eq_5", local_fail);
92770 pandas_tests::check(result.iat(0) == 1.0,
92771 "f_test_25_loc_slice_none_bounds_310099.iat0_eq_1", local_fail);
92772 pandas_tests::check(result.iat(4) == 5.0,
92773 "f_test_25_loc_slice_none_bounds_310099.iat4_eq_5", local_fail);
92774}
92775
92776void case_14_loc_slice_int_bounds() {
92777 // start=int(1), stop=int(4), step=+1: positions [1,4) -> 3 elements.
loc_string (pd_test_5_all.cpp:90637)
90627 throw std::runtime_error(case_label + " failed");
90628 }
90629 std::cout << " " << case_label << " -> OK\n";
90630}
90631
90632void case_1_plain_string_index_exact() {
90633 std::cout << "-- case_1_plain_string_index_exact\n";
90634 auto s = make_float_series_with_str_index({10.0, 20.0, 30.0},
90635 {"a", "b", "c"});
90636 int local_fail = 0;
90637 pandas::StringLookupResult r = s.loc_string("b");
90638 pandas_tests::check(r.kind == pandas::StringLookupResult::Kind::SCALAR,
90639 "case_1.kind_is_SCALAR", local_fail);
90640 pandas_tests::check(r.scalar_index == 1,
90641 "case_1.scalar_index_eq_1", local_fail);
90642 pandas_tests::check(r.range == std::make_pair<size_t, size_t>(0, 0),
90643 "case_1.range_unused_default", local_fail);
90644 finish_case("case_1_plain_string_index_exact", local_fail);
90645}
90646
90647void case_2_plain_string_index_miss() {
loc_timedelta (pd_test_5_all.cpp:92739)
92729 "f_test_25_loc_timestamp_edge_first_996311.kind_scalar", local_fail);
92730 pandas_tests::check(r.scalar == 10.0,
92731 "f_test_25_loc_timestamp_edge_first_996311.scalar_eq_10", local_fail);
92732}
92733
92734void case_11_loc_timedelta_exact_hit() {
92735 std::cout << "-- case_11_loc_timedelta_exact_hit()\n";
92736 int local_fail = 0;
92737 auto s = make_tdi_f64_series_3();
92738 pandas::Timedelta key(2, 0); // 2 days
92739 auto r = s.loc_timedelta(key);
92740 pandas_tests::check(
92741 r.kind == pandas::LookupResult<numpy::float64>::Kind::SCALAR,
92742 "f_test_25_loc_timedelta_exact_hit_421003.kind_scalar", local_fail);
92743 pandas_tests::check(r.scalar == 200.0,
92744 "f_test_25_loc_timedelta_exact_hit_421003.scalar_eq_200", local_fail);
92745}
92746
92747void case_12_loc_timedelta_not_found() {
92748 std::cout << "-- case_12_loc_timedelta_not_found()\n";
92749 int local_fail = 0;
loc_timestamp (pd_test_5_all.cpp:92701)
92691 pandas_tests::check(threw,
92692 "f_test_25_filter_bool_series_mismatch_throws_172339.value_error",
92693 local_fail);
92694}
92695
92696void case_8_loc_timestamp_exact_hit() {
92697 std::cout << "-- case_8_loc_timestamp_exact_hit()\n";
92698 int local_fail = 0;
92699 auto s = make_dti_f64_series_3();
92700 pandas::Timestamp key(2024, 1, 2, 0, 0, 0);
92701 auto r = s.loc_timestamp(key);
92702 pandas_tests::check(
92703 r.kind == pandas::LookupResult<numpy::float64>::Kind::SCALAR,
92704 "f_test_25_loc_timestamp_exact_hit_318227.kind_scalar", local_fail);
92705 pandas_tests::check(r.scalar == 20.0,
92706 "f_test_25_loc_timestamp_exact_hit_318227.scalar_eq_20", local_fail);
92707}
92708
92709void case_9_loc_timestamp_not_found() {
92710 std::cout << "-- case_9_loc_timestamp_not_found()\n";
92711 int local_fail = 0;
mask (pd_test_1_all.cpp:9119)
9109void pd_test_datetime_mixin_array_constructor() {
9110 std::cout << "========= DatetimeTDMixin array constructor =========================";
9111
9112 // Create DatetimeArray with some values
9113 numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9114 data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2001
9115 data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2017
9116 data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2020
9117
9118 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9119 mask.setElementAt({0}, numpy::bool_(false));
9120 mask.setElementAt({1}, numpy::bool_(false));
9121 mask.setElementAt({2}, numpy::bool_(false));
9122
9123 pandas::DatetimeArray arr(data, mask);
9124 pandas::DatetimeTDMixin idx(arr, "timestamps");
9125
9126 bool passed = (idx.size() == 3 && !idx.empty() &&
9127 idx.name().has_value() && *idx.name() == "timestamps" &&
9128 idx.inferred_type() == "datetime");
mask (pd_test_1_all.cpp:9119)
9109void pd_test_datetime_mixin_array_constructor() {
9110 std::cout << "========= DatetimeTDMixin array constructor =========================";
9111
9112 // Create DatetimeArray with some values
9113 numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9114 data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2001
9115 data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2017
9116 data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2020
9117
9118 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9119 mask.setElementAt({0}, numpy::bool_(false));
9120 mask.setElementAt({1}, numpy::bool_(false));
9121 mask.setElementAt({2}, numpy::bool_(false));
9122
9123 pandas::DatetimeArray arr(data, mask);
9124 pandas::DatetimeTDMixin idx(arr, "timestamps");
9125
9126 bool passed = (idx.size() == 3 && !idx.empty() &&
9127 idx.name().has_value() && *idx.name() == "timestamps" &&
9128 idx.inferred_type() == "datetime");
mask_at (pd_test_3_all.cpp:27712)
27702 fail++;
27703 } else {
27704 if (bool_s->dtype_name() != "boolean") {
27705 std::cout << " FAIL: dtype should be boolean, got " << bool_s->dtype_name() << std::endl;
27706 fail++;
27707 }
27708 if (!bool_s->has_mask()) {
27709 std::cout << " FAIL: should have mask for NA" << std::endl;
27710 fail++;
27711 } else {
27712 if (!bool_s->mask_at(2)) {
27713 std::cout << " FAIL: position 2 should be masked (NA)" << std::endl;
27714 fail++;
27715 }
27716 }
27717 }
27718
27719 if (fail == 0) std::cout << " OK" << std::endl;
27720}
27721
27722void pd_test_astype_to_string() {
nlargest (pd_test_1_all.cpp:6425)
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
6419 if (std::stod(first_val) != 5.0) {
6420 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values desc first != 5" << std::endl;
6421 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values desc first != 5");
6422 }
6423
6424 // Test nlargest
6425 auto largest = df.nlargest(2, "A");
6426 if (largest.nrows() != 2) {
6427 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6428 throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6429 }
6430
6431 // Test nsmallest
6432 auto smallest = df.nsmallest(2, "A");
6433 if (smallest.nrows() != 2) {
6434 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6435 throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
nsmallest (pd_test_1_all.cpp:6432)
6422 }
6423
6424 // Test nlargest
6425 auto largest = df.nlargest(2, "A");
6426 if (largest.nrows() != 2) {
6427 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6428 throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6429 }
6430
6431 // Test nsmallest
6432 auto smallest = df.nsmallest(2, "A");
6433 if (smallest.nrows() != 2) {
6434 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6435 throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
6436 }
6437
6438 std::cout << " -> tests passed" << std::endl;
6439 }
6440
6441 // =====================================================================
6442 // Test: Rank
sample (pd_test_3_all.cpp:207)
197 if (df.index().dtype_name() != "int64") {
198 std::cout << " [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199 throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200 }
201 }
202
203 std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207 std::cout << "========= DataFrame.sample() =======================";
208
209 std::map<std::string, std::vector<double>> data = {
210 {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211 {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212 };
213 pandas::DataFrame df(data);
214
215 // Sample 3 rows
216 pandas::DataFrame result = df.sample(3, 42); // seed=42 for reproducibility
set_value_nan (pd_test_5_all.cpp:18478)
18468 "0 a\n"
18469 "1 NaN\n"
18470 "2 c";
18471 bool ok = (actual == expected);
18472 pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_10_str_col_where_default.to_string", local_fail);
18473 if (!ok) dump_diff("case_10", expected, actual);
18474}
18475
18476void where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN(int& local_fail) {
18477 pandas::Series<std::int64_t> s({10, 20, 30});
18478 s.set_value_nan(0);
18479
18480 std::string actual = s.get_value_str(0);
18481 std::string expected = "NaN";
18482 bool ok = (actual == expected);
18483 pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN (got " +
18484 actual + ")", local_fail);
18485
18486 bool ok1 = (s.get_value_str(1) == "20");
18487 bool ok2 = (s.get_value_str(2) == "30");
18488 pandas_tests::check(ok1, "case_11.kept_idx1_eq_20", local_fail);
set_value_with_enlarge (pd_test_3_all.cpp:29152)
29142static int sm_check(bool cond, const char* msg) {
29143 if (!cond) { std::cout << " FAIL: " << msg << std::endl; return 1; }
29144 return 0;
29145}
29146
29147void pd_test_setitem_multicolumn_series_enlarge_int() {
29148 std::cout << " -- pd_test_setitem_multicolumn_series_enlarge_int --" << std::endl;
29149 int fail = 0;
29150 std::vector<numpy::float64> v = {1.0, 2.0, 3.0};
29151 pandas::Series<numpy::float64> s(v);
29152 bool grew = s.set_value_with_enlarge("5", 99.0);
29153 fail += sm_check(grew, "enlargement reported");
29154 fail += sm_check(s.size() == 4, "size grew to 4");
29155 fail += sm_check(s[3] == 99.0, "appended value");
29156 auto k = s.keys();
29157 fail += sm_check(k.size() == 4 && k[3] == "5", "label appended");
29158 if (fail == 0) std::cout << " OK" << std::endl;
29159 if (fail != 0) throw std::runtime_error("pd_test_setitem_multicolumn_series_enlarge_int failed");
29160}
29161
29162void pd_test_setitem_multicolumn_series_enlarge_string() {
tail (pd_test_1_all.cpp:6308)
6298 pandas::DataFrame df(data);
6299
6300 // Test head
6301 auto head_df = df.head(3);
6302 if (head_df.nrows() != 3) {
6303 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304 throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305 }
6306
6307 // Test tail
6308 auto tail_df = df.tail(2);
6309 if (tail_df.nrows() != 2) {
6310 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311 throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312 }
6313
6314 // Test iloc_rows range
6315 auto slice = df.iloc_rows(1, 4);
6316 if (slice.nrows() != 3) {
6317 std::cout << " [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318 throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897 std::cout << "========= inherited take ==============================";
5898
5899 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900 pandas::CategoricalIndex idx(arr);
5901
5902 std::vector<size_t> indices = {0, 2, 3};
5903 pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905 bool passed = (taken.size() == 3);
5906 if (!passed) {
5907 std::cout << " [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908 throw std::runtime_error("pd_test_categorical_index_take failed");
5909 }
5910
5911 std::cout << " -> tests passed" << std::endl;
5912}
where (pd_test_1_all.cpp:22018)
22008 data["B"] = {5.0, 6.0, 7.0, 8.0};
22009 pandas::DataFrame df(data);
22010
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where (pd_test_1_all.cpp:22018)
22008 data["B"] = {5.0, 6.0, 7.0, 8.0};
22009 pandas::DataFrame df(data);
22010
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where_resolved_typed (pd_test_5_all.cpp:144251)
144241 auto s = mk_str({"a", "b", "c"}, {"0", "1", "2"});
144242 auto r = s.fillna_resolved(pandas::FillValue::of_bool(true));
144243 check_dtype_eq("C_26_case_70_fillna_bool_string()_dtype",
144244 result_series_dtype_full(r), "object", local_fail);
144245}
144246
144247void case_71_where_int_int_other(int& local_fail) {
144248 std::cout << "-- case_71_where_int_int_other\n";
144249 auto s = mk_f64({10, 20, 30}, {"0", "1", "2"}, "int64");
144250 auto cond = mk_cond_tft();
144251 auto r = s.where_resolved_typed(cond, pandas::FillValue::of_numeric_int(-1.0));
144252 check_dtype_eq("C_26_case_71_where_int_int_other()_dtype",
144253 result_series_dtype_full(r), "int64", local_fail);
144254}
144255
144256void case_72_where_int_float_other(int& local_fail) {
144257 std::cout << "-- case_72_where_int_float_other\n";
144258 auto s = mk_f64({10, 20, 30}, {"0", "1", "2"}, "int64");
144259 auto cond = mk_cond_tft();
144260 auto r = s.where_resolved_typed(cond, pandas::FillValue::of_numeric(0.5));
144261 check_dtype_eq("C_26_case_72_where_int_float_other()_dtype",
xs (pd_test_2_all.cpp:18668)
18658 std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18659 return 0;
18660 }
18661
18662} // namespace dataframe_tests
18663// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18664
18665// ------------------- pd_test_xs.cpp (start) -----------------------------
18666// dataframe_tests/pd_test_xs.cpp
18667// Tests for DataFrame xs() (cross-section) implementation
18668
18669#include <iostream>
18670#include <stdexcept>
18671#include <vector>
18672#include <string>
18673#include <map>
18674
18675#include "../pandas/pd_dataframe.h"
18676
18677// CRITICAL: No using namespace directives
drop (pd_test_1_all.cpp:6558)
6548 if (df.ncols() != 2) {
6549 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550 throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551 }
6552 if (!popped) {
6553 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554 throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555 }
6556
6557 // Test drop columns
6558 auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559 if (dropped.ncols() != 1) {
6560 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562 }
6563
6564 // Test rename
6565 auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566 if (!renamed.has_column("X")) {
6567 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568 throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
drop_duplicates (pd_test_1_all.cpp:6639)
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
6634 std::map<std::string, std::vector<numpy::int64>> dup_data;
6635 dup_data["A"] = {1, 1, 2, 2};
6636 dup_data["B"] = {1, 1, 2, 3};
6637 pandas::DataFrame df_dup(dup_data);
6638
6639 auto deduped = df_dup.drop_duplicates();
6640 // Rows 0 and 1 are duplicates (A=1, B=1), so should have 3 rows
6641 if (deduped.nrows() != 3) {
6642 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop_duplicates nrows != 3, got " << deduped.nrows() << std::endl;
6643 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644 }
6645 }
6646
6647 // Test assign
6648 {
6649 std::map<std::string, std::vector<numpy::int64>> assign_data;
droplevel (pd_test_1_all.cpp:14428)
14418 void pd_test_multiindex_droplevel() {
14419 std::cout << "========= droplevel =================================== ";
14420
14421 std::vector<std::vector<std::string>> arrays = {
14422 {"a", "a", "b"},
14423 {"x", "y", "z"},
14424 {"1", "2", "3"}
14425 };
14426
14427 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428 pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430 bool passed = true;
14431
14432 if (dropped.nlevels() != 2) {
14433 std::cout << " [FAIL] : nlevels should be 2 after drop" << std::endl;
14434 passed = false;
14435 }
14436
14437 // Check remaining levels
14438 auto tup = dropped[0];
dropna (pd_test_1_all.cpp:531)
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
pop (pd_test_1_all.cpp:6547)
6537 pandas::DataFrame df(data);
6538
6539 // Test add_column
6540 df.add_column<numpy::int64>("C", {7, 8, 9});
6541 if (df.ncols() != 3) {
6542 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : add_column ncols != 3" << std::endl;
6543 throw std::runtime_error("pd_test_dataframe_manipulation failed: add_column ncols != 3");
6544 }
6545
6546 // Test pop
6547 auto popped = df.pop("C");
6548 if (df.ncols() != 2) {
6549 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550 throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551 }
6552 if (!popped) {
6553 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554 throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555 }
6556
6557 // Test drop columns
pop (pd_test_1_all.cpp:6547)
6537 pandas::DataFrame df(data);
6538
6539 // Test add_column
6540 df.add_column<numpy::int64>("C", {7, 8, 9});
6541 if (df.ncols() != 3) {
6542 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : add_column ncols != 3" << std::endl;
6543 throw std::runtime_error("pd_test_dataframe_manipulation failed: add_column ncols != 3");
6544 }
6545
6546 // Test pop
6547 auto popped = df.pop("C");
6548 if (df.ncols() != 2) {
6549 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550 throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551 }
6552 if (!popped) {
6553 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554 throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555 }
6556
6557 // Test drop columns
reindex (pd_test_1_all.cpp:6708)
6698 }
6699 }
6700
6701 // Test reindex rows
6702 {
6703 std::map<std::string, std::vector<double>> data;
6704 data["A"] = {1.0, 2.0, 3.0};
6705 pandas::DataFrame df(data);
6706 df = df.set_axis({"x", "y", "z"}, 0);
6707
6708 auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709 if (reindexed.nrows() != 3) {
6710 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712 }
6713 // 'w' should have NaN
6714 std::string val = reindexed["A"].get_value_str(2);
6715 if (!std::isnan(std::stod(val))) {
6716 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718 }
reindex_dispatch (pd_test_5_all.cpp:61046)
61036 for (size_t i = 0; i < n; ++i) out.push_back(std::to_string(i));
61037 return out;
61038}
61039
61040template <typename T>
61041static std::pair<std::string, std::string>
61042run_numeric_fill_lt29(const std::vector<T>& vals,
61043 const std::vector<std::string>& new_idx,
61044 double fill) {
61045 auto s = make_series_with_idx_lt29<T>(vals, src_idx_for_lt29(vals.size()));
61046 pandas::Result r = s.reindex_dispatch(
61047 new_idx, "", pandas::FillValue::of_numeric(fill));
61048
61049 if (std::holds_alternative<
61050 std::unique_ptr<pandas::Series<std::string>>>(r.value)) {
61051 auto& sp = std::get<
61052 std::unique_ptr<pandas::Series<std::string>>>(r.value);
61053 auto df = sp->to_frame(std::optional<std::string>("v"));
61054 auto dts = df.dtypes();
61055 return {df.to_string(),
61056 dts.empty() ? std::string("<no col>") : dts[0]};
reindex_like (pd_test_1_all.cpp:6777)
6767 data1["A"] = {1, 2};
6768 data1["B"] = {3, 4};
6769 pandas::DataFrame df1(data1);
6770
6771 std::map<std::string, std::vector<int>> data2;
6772 data2["B"] = {10, 20, 30};
6773 data2["C"] = {40, 50, 60};
6774 pandas::DataFrame df2(data2);
6775 df2 = df2.set_axis({"x", "y", "z"}, 0);
6776
6777 auto reindexed = df1.reindex_like(df2);
6778 if (reindexed.nrows() != 3 || reindexed.ncols() != 2) {
6779 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex_like wrong shape" << std::endl;
6780 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex_like");
6781 }
6782 }
6783
6784 std::cout << " -> tests passed" << std::endl;
6785 }
6786
6787 // =====================================================================
reindex_promote_object_bool_fill (pd_test_5_all.cpp:43334)
43324 df, actual, expected, "object", local_fail);
43325}
43326
43327void f_dtype_object_collapse_groupby_314827_case_13_series_float64_reindex_float_fill_one(int& local_fail) {
43328 std::cout << "-- case_13_series_float64_reindex_float_fill_one\n";
43329 pandas::Series<numpy::float64> s({1.0, 2.0, 3.0});
43330 s.set_index(std::make_unique<pandas::Index<std::string>>(
43331 std::vector<std::string>{"a", "b", "c"}));
43332 // c2: invoke the new bool-promote overload directly (mirrors what the
43333 // PandasPython binding now does for a Python bool fill_value).
43334 auto r = s.reindex_promote_object_bool_fill({"a", "b", "z"}, /*method=*/"",
43335 /*bool_fill_value=*/true);
43336 pandas::DataFrame df = r.to_frame(std::optional<std::string>("v"));
43337 std::string actual = df.to_string();
43338
43339 std::cout << " nrows=" << df.nrows() << " ncols=" << df.ncols() << "\n";
43340
43341 std::string expected =
43342 " v\n"
43343 "a 1.0\n"
43344 "b 2.0\n"
reindex_with_fill (pd_test_5_all.cpp:97806)
97796 // by recording a pass: we don't fail the suite for a cell that
97797 // can't even be set up.
97798 pandas_tests::check(true, tag + " [override-rejected-by-storage]", local_fail);
97799 return;
97800 }
97801 }
97802 std::vector<std::string> target_keys = has_missing
97803 ? std::vector<std::string>{"a", "b", "z"}
97804 : std::vector<std::string>{"a", "b", "c"};
97805 try {
97806 pandas::Result r = s.reindex_with_fill(
97807 target_keys, std::string(""), fv,
97808 std::nullopt, std::nullopt, {}, {});
97809 Probe p = probe_result(r);
97810 pandas_tests::check(p.in_whitelist,
97811 tag + " [variant in apply_override_to_result whitelist]",
97812 local_fail);
97813 pandas_tests::check_str(tag + " [result_override]",
97814 expected_override, p.result_override, local_fail);
97815 } catch (const std::exception& e) {
97816 pandas_tests::check(false,
reindex_with_indexer (pd_test_5_all.cpp:40388)
40378 s.set_dtype_override("boolean");
40379 s.set_freq(std::optional<std::string>("D"));
40380 s.set_string_na_sentinel_disabled(true);
40381
40382 // Indexer: identity over the 3 source positions.
40383 numpy::NDArray<numpy::int64> indexer(std::vector<size_t>{3});
40384 indexer.setElementAt({0}, 0);
40385 indexer.setElementAt({1}, 1);
40386 indexer.setElementAt({2}, 2);
40387
40388 auto base = s.reindex_with_indexer(indexer);
40389 pandas_tests::check(base != nullptr, "case7.reindex_with_indexer_nonnull", local_fail);
40390 if (!base) return;
40391
40392 auto* r = dynamic_cast<pandas::Series<std::int64_t>*>(base.get());
40393 pandas_tests::check(r != nullptr, "case7.reindex_with_indexer_is_Series_int64",
40394 local_fail);
40395 if (!r) return;
40396
40397 // dtype_override propagates (oracle says yes).
40398 pandas_tests::check(r->dtype_override().has_value() &&
rename (pd_test_1_all.cpp:5816)
5806 std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810 std::cout << "========= rename ======================================";
5811
5812 pandas::CategoricalArray arr({"x", "y"});
5813 pandas::CategoricalIndex idx(arr, "old_name");
5814
5815 pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817 bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818 renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819 if (!passed) {
5820 std::cout << " [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821 throw std::runtime_error("pd_test_categorical_index_rename failed");
5822 }
5823
5824 std::cout << " -> tests passed" << std::endl;
5825}
rename_axis (pd_test_1_all.cpp:6760)
6750 throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751 }
6752 }
6753
6754 // Test rename_axis
6755 {
6756 std::map<std::string, std::vector<int>> data;
6757 data["A"] = {1, 2, 3};
6758 pandas::DataFrame df(data);
6759
6760 auto renamed = df.rename_axis("my_index", 0);
6761 // Should not throw
6762 }
6763
6764 // Test reindex_like
6765 {
6766 std::map<std::string, std::vector<int>> data1;
6767 data1["A"] = {1, 2};
6768 data1["B"] = {3, 4};
6769 pandas::DataFrame df1(data1);
reorder_levels (pd_test_1_all.cpp:14495)
14485 void pd_test_multiindex_reorder_levels() {
14486 std::cout << "========= reorder_levels ============================== ";
14487
14488 std::vector<std::vector<std::string>> arrays = {
14489 {"a", "b"},
14490 {"x", "y"},
14491 {"1", "2"}
14492 };
14493
14494 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495 pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497 bool passed = true;
14498
14499 auto tup = reordered[0];
14500 if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501 std::cout << " [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502 passed = false;
14503 }
14504
14505 if (!passed) {
replace (pd_test_1_all.cpp:6623)
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
replace (pd_test_1_all.cpp:6623)
6613 }
6614 }
6615
6616 // Test replace
6617 {
6618 std::map<std::string, std::vector<numpy::float64>> float_data;
6619 float_data["X"] = {1.0, 2.0, 3.0};
6620 float_data["Y"] = {2.0, 2.0, 4.0};
6621 pandas::DataFrame df_repl(float_data);
6622
6623 auto replaced = df_repl.replace(2.0, 99.0);
6624 // Check some value was replaced (crude check via string)
6625 std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626 if (val_str.find("99") == std::string::npos) {
6627 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628 throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
reset_index (pd_test_3_all.cpp:1618)
1608 }
1609
1610 std::cout << " -> tests passed" << std::endl;
1611}
1612
1613// ============================================================================
1614// Category 10: Remaining Untested Functions
1615// ============================================================================
1616
1617void pd_test_3_all_series_reset_index() {
1618 std::cout << "========= Series.reset_index() =======================";
1619
1620 std::vector<double> vals = {10.0, 20.0, 30.0};
1621 pandas::Series<double> s(vals, "test");
1622
1623 // Set a custom index
1624 pandas::Index<std::string> custom_idx({"a", "b", "c"});
1625 s.set_index(custom_idx);
1626
1627 // Reset the index
1628 s.reset_index(true); // drop=true
set_axis (pd_test_1_all.cpp:6673)
6663 std::cout << " -> tests passed" << std::endl;
6664 }
6665
6666 // =====================================================================
6667 // Test: Index Operations
6668 // =====================================================================
6669 void pd_test_dataframe_index_ops() {
6670 std::cout << "========= index operations =================";
6671
6672 // Test set_axis (rows)
6673 {
6674 std::map<std::string, std::vector<int>> data;
6675 data["A"] = {1, 2, 3};
6676 pandas::DataFrame df(data);
6677
6678 auto renamed = df.set_axis({"x", "y", "z"}, 0);
6679 std::string idx0 = renamed.index().get_value_str(0);
6680 if (idx0 != "x") {
6681 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6682 throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308 // Set datetime index
20309 std::vector<std::string> dates = {
20310 "2020-01-01 00:00:00",
20311 "2020-01-01 12:00:00",
20312 "2020-01-02 00:00:00",
20313 "2020-01-02 12:00:00",
20314 "2020-01-03 00:00:00",
20315 "2020-01-03 12:00:00"
20316 };
20317 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319 // Resample to daily
20320 auto resampler = df.resample("D");
20321 pandas::DataFrame result = resampler.sum();
20322
20323 // Check that we got aggregated results
20324 bool passed = (result.nrows() <= df.nrows());
20325
20326 if (!passed) {
20327 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
swaplevel (pd_test_1_all.cpp:14461)
14451 void pd_test_multiindex_swaplevel() {
14452 std::cout << "========= swaplevel =================================== ";
14453
14454 std::vector<std::vector<std::string>> arrays = {
14455 {"a", "b"},
14456 {"x", "y"}
14457 };
14458 std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461 pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463 bool passed = true;
14464
14465 // Tuple should be reversed
14466 auto tup = swapped[0];
14467 if (tup[0] != "x" || tup[1] != "a") {
14468 std::cout << " [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469 passed = false;
14470 }
update (pd_test_1_all.cpp:13945)
13935 if (!result.has_column("C")) {
13936 passed = false;
13937 std::cout << " [FAIL] : in pd_test_joining_combine_first() : missing column C" << std::endl;
13938 throw std::runtime_error("pd_test_joining_combine_first failed: col C missing");
13939 }
13940
13941 std::cout << " -> tests passed" << std::endl;
13942 }
13943
13944 // =====================================================================
13945 // update() Tests
13946 // =====================================================================
13947
13948 void pd_test_joining_update() {
13949 std::cout << "========= update ======================================";
13950
13951 std::map<std::string, std::vector<double>> left_data = {
13952 {"A", {1.0, 2.0, 3.0}},
13953 {"B", {10.0, 20.0, 30.0}}
13954 };
13955 std::vector<std::string> left_idx = {"x", "y", "z"};
backfill (pd_test_3_all.cpp:2645)
2635void pd_test_3_all_df_backfill_pad() {
2636 std::cout << "========= DataFrame.backfill/pad() =======================";
2637
2638 std::map<std::string, std::vector<double>> data = {
2639 {"A", {1.0, std::nan(""), std::nan(""), 4.0}},
2640 {"B", {std::nan(""), 2.0, std::nan(""), 4.0}}
2641 };
2642 pandas::DataFrame df(data);
2643
2644 // Test backfill (should fill backward)
2645 pandas::DataFrame bfill_result = df.backfill(0);
2646 if (bfill_result.nrows() != 4 || bfill_result.ncols() != 2) {
2647 throw std::runtime_error("backfill shape failed");
2648 }
2649
2650 // Test pad (should fill forward)
2651 pandas::DataFrame pad_result = df.pad(0);
2652 if (pad_result.nrows() != 4 || pad_result.ncols() != 2) {
2653 throw std::runtime_error("pad shape failed");
2654 }
bfill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593 std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594 return 0;
23595 }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
fillna_resolved (pd_test_5_all.cpp:100541)
100531void case_3_object_fillna_nan_no_missing(int& local_fail) {
100532 std::cout << "-- case_3_object_fillna_nan_no_missing\n";
100533 pandas::Series<numpy::object_> s(
100534 std::vector<numpy::object_>{
100535 numpy::object_(std::string("a")),
100536 numpy::object_(std::string("b")),
100537 numpy::object_(std::string("c"))},
100538 std::optional<std::string>{});
100539 pandas::Result r;
100540 try {
100541 r = s.fillna_resolved(pandas::FillValue::nan());
100542 } catch (const std::exception& e) {
100543 std::cout << " exception: " << e.what() << "\n";
100544 }
100545 pandas_tests::check(r.is_series(),
100546 "C_26m_case_3_object_fillna_nan_no_missing()_is_series", local_fail);
100547 pandas_tests::check(result_object_series_dtype_full(r) == "object",
100548 "C_26m_case_3_object_fillna_nan_no_missing()_dtype_object", local_fail);
100549}
100550
100551void case_4_object_fillna_na_with_sentinel(int& local_fail) {
fillna_string (pd_test_5_all.cpp:47965)
47955 "NaT", "null", "<NA>", "x", ""});
47956 auto& col = df["col"];
47957 for (size_t r = 0; r < df.nrows(); ++r) {
47958 std::cout << tag << " [" << r << "] val=\""
47959 << col.get_value_str(r) << "\" is_na_at="
47960 << col.is_na_at(r) << "\n";
47961 }
47962 // CROSS-REFERENCE: pd_series.h:1938 lists only ""/None/nan/NaN as NA
47963 // for Series<std::string>; "NA"/"NaT"/"null"/"<NA>" are NOT treated
47964 // as NA by is_na_at. This interacts with the fillna bug (item #1):
47965 // fillna_string (pd_series.h:1995) shares the SAME list.
47966 } catch (const std::exception& e) {
47967 std::cout << tag << " exception: " << e.what() << "\n";
47968 }
47969 std::cout << tag << " === end ===\n";
47970}
47971
47972static void P33_forced_object_sentinels() {
47973 const std::string tag = "[P33]";
47974 std::cout << "\n" << tag
47975 << " === dtype='object' with 'NaT'/'null' literals (residual bug?) ===\n";
interpolate (pd_test_1_all.cpp:24365)
24355 std::cout << "====================================== [OK] pd_test_idxmax_idxmin test suite ========================== " << std::endl;
24356 return 0;
24357 }
24358
24359} // namespace dataframe_tests
24360// ------------------- pd_test_idxmax_idxmin.cpp (end) -----------------------------
24361
24362// ------------------- pd_test_interpolate.cpp (start) -----------------------------
24363// dataframe_tests/pd_test_interpolate.cpp
24364// Test file for DataFrame.interpolate() method
24365
24366#include <iostream>
24367#include <stdexcept>
24368#include <cmath>
24369#include <limits>
24370#include <map>
24371#include "../pandas/pd_dataframe.h"
24372
24373// CRITICAL: No using namespace directives
isna (pd_test_1_all.cpp:524)
514 throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515 }
516
517 // Test count (non-NA)
518 if (arr.count() != 2) {
519 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520 throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
676 // Index 0: 1.0 -> not null
677 if (isnull_result.getElementAt({0})) {
678 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679 throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680 }
681 // Index 1: NaN -> null
notna (pd_test_1_all.cpp:6595)
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593 }
6594
6595 auto notna_mask = df_na.notna();
6596 if (notna_mask.getElementAt({1, 0})) {
6597 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598 throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599 }
6600 }
6601
6602 // Test fillna
6603 {
6604 std::map<std::string, std::vector<numpy::float64>> float_data;
6605 float_data["X"] = {1.0, std::nan(""), 3.0};
notnull (pd_test_3_all.cpp:665)
655 }
656
657 std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
pad (pd_test_3_all.cpp:1771)
1761 if (result_single.nrows() != 3 || result_single.ncols() != 1) {
1762 std::cout << " [FAIL] : in pd_test_3_all_dataframe_unstack() : single col shape mismatch" << std::endl;
1763 throw std::runtime_error("pd_test_3_all_dataframe_unstack failed: single col shape");
1764 }
1765
1766 std::cout << " -> tests passed" << std::endl;
1767}
1768
1769void pd_test_3_all_fbbuilder_pad() {
1770 std::cout << "========= FBBuilder.pad() (internal) =================";
1771
1772 // Note: FBBuilder.pad() is an internal method for FlatBuffer serialization
1773 // It's not the pandas DataFrame.pad() method (which is ffill alias)
1774 // This test verifies the to_feather() serialization works, which uses FBBuilder.pad()
1775
1776 std::map<std::string, std::vector<double>> data = {
1777 {"A", {1.0, 2.0, 3.0}},
1778 {"B", {4.0, 5.0, 6.0}}
1779 };
1780 pandas::DataFrame df(data);
count (pd_test_1_all.cpp:66)
56 if (arr.is_na(0)) {
57 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59 }
60
61 if (!arr.has_na()) {
62 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63 throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64 }
65
66 if (arr.count() != 2) {
67 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68 throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69 }
70
71 std::cout << " -> tests passed" << std::endl;
72 }
73
74 void pd_test_boolean_array_kleene_and() {
75 std::cout << "========= BooleanArray: Kleene AND ======================= ";
cummax (pd_test_1_all.cpp:5152)
5142 // cummin: [1, 1, 1, 1]
5143 auto cmin = df.cummin();
5144 val = cmin["A"].get_value_str(3);
5145 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146 if (!passed) {
5147 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149 }
5150
5151 // cummax: [1, 2, 3, 4]
5152 auto cmax = df.cummax();
5153 val = cmax["A"].get_value_str(2);
5154 passed = std::abs(std::stod(val) - 3.0) < 0.001;
5155 if (!passed) {
5156 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummax failed" << std::endl;
5157 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummax failed");
5158 }
5159
5160 std::cout << " -> tests passed" << std::endl;
5161 }
cummin (pd_test_1_all.cpp:5143)
5133 // cumprod: [1, 2, 6, 24]
5134 auto cp = df.cumprod();
5135 val = cp["A"].get_value_str(3);
5136 passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137 if (!passed) {
5138 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140 }
5141
5142 // cummin: [1, 1, 1, 1]
5143 auto cmin = df.cummin();
5144 val = cmin["A"].get_value_str(3);
5145 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146 if (!passed) {
5147 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149 }
5150
5151 // cummax: [1, 2, 3, 4]
5152 auto cmax = df.cummax();
5153 val = cmax["A"].get_value_str(2);
cumprod (pd_test_1_all.cpp:5134)
5124 // cumsum: [1, 3, 6, 10]
5125 auto cs = df.cumsum();
5126 std::string val = cs["A"].get_value_str(2);
5127 bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128 if (!passed) {
5129 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131 }
5132
5133 // cumprod: [1, 2, 6, 24]
5134 auto cp = df.cumprod();
5135 val = cp["A"].get_value_str(3);
5136 passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137 if (!passed) {
5138 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140 }
5141
5142 // cummin: [1, 1, 1, 1]
5143 auto cmin = df.cummin();
5144 val = cmin["A"].get_value_str(3);
cumsum (pd_test_1_all.cpp:5125)
5115 }
5116
5117 void pd_test_arithmetic_dataframe_cumulative() {
5118 std::cout << "========= DataFrame cumulative ==================";
5119
5120 std::map<std::string, std::vector<double>> data;
5121 data["A"] = {1.0, 2.0, 3.0, 4.0};
5122 pandas::DataFrame df(data);
5123
5124 // cumsum: [1, 3, 6, 10]
5125 auto cs = df.cumsum();
5126 std::string val = cs["A"].get_value_str(2);
5127 bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128 if (!passed) {
5129 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130 throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131 }
5132
5133 // cumprod: [1, 2, 6, 24]
5134 auto cp = df.cumprod();
5135 val = cp["A"].get_value_str(3);
describe (pd_test_2_all.cpp:19793)
19783 ++g_fail;
19784 }
19785}
19786
19787static bool approx_eq(double a, double b, double tol = 1e-9) {
19788 if (std::isnan(a) && std::isnan(b)) return true;
19789 return std::abs(a - b) < tol;
19790}
19791
19792// =====================================================================
19793// Test: describe() default mode — numeric columns only
19794// =====================================================================
19795
19796void pd_test_describe_numeric_only() {
19797 std::cout << " -- pd_test_describe_numeric_only --" << std::endl;
19798
19799 pandas::DataFrame df;
19800 df.add_column("A", std::vector<double>{1.0, 2.0, 3.0, 4.0, 5.0});
19801 df.add_column("B", std::vector<double>{10.0, 20.0, 30.0, 40.0, 50.0});
19802 df.add_column("Name", std::vector<std::string>{"a", "b", "c", "d", "e"});
describe_as_series (pd_test_3_all.cpp:25806)
25796 if (idx2 != "0.75") throw std::runtime_error("Expected index '0.75', got '" + idx2 + "'");
25797 // Check values (quantile of 1,2,3,4,5: q=0.5 should be 3.0)
25798 double v1 = result[1];
25799 if (std::abs(v1 - 3.0) > 1e-10) throw std::runtime_error("Expected median 3.0, got " + std::to_string(v1));
25800 std::cout << "PASSED" << std::endl;
25801}
25802
25803void pd_test_series_format_helpers_describe() {
25804 std::cout << " describe_as_series... ";
25805 ::pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0, 5.0}, "test");
25806 auto result = s.describe_as_series();
25807 // Should have 8 rows: count, mean, std, min, 25%, 50%, 75%, max
25808 if (result.size() != 8) throw std::runtime_error("Expected 8 rows, got " + std::to_string(result.size()));
25809 // Check count = 5.0
25810 double count_val = result[0];
25811 if (std::abs(count_val - 5.0) > 1e-10) throw std::runtime_error("Expected count=5.0, got " + std::to_string(count_val));
25812 // Check index[0] = "count"
25813 std::string idx0 = result.index().get_value_str(0);
25814 if (idx0 != "count") throw std::runtime_error("Expected index[0]='count', got '" + idx0 + "'");
25815 std::cout << "PASSED" << std::endl;
25816}
kurt (pd_test_1_all.cpp:4599)
4589 std::cout << "========= Series skew/kurt ======================";
4590
4591 pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592 auto skew_val = s.skew();
4593 bool passed = skew_val.has_value() && *skew_val > 0; // Should be right-skewed
4594 if (!passed) {
4595 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604 }
4605
4606 // Test kurtosis alias
4607 auto kurt_alias = s.kurtosis();
4608 passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609 if (!passed) {
kurtosis (pd_test_1_all.cpp:4607)
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604 }
4605
4606 // Test kurtosis alias
4607 auto kurt_alias = s.kurtosis();
4608 passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609 if (!passed) {
4610 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurtosis alias failed" << std::endl;
4611 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612 }
4613
4614 std::cout << " -> tests passed" << std::endl;
4615 }
4616
4617 void pd_test_aggregation_series_pct_change() {
max (pd_test_1_all.cpp:771)
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
mean (pd_test_1_all.cpp:282)
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901 }
20902
20903 std::cout << " -> tests passed" << std::endl;
20904 }
20905
20906 void pd_test_expanding_median() {
20907 std::cout << "========= Expanding median ======================";
20908
20909 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910 auto result = s.expanding().median();
20911
20912 // Expanding median: 1, 1.5, 2, 2.5, 3
20913 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914 std::abs(result[1] - 1.5) < 0.001 &&
20915 std::abs(result[2] - 2.0) < 0.001 &&
20916 std::abs(result[3] - 2.5) < 0.001 &&
20917 std::abs(result[4] - 3.0) < 0.001;
20918 if (!passed) {
20919 std::cout << " [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920 throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754 }
755
756 void pd_test_categorical_array_ordered_operations() {
757 std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759 std::vector<std::string> cats = {"low", "medium", "high"};
760 std::vector<numpy::int32> codes = {0, 2, 1, 0, -1}; // low, high, medium, low, NA
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
mode (pd_test_1_all.cpp:4569)
4559 throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(1) should be 5.0");
4560 }
4561
4562 std::cout << " -> tests passed" << std::endl;
4563 }
4564
4565 void pd_test_aggregation_series_mode() {
4566 std::cout << "========= Series mode ===========================";
4567
4568 pandas::Series<int> s({1, 2, 2, 3, 3, 3});
4569 auto m = s.mode();
4570 bool passed = m.size() == 1 && m[0] == 3;
4571 if (!passed) {
4572 std::cout << " [FAIL] : in pd_test_aggregation_series_mode() : mode should be 3" << std::endl;
4573 throw std::runtime_error("pd_test_aggregation_series_mode failed: mode should be 3");
4574 }
4575
4576 // Test multi-mode
4577 pandas::Series<int> s2({1, 1, 2, 2});
4578 auto m2 = s2.mode();
4579 passed = m2.size() == 2; // Both 1 and 2 are modes
nunique (pd_test_1_all.cpp:10604)
10594 std::cout << " -> tests passed" << std::endl;
10595}
10596
10597void pd_test_extension_index_nunique() {
10598 std::cout << "========= nunique =========================";
10599
10600 pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10601 pandas::CategoricalIndex idx(arr);
10602
10603 bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10604 if (!passed) {
10605 std::cout << " [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10606 throw std::runtime_error("pd_test_extension_index_nunique failed");
10607 }
10608
10609 std::cout << " -> tests passed" << std::endl;
10610}
10611
10612void pd_test_extension_index_factorize() {
10613 std::cout << "========= factorize =========================";
prod (pd_test_1_all.cpp:26082)
26072 std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073 return 0;
26074 }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
product (pd_test_3_all.cpp:2584)
2574 // Test quantile along rows
2575 pandas::Series<numpy::float64> q50_rows = df.quantile(0.5, 1);
2576 if (q50_rows.size() != 5) {
2577 throw std::runtime_error("quantile(0.5, axis=1) failed");
2578 }
2579
2580 std::cout << " -> tests passed" << std::endl;
2581}
2582
2583void pd_test_3_all_df_product() {
2584 std::cout << "========= DataFrame.product(axis) ========================";
2585
2586 std::map<std::string, std::vector<double>> data = {
2587 {"A", {1.0, 2.0, 3.0}},
2588 {"B", {4.0, 5.0, 6.0}}
2589 };
2590 pandas::DataFrame df(data);
2591
2592 // Test product along columns
2593 pandas::Series<numpy::float64> prod_cols = df.product(0);
2594 if (prod_cols.size() != 2 || std::abs(prod_cols[static_cast<size_t>(0)] - 6.0) > 0.001 ||
quantile (pd_test_1_all.cpp:4540)
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
4535
4536 void pd_test_aggregation_series_quantile() {
4537 std::cout << "========= Series quantile =======================";
4538
4539 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4540 auto q50 = s.quantile(0.5);
4541 bool passed = q50.has_value() && std::abs(*q50 - 3.0) < 0.001;
4542 if (!passed) {
4543 std::cout << " [FAIL] : in pd_test_aggregation_series_quantile() : quantile(0.5) should be 3.0" << std::endl;
4544 throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(0.5) should be 3.0");
4545 }
4546
4547 // Test q=0 and q=1
4548 auto q0 = s.quantile(0.0);
4549 passed = q0.has_value() && std::abs(*q0 - 1.0) < 0.001;
4550 if (!passed) {
quantile_list (pd_test_3_all.cpp:25788)
25778 std::string r2 = ::pandas::display::format_quantile_label(0.5);
25779 if (r2 != "0.5") throw std::runtime_error("Expected '0.5', got '" + r2 + "'");
25780 std::string r3 = ::pandas::display::format_quantile_label(0.75);
25781 if (r3 != "0.75") throw std::runtime_error("Expected '0.75', got '" + r3 + "'");
25782 std::cout << "PASSED" << std::endl;
25783}
25784
25785void pd_test_series_format_helpers_quantile_list() {
25786 std::cout << " quantile_list... ";
25787 ::pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0, 5.0}, "test");
25788 auto result = s.quantile_list({0.25, 0.5, 0.75});
25789 if (result.size() != 3) throw std::runtime_error("Expected 3 values, got " + std::to_string(result.size()));
25790 // Check index labels
25791 std::string idx0 = result.index().get_value_str(0);
25792 std::string idx1 = result.index().get_value_str(1);
25793 std::string idx2 = result.index().get_value_str(2);
25794 if (idx0 != "0.25") throw std::runtime_error("Expected index '0.25', got '" + idx0 + "'");
25795 if (idx1 != "0.5") throw std::runtime_error("Expected index '0.5', got '" + idx1 + "'");
25796 if (idx2 != "0.75") throw std::runtime_error("Expected index '0.75', got '" + idx2 + "'");
25797 // Check values (quantile of 1,2,3,4,5: q=0.5 should be 3.0)
25798 double v1 = result[1];
sem (pd_test_1_all.cpp:4525)
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
skew (pd_test_1_all.cpp:4592)
4582 throw std::runtime_error("pd_test_aggregation_series_mode failed: multi-mode should return 2 values");
4583 }
4584
4585 std::cout << " -> tests passed" << std::endl;
4586 }
4587
4588 void pd_test_aggregation_series_skew_kurt() {
4589 std::cout << "========= Series skew/kurt ======================";
4590
4591 pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592 auto skew_val = s.skew();
4593 bool passed = skew_val.has_value() && *skew_val > 0; // Should be right-skewed
4594 if (!passed) {
4595 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597 }
4598
4599 auto kurt_val = s.kurt();
4600 passed = kurt_val.has_value();
4601 if (!passed) {
4602 std::cout << " [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
std_ (pd_test_1_all.cpp:20752)
20742 throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743 }
20744
20745 std::cout << " -> tests passed" << std::endl;
20746 }
20747
20748 void pd_test_rolling_std() {
20749 std::cout << "========= Rolling std ===========================";
20750
20751 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752 auto result = s.rolling(3).std_();
20753
20754 // std([1,2,3]) = 1.0 (ddof=1)
20755 // std([2,3,4]) = 1.0
20756 // std([3,4,5]) = 1.0
20757 bool passed = std::abs(result[2] - 1.0) < 0.001;
20758 if (!passed) {
20759 std::cout << " [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
sum (pd_test_1_all.cpp:276)
266 }
267
268 // Test sum/mean
269 pandas::BooleanArray arr({
270 std::optional<bool>(true),
271 std::optional<bool>(false),
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
value_counts (pd_test_1_all.cpp:865)
855 std::vector<std::optional<std::string>> values = {
856 std::optional<std::string>("a"),
857 std::optional<std::string>("b"),
858 std::optional<std::string>("a"),
859 std::optional<std::string>("a"),
860 std::optional<std::string>("b"),
861 std::nullopt // NA not counted
862 };
863 pandas::CategoricalArray arr(values);
864
865 auto [cats, counts] = arr.value_counts();
866
867 // Should have 2 categories
868 if (cats.size() != 2 || counts.size() != 2) {
869 std::cout << " [FAIL] : in pd_test_categorical_array_value_counts() : wrong size" << std::endl;
870 throw std::runtime_error("pd_test_categorical_array_value_counts failed: wrong size");
871 }
872
873 // Find 'a' count
874 int64_t a_count = 0, b_count = 0;
875 for (size_t i = 0; i < cats.size(); ++i) {
var (pd_test_1_all.cpp:20890)
20880 throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881 }
20882
20883 std::cout << " -> tests passed" << std::endl;
20884 }
20885
20886 void pd_test_expanding_var() {
20887 std::cout << "========= Expanding var =========================";
20888
20889 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890 auto result = s.expanding().var();
20891
20892 // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893 bool passed = std::isnan(result[0]) &&
20894 std::abs(result[1] - 0.5) < 0.001 &&
20895 std::abs(result[2] - 1.0) < 0.001 &&
20896 std::abs(result[3] - 1.6667) < 0.001 &&
20897 std::abs(result[4] - 2.5) < 0.001;
20898 if (!passed) {
20899 std::cout << " [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090 }
11091
11092 void pd_test_func_apply_series_agg() {
11093 std::cout << "========= Series agg ==================================";
11094
11095 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097 bool passed = true;
11098
11099 // Test string-based aggregation
11100 auto sum_result = s.agg("sum");
11101 if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102 passed = false;
11103 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104 throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105 }
11106
11107 auto mean_result = s.agg("mean");
11108 if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109 passed = false;
11110 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg_with_dtype (pd_test_5_all.cpp:94652)
94642static void run_dfgb_case(const std::string& fn,
94643 const std::string& col,
94644 const std::string& expected_dtype,
94645 const std::string& label,
94646 int& local_fail) {
94647 pandas::DataFrame df = make_mixed_df();
94648 auto gb = df.groupby("key");
94649 pandas::DataFrame out;
94650 std::string err;
94651 try {
94652 out = gb.agg_with_dtype(fn);
94653 } catch (const std::exception& e) {
94654 err = e.what();
94655 } catch (...) {
94656 err = "<unknown>";
94657 }
94658 pandas_tests::check(err.empty(),
94659 label + "_no_throw",
94660 local_fail);
94661 if (!err.empty()) {
94662 std::cout << " err: " << err << "\n";
aggregate (pd_test_1_all.cpp:11139)
11129 auto custom_agg = s.agg([](const std::vector<double>& v) {
11130 return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131 });
11132 if (!approx_equal(custom_agg, 3.0)) {
11133 passed = false;
11134 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135 throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136 }
11137
11138 // Test aggregate alias
11139 auto alias_result = s.aggregate("sum");
11140 if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141 passed = false;
11142 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143 throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144 }
11145
11146 std::cout << " -> tests passed" << std::endl;
11147 }
11148
11149 void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129 auto custom_agg = s.agg([](const std::vector<double>& v) {
11130 return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131 });
11132 if (!approx_equal(custom_agg, 3.0)) {
11133 passed = false;
11134 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135 throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136 }
11137
11138 // Test aggregate alias
11139 auto alias_result = s.aggregate("sum");
11140 if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141 passed = false;
11142 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143 throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144 }
11145
11146 std::cout << " -> tests passed" << std::endl;
11147 }
11148
11149 void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129 auto custom_agg = s.agg([](const std::vector<double>& v) {
11130 return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131 });
11132 if (!approx_equal(custom_agg, 3.0)) {
11133 passed = false;
11134 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135 throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136 }
11137
11138 // Test aggregate alias
11139 auto alias_result = s.aggregate("sum");
11140 if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141 passed = false;
11142 std::cout << " [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143 throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144 }
11145
11146 std::cout << " -> tests passed" << std::endl;
11147 }
11148
11149 void pd_test_func_apply_series_pipe() {
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234 void pd_test_func_apply_dataframe_apply_axis0() {
11235 std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237 std::map<std::string, std::vector<double>> data = {
11238 {"A", {1.0, 2.0, 3.0}},
11239 {"B", {4.0, 5.0, 6.0}}
11240 };
11241 pandas::DataFrame df(data);
11242
11243 // apply axis=0 applies function to each column
11244 auto result = df.apply([](const std::vector<double>& col) {
11245 return std::accumulate(col.begin(), col.end(), 0.0);
11246 }, 0);
11247
11248 bool passed = true;
11249
11250 // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251 // with the original column names ("A", "B") as the row index.
11252 // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253 const auto& result_col = result["result"];
11254 double sum_a = std::stod(result_col.get_value_str(0));
apply_dispatch (pd_test_5_all.cpp:53781)
53771 << " actual dtype: [" << dts[0] << "]\n";
53772 }
53773}
53774
53775static void f_series_apply_dispatch_502719_case_H1_dispatch_callable_square_int(int& local_fail) {
53776 std::cout << "-- case_H1_dispatch_callable_square_int\n";
53777 pandas::Series<std::int64_t> s({2, 3, 4}, "v");
53778 auto h = dispatch_test_helpers::make_stub_callable(
53779 dispatch_test_helpers::test_cell_int_cb([](double x) { return static_cast<std::int64_t>(x * x); }),
53780 "square");
53781 pandas::Result r = s.apply_dispatch(pandas::FuncArg::from_callable_handle(h));
53782 bool got = std::holds_alternative<std::unique_ptr<pandas::Series<numpy::float64>>>(r.value);
53783 pandas_tests::check(got, "case_H1.is_series_float64", local_fail);
53784 if (!got) return;
53785 auto& sp = std::get<std::unique_ptr<pandas::Series<numpy::float64>>>(r.value);
53786 bool ok = (sp->size() == 3) && (*sp)[0] == 4.0 && (*sp)[1] == 9.0 && (*sp)[2] == 16.0;
53787 pandas_tests::check(ok, "case_H1.values_squared", local_fail);
53788}
53789
53790static void f_series_apply_dispatch_502719_case_H2_dispatch_callable_half_float(int& local_fail) {
53791 std::cout << "-- case_H2_dispatch_callable_half_float\n";
apply_ns_transform (pd_test_4_all.cpp:6365)
6355 auto neg = me.negate();
6356 auto b = s.add_dateoffset(*neg);
6357 EXPECT(a.size() == b.size());
6358 for (size_t i = 0; i < a.size(); ++i) {
6359 EXPECT(static_cast<int64_t>(a[i]) == static_cast<int64_t>(b[i]));
6360 }
6361}
6362
6363void test_apply_ns_transform_identity() {
6364 auto s = make_dt_series({1, 2, 3});
6365 auto out = s.apply_ns_transform([](int64_t x) { return x; }, "datetime64[ns]");
6366 EXPECT(out.size() == 3);
6367 for (size_t i = 0; i < 3; ++i) {
6368 EXPECT(static_cast<int64_t>(out[i]) == static_cast<int64_t>(s[i]));
6369 }
6370 EXPECT(out.dtype_name() == "datetime64[ns]");
6371}
6372
6373void test_tz_aware_calendar_preserves_override() {
6374 auto s = make_dt_series({0});
6375 s.set_dtype_override("datetime64[ns, UTC]");
apply_resolved_typed (pd_test_5_all.cpp:98141)
98131 switch (cid) {
98132 case CbId::Int: cb = cb_int(hist); break;
98133 case CbId::Bool: cb = cb_bool(hist); break;
98134 case CbId::Float: cb = cb_float(hist); break;
98135 case CbId::Str: cb = cb_string(hist); break;
98136 case CbId::Mixed: cb = cb_mixed(hist); break;
98137 }
98138
98139 pandas::Result r;
98140 try {
98141 r = s.apply_resolved_typed(cb, hist);
98142 } catch (const std::exception& e) {
98143 std::string tag = std::string("apply src=") + src_name(sid) +
98144 " cb=" + cb_name(cid) + " mode=" + mode_name(mid);
98145 std::cout << "[FAIL] : in f_27a_core_3094022_apply_resolved_typed_post_cb_dtype() "
98146 << tag << " unexpected exception: " << e.what() << "\n";
98147 ++pandas_tests::g_failed; ++local_fail;
98148 ++pandas_tests::g_failed; ++local_fail;
98149 ++pandas_tests::g_failed; ++local_fail;
98150 return;
98151 }
apply_with_args (pd_test_3_all.cpp:16993)
16983 }
16984 }
16985
16986 if (!passed) {
16987 throw std::runtime_error("pd_test_apply_axis1_broadcast failed");
16988 }
16989 std::cout << " -> tests passed" << std::endl;
16990}
16991
16992void pd_test_apply_with_args() {
16993 std::cout << "========= DataFrame.apply_with_args() =================";
16994
16995 std::map<std::string, std::vector<double>> data = {
16996 {"A", {1.0, 2.0, 3.0}},
16997 {"B", {4.0, 5.0, 6.0}}
16998 };
16999 pandas::DataFrame df(data);
17000
17001 // Apply with additional argument: multiply sum by factor
17002 auto result = df.apply_with_args(
17003 [](const std::vector<double>& col, double factor) {
ewm (pd_test_3_all.cpp:2961)
2951 // Test expanding sum
2952 pandas::DataFrame expanding_sum = df.expanding().sum();
2953 if (expanding_sum.nrows() != 5 || expanding_sum.ncols() != 2) {
2954 throw std::runtime_error("expanding().sum() shape failed");
2955 }
2956
2957 std::cout << " -> tests passed" << std::endl;
2958}
2959
2960void pd_test_3_all_df_ewm() {
2961 std::cout << "========= DataFrame.ewm() ================================";
2962
2963 std::map<std::string, std::vector<double>> data = {
2964 {"A", {1.0, 2.0, 3.0, 4.0, 5.0}},
2965 {"B", {10.0, 20.0, 30.0, 40.0, 50.0}}
2966 };
2967 pandas::DataFrame df(data);
2968
2969 // Test ewm mean with span=3
2970 pandas::DataFrame ewm_mean = df.ewm(std::nullopt, 3.0).mean();
2971 if (ewm_mean.nrows() != 5 || ewm_mean.ncols() != 2) {
ewm_full (pd_test_3_all.cpp:9902)
9893 // Test with span using legacy overload
9894 auto ewm1 = s.ewm(3.0);
9895 auto result1 = ewm1.mean();
9896 if (result1.size() != 5) {
9897 std::cout << " [FAIL] : ewm with span returned wrong size" << std::endl;
9898 throw std::runtime_error("pd_test_3_all_phase2_ewm_params failed");
9899 }
9900
9901 // Test with alpha using ewm_full
9902 auto ewm2 = s.ewm_full(std::nullopt, std::nullopt, std::nullopt, 0.5);
9903 auto result2 = ewm2.mean();
9904 if (result2.size() != 5) {
9905 std::cout << " [FAIL] : ewm with alpha returned wrong size" << std::endl;
9906 throw std::runtime_error("pd_test_3_all_phase2_ewm_params failed");
9907 }
9908
9909 std::cout << " -> tests passed" << std::endl;
9910}
9911
9912void pd_test_3_all_phase2_combine_params() {
ewm_span (pd_test_1_all.cpp:21166)
21157 std::cout << " -> tests passed" << std::endl;
21158 }
21159
21160 void pd_test_ewm_span() {
21161 std::cout << "========= EWM span ==============================";
21162
21163 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
21164
21165 // EWM with span=3 => alpha = 2/(3+1) = 0.5
21166 auto result = s.ewm_span(3.0).mean();
21167
21168 // Check that result has correct size
21169 bool passed = result.size() == 5;
21170 if (!passed) {
21171 std::cout << " [FAIL] : in pd_test_ewm_span() : result size should be 5" << std::endl;
21172 throw std::runtime_error("pd_test_ewm_span failed: result size should be 5");
21173 }
21174
21175 // First value should be equal to original (no weighting yet)
21176 passed = std::abs(result[0] - 1.0) < 0.001;
expanding (pd_test_1_all.cpp:20770)
20760 throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761 }
20762
20763 std::cout << " -> tests passed" << std::endl;
20764 }
20765
20766 void pd_test_expanding_sum() {
20767 std::cout << "========= Expanding sum =========================";
20768
20769 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20770 auto result = s.expanding().sum();
20771
20772 // Cumulative sum: 1, 3, 6, 10, 15
20773 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20774 std::abs(result[1] - 3.0) < 0.001 &&
20775 std::abs(result[2] - 6.0) < 0.001 &&
20776 std::abs(result[3] - 10.0) < 0.001 &&
20777 std::abs(result[4] - 15.0) < 0.001;
20778 if (!passed) {
20779 std::cout << " [FAIL] : in pd_test_expanding_sum() : expanding sum values incorrect" << std::endl;
20780 throw std::runtime_error("pd_test_expanding_sum failed: expanding sum values incorrect");
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
groupby_by_callable (pd_test_4_all.cpp:6412)
6402 EXPECT(static_cast<int64_t>(out[1]) - (a + add) <= 2);
6403}
6404
6405void test_groupby_by_callable_int_index() {
6406 std::vector<numpy::float64> v = {1.0, 2.0, 3.0, 4.0};
6407 pandas::Series<numpy::float64> s(v);
6408 auto convert = [](size_t /*i*/, const std::string& label, bool /*hint*/) -> std::string {
6409 int64_t k = std::stoll(label);
6410 return std::to_string(k % 2);
6411 };
6412 auto gb = s.groupby_by_callable(convert, true);
6413 EXPECT(gb.ngroups() == 2);
6414}
6415
6416void test_groupby_by_callable_empty() {
6417 pandas::Series<numpy::float64> s(std::vector<numpy::float64>{});
6418 int calls = 0;
6419 auto convert = [&](size_t, const std::string&, bool) -> std::string { ++calls; return ""; };
6420 auto gb = s.groupby_by_callable(convert, true);
6421 EXPECT(calls == 0);
6422 EXPECT(gb.ngroups() == 0);
groupby_by_categorical (pd_test_3_all.cpp:23500)
23490 auto sums = gb.sum();
23491 if (sums[0] != 40.0 || sums[1] != 60.0)
23492 throw std::runtime_error("sum mismatch");
23493 if (gb.grouper_dtype() != "float64")
23494 throw std::runtime_error("grouper_dtype mismatch");
23495
23496 std::cout << " -> tests passed" << std::endl;
23497}
23498
23499void pd_test_groupby_by_categorical() {
23500 std::cout << "========= groupby_by_categorical() ====================";
23501
23502 pandas::Series<numpy::float64> s({10.0, 20.0, 30.0});
23503 pandas::CategoricalArray cat({"a", "b", "a"}, {"a", "b", "c"});
23504
23505 auto gb_obs = s.groupby_by_categorical(cat, true, true);
23506 if (gb_obs.group_keys_order().size() != 2)
23507 throw std::runtime_error("expected 2 observed groups");
23508
23509 auto gb_all = s.groupby_by_categorical(cat, true, false);
23510 if (gb_all.group_keys_order().size() != 3)
groupby_by_index (pd_test_3_all.cpp:23426)
23416 auto gb = s.groupby_by_level(levels, true);
23417 if (gb.group_keys_order().size() != 4)
23418 throw std::runtime_error("expected 4 composite groups");
23419 if (gb.multiindex_names().size() != 2 || gb.multiindex_names()[0] != "L0" || gb.multiindex_names()[1] != "L1")
23420 throw std::runtime_error("multiindex names mismatch");
23421
23422 std::cout << " -> tests passed" << std::endl;
23423}
23424
23425void pd_test_groupby_by_index() {
23426 std::cout << "========= groupby_by_index() ==========================";
23427
23428 pandas::Series<numpy::float64> s({10.0, 20.0, 30.0});
23429 s.set_index(pandas::Index<std::string>({"a", "b", "a"}));
23430 auto gb = s.groupby_by_index(true, true);
23431 if (gb.group_keys_order().size() != 2)
23432 throw std::runtime_error("expected 2 groups");
23433 auto sums = gb.sum();
23434 if (sums[0] != 40.0 || sums[1] != 20.0)
23435 throw std::runtime_error("sum mismatch");
groupby_by_labels (pd_test_3_all.cpp:23520)
23510 if (gb_all.group_keys_order().size() != 3)
23511 throw std::runtime_error("expected 3 groups with observed=false");
23512
23513 if (gb_obs.categorical_categories().size() != 3)
23514 throw std::runtime_error("categorical_categories not set");
23515
23516 std::cout << " -> tests passed" << std::endl;
23517}
23518
23519void pd_test_groupby_by_labels() {
23520 std::cout << "========= groupby_by_labels() =========================";
23521
23522 pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0});
23523 std::vector<std::string> labels = {"X", "Y", "X", "Y"};
23524 auto gb = s.groupby_by_labels(labels, "object", true);
23525 auto sums = gb.sum();
23526 if (sums[0] != 4.0 || sums[1] != 6.0)
23527 throw std::runtime_error("sum mismatch");
23528 if (gb.grouper_dtype() != "object")
23529 throw std::runtime_error("grouper_dtype mismatch");
groupby_by_level (pd_test_3_all.cpp:23382)
23372int pd_test_df_construct_mi_main() {
23373 return dataframe_tests_df_construct_mi::pd_test_df_construct_mi_main();
23374}
23375// ------------------- pd_test_df_construct_mi (end) ---------------------------
23376
23377// ------------------- pd_test_groupby_level_dispatch.cpp (start) ---------------------------
23378namespace dataframe_tests_groupby_level_dispatch {
23379
23380void pd_test_groupby_level_single() {
23381 std::cout << "========= groupby_by_level(single) ====================";
23382
23383 pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23384 std::vector<std::vector<std::string>> level_values = {
23385 {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23386 };
23387 std::vector<std::optional<std::string>> level_names = {"first", "second"};
23388 auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23389 s.set_multiindex(mi);
23390
23391 auto gb = s.groupby_by_level(static_cast<size_t>(0), true);
groupby_by_level (pd_test_3_all.cpp:23382)
23372int pd_test_df_construct_mi_main() {
23373 return dataframe_tests_df_construct_mi::pd_test_df_construct_mi_main();
23374}
23375// ------------------- pd_test_df_construct_mi (end) ---------------------------
23376
23377// ------------------- pd_test_groupby_level_dispatch.cpp (start) ---------------------------
23378namespace dataframe_tests_groupby_level_dispatch {
23379
23380void pd_test_groupby_level_single() {
23381 std::cout << "========= groupby_by_level(single) ====================";
23382
23383 pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23384 std::vector<std::vector<std::string>> level_values = {
23385 {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23386 };
23387 std::vector<std::optional<std::string>> level_names = {"first", "second"};
23388 auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23389 s.set_multiindex(mi);
23390
23391 auto gb = s.groupby_by_level(static_cast<size_t>(0), true);
groupby_by_level_names (pd_test_3_all.cpp:23548)
23538 pandas::Series<std::string> by_s({"cat", "dog", "cat"});
23539 auto gb = s.groupby_by_string_series(by_s, true);
23540 auto sums = gb.sum();
23541 if (sums[0] != 400.0 || sums[1] != 200.0)
23542 throw std::runtime_error("sum mismatch");
23543
23544 std::cout << " -> tests passed" << std::endl;
23545}
23546
23547void pd_test_groupby_by_level_names() {
23548 std::cout << "========= groupby_by_level_names() ====================";
23549
23550 pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0});
23551 std::vector<std::vector<std::string>> level_values = {
23552 {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23553 };
23554 std::vector<std::optional<std::string>> level_names = {"first", "second"};
23555 auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23556 s.set_multiindex(mi);
23557
23558 std::vector<std::string> by_names = {"first", "second"};
groupby_by_numeric (pd_test_3_all.cpp:23483)
23473int pd_test_groupby_level_dispatch_main() {
23474 return dataframe_tests_groupby_level_dispatch::pd_test_groupby_level_dispatch_main();
23475}
23476// ------------------- pd_test_groupby_level_dispatch.cpp (end) ---------------------------
23477
23478// ------------------- pd_test_groupby_by_dispatch.cpp (start) ---------------------------
23479namespace dataframe_tests_groupby_by_dispatch {
23480
23481void pd_test_groupby_by_numeric() {
23482 std::cout << "========= groupby_by_numeric() ========================";
23483
23484 pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23485 pandas::Series<numpy::float64> by_s({1.0, 2.0, 1.0, 2.0});
23486 auto gb = s.groupby_by_numeric(by_s, true);
23487 if (gb.group_keys_order().size() != 2)
23488 throw std::runtime_error("expected 2 groups");
23489 auto sums = gb.sum();
23490 if (sums[0] != 40.0 || sums[1] != 60.0)
23491 throw std::runtime_error("sum mismatch");
23492 if (gb.grouper_dtype() != "float64")
groupby_by_string_series (pd_test_3_all.cpp:23535)
23525 auto sums = gb.sum();
23526 if (sums[0] != 4.0 || sums[1] != 6.0)
23527 throw std::runtime_error("sum mismatch");
23528 if (gb.grouper_dtype() != "object")
23529 throw std::runtime_error("grouper_dtype mismatch");
23530
23531 std::cout << " -> tests passed" << std::endl;
23532}
23533
23534void pd_test_groupby_by_string_series() {
23535 std::cout << "========= groupby_by_string_series() ==================";
23536
23537 pandas::Series<numpy::float64> s({100.0, 200.0, 300.0});
23538 pandas::Series<std::string> by_s({"cat", "dog", "cat"});
23539 auto gb = s.groupby_by_string_series(by_s, true);
23540 auto sums = gb.sum();
23541 if (sums[0] != 400.0 || sums[1] != 200.0)
23542 throw std::runtime_error("sum mismatch");
23543
23544 std::cout << " -> tests passed" << std::endl;
23545}
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833 std::cout << "========= map =========================================";
5834
5835 pandas::CategoricalArray arr({"yes", "no", "yes"});
5836 pandas::CategoricalIndex idx(arr);
5837
5838 std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839 pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841 bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842 !mapped.has_category("yes") && !mapped.has_category("no"));
5843 if (!passed) {
5844 std::cout << " [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845 throw std::runtime_error("pd_test_categorical_index_map failed");
5846 }
5847
5848 std::cout << " -> tests passed" << std::endl;
5849}
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833 std::cout << "========= map =========================================";
5834
5835 pandas::CategoricalArray arr({"yes", "no", "yes"});
5836 pandas::CategoricalIndex idx(arr);
5837
5838 std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839 pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841 bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842 !mapped.has_category("yes") && !mapped.has_category("no"));
5843 if (!passed) {
5844 std::cout << " [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845 throw std::runtime_error("pd_test_categorical_index_map failed");
5846 }
5847
5848 std::cout << " -> tests passed" << std::endl;
5849}
map_dict (pd_test_3_all.cpp:23599)
23589// ------------------- pd_test_groupby_by_dispatch.cpp (end) ---------------------------
23590
23591// ------------------- pd_test_map_dispatch.cpp (begin) ---------------------------
23592namespace dataframe_tests_map_dispatch {
23593
23594void pd_test_map_dispatch_dict_basic() {
23595 std::cout << " pd_test_map_dispatch_dict_basic";
23596 // Dict mapping: 3 matched values + 1 unmapped -> NaN; name preserved
23597 pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0}, "vals");
23598 std::map<numpy::float64, numpy::float64> mapping = {{1.0, 10.0}, {2.0, 20.0}, {3.0, 30.0}};
23599 auto res = s.map_dict(mapping);
23600 if (res.name() != "vals")
23601 throw std::runtime_error("name not preserved");
23602 if (res[0] != 10.0 || res[1] != 20.0 || res[2] != 30.0)
23603 throw std::runtime_error("mapped values incorrect");
23604 if (!std::isnan(res[3]))
23605 throw std::runtime_error("unmapped key should be NaN");
23606 std::cout << " -> tests passed" << std::endl;
23607}
23608
23609void pd_test_map_dispatch_preserves_index() {
map_dict_resolved (pd_test_5_all.cpp:139935)
139925 pandas::Series<numpy::float64> s(data, std::optional<std::string>{});
139926 s.set_index(std::make_unique<pandas::Index<std::string>>(labels));
139927 s.set_dtype_override("int64");
139928 return s;
139929}
139930
139931static void f_plan_02_apply_result_dtype_517043_map_full_int(int& lf) {
139932 std::cout << "-- E_map_full_int\n";
139933 auto s = mk_src_int64({1, 2, 3}, {"a", "b", "c"});
139934 std::map<int64_t, int64_t> m{{1, 10}, {2, 20}, {3, 30}};
139935 auto r = s.map_dict_resolved(m);
139936 pandas_tests::check(result_series_dtype(r) == "int64",
139937 "E_map_full_int()_dtype", lf);
139938}
139939
139940static void f_plan_02_apply_result_dtype_517043_map_partial_int(int& lf) {
139941 std::cout << "-- E_map_partial_int\n";
139942 auto s = mk_src_int64({1, 2, 3}, {"a", "b", "c"});
139943 std::map<int64_t, int64_t> m{{1, 10}};
139944 auto r = s.map_dict_resolved(m);
139945 pandas_tests::check(result_series_dtype(r) == "float64",
map_series (pd_test_3_all.cpp:23632)
23623void pd_test_map_dispatch_series_lookup() {
23624 std::cout << " pd_test_map_dispatch_series_lookup";
23625 // Series lookup via index labels returns correct mapped values
23626 // lookup: index=[1,2,3], values=[10,20,30]
23627 pandas::Series<numpy::float64> lookup({10.0, 20.0, 30.0}, "lk");
23628 pandas::Index<std::string> lk_idx({"1", "2", "3"});
23629 lookup.set_index(lk_idx);
23630
23631 pandas::Series<numpy::float64> s({2.0, 3.0, 1.0}, "src");
23632 auto res = s.map_series(lookup);
23633 if (res[0] != 20.0 || res[1] != 30.0 || res[2] != 10.0)
23634 throw std::runtime_error("series lookup values incorrect");
23635 if (res.name() != "src")
23636 throw std::runtime_error("name not preserved");
23637 std::cout << " -> tests passed" << std::endl;
23638}
23639
23640void pd_test_map_dispatch_nan_passthrough() {
23641 std::cout << " pd_test_map_dispatch_nan_passthrough";
23642 // NaN in source passes through as NaN, non-NaN values mapped
map_series_resolved (pd_test_5_all.cpp:143266)
143256 std::map<int64_t, int64_t> m{{1, 10}};
143257 auto r = s.map_dict_resolved(m);
143258 check_dtype_eq("apply_empty_hist_case_30_empty_map_nonempty_dict_int64()",
143259 result_series_dtype_full(r), "int64", lf);
143260}
143261
143262void case_31_empty_map_nonempty_series_int64(int& lf) {
143263 std::cout << "-- case_31_empty_map_nonempty_series_int64\n";
143264 auto s = mk_f64({}, {}, "int64");
143265 auto mapper = mk_f64({100, 200}, {"1", "2"}, "int64");
143266 auto r = s.map_series_resolved(mapper);
143267 check_dtype_eq("apply_empty_hist_case_31_empty_map_nonempty_series_int64()",
143268 result_series_dtype_full(r), "int64", lf);
143269}
143270
143271void case_40_E1_empty_map_empty_series_int64(int& lf) {
143272 std::cout << "-- case_40_E1_empty_map_empty_series_int64\n";
143273 auto s = mk_f64({}, {}, "int64");
143274 auto mapper = mk_f64({}, {}, "int64");
143275 auto r = s.map_series_resolved(mapper);
143276 check_dtype_eq("apply_empty_hist_case_40_E1_empty_map_empty_series_int64()",
map_to_string (pd_test_3_all.cpp:23668)
23660void pd_test_map_dispatch_to_string() {
23661 std::cout << " pd_test_map_dispatch_to_string";
23662 // map_to_string returns string values for matched, "NaN" for unmatched
23663 pandas::Series<std::string> lookup(std::vector<std::string>{"one", "two"}, "lk");
23664 pandas::Index<std::string> lk_idx({"1", "2"});
23665 lookup.set_index(lk_idx);
23666
23667 double nan_val = std::numeric_limits<double>::quiet_NaN();
23668 pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, nan_val}, "src");
23669 auto res = s.map_to_string(lookup);
23670 if (res[0] != "one" || res[1] != "two")
23671 throw std::runtime_error("matched values incorrect");
23672 if (res[2] != "NaN")
23673 throw std::runtime_error("unmatched should be NaN string");
23674 if (res[3] != "NaN")
23675 throw std::runtime_error("NaN source should be NaN string");
23676 if (res.name() != "src")
23677 throw std::runtime_error("name not preserved");
23678 std::cout << " -> tests passed" << std::endl;
23679}
pipe (pd_test_1_all.cpp:11164)
11154 // Pipe applies function to entire Series
11155 auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11156 auto mean_val = ser.mean();
11157 std::vector<double> result;
11158 for (size_t i = 0; i < ser.size(); ++i) {
11159 result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11160 }
11161 return pandas::Series<double>(result, ser.name());
11162 };
11163
11164 auto result = s.pipe(add_mean, 10.0);
11165
11166 bool passed = true;
11167 // mean is 2.5, offset is 10.0, so each value + 12.5
11168 std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11169 for (size_t i = 0; i < result.size(); ++i) {
11170 if (!approx_equal(result[i], expected[i])) {
11171 passed = false;
11172 std::cout << " [FAIL] : in pd_test_func_apply_series_pipe() : value mismatch at " << i << std::endl;
11173 throw std::runtime_error("pd_test_func_apply_series_pipe failed: value mismatch");
11174 }
resample (pd_test_1_all.cpp:20321)
20311 "2020-01-01 00:00:00",
20312 "2020-01-01 12:00:00",
20313 "2020-01-02 00:00:00",
20314 "2020-01-02 12:00:00",
20315 "2020-01-03 00:00:00",
20316 "2020-01-03 12:00:00"
20317 };
20318 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20319
20320 // Resample to daily
20321 auto resampler = df.resample("D");
20322 pandas::DataFrame result = resampler.sum();
20323
20324 // Check that we got aggregated results
20325 bool passed = (result.nrows() <= df.nrows());
20326
20327 if (!passed) {
20328 std::cout << " [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
20329 throw std::runtime_error("pd_test_timeseries_resample_basic failed");
20330 }
rolling (pd_test_1_all.cpp:20667)
20657#include <vector>
20658#include "../pandas/pd_series.h"
20659
20660namespace dataframe_tests {
20661 namespace dataframe_tests_windowing {
20662
20663 void pd_test_rolling_sum() {
20664 std::cout << "========= Rolling sum ===========================";
20665
20666 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20667 auto result = s.rolling(3).sum();
20668
20669 // Window 3:
20670 // idx 0: [1] -> NaN (not enough values)
20671 // idx 1: [1,2] -> NaN (not enough values)
20672 // idx 2: [1,2,3] -> 6
20673 // idx 3: [2,3,4] -> 9
20674 // idx 4: [3,4,5] -> 12
20675 bool passed = result.size() == 5;
20676 if (!passed) {
20677 std::cout << " [FAIL] : in pd_test_rolling_sum() : result size should be 5" << std::endl;
transform (pd_test_1_all.cpp:11071)
11062 std::cout << " -> tests passed" << std::endl;
11063 }
11064
11065 void pd_test_func_apply_series_transform() {
11066 std::cout << "========= Series transform ============================";
11067
11068 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070 // Transform must return same shape
11071 auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073 bool passed = true;
11074 if (result.size() != s.size()) {
11075 passed = false;
11076 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078 }
11079
11080 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081 for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11062 std::cout << " -> tests passed" << std::endl;
11063 }
11064
11065 void pd_test_func_apply_series_transform() {
11066 std::cout << "========= Series transform ============================";
11067
11068 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070 // Transform must return same shape
11071 auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073 bool passed = true;
11074 if (result.size() != s.size()) {
11075 passed = false;
11076 std::cout << " [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077 throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078 }
11079
11080 std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081 for (size_t i = 0; i < result.size(); ++i) {
transform_named_list (pd_test_3_all.cpp:27185)
27175 auto result = s.map_dict(mapping);
27176 check(result[0] == 100.0, "mapped 1->100");
27177 check(std::isnan(result[1]), "unmapped 5->NaN");
27178 check(result[2] == 300.0, "mapped 3->300");
27179}
27180
27181// Test 6: transform multi (list of named functions -> DataFrame)
27182void pd_test_transform_multi() {
27183 std::cout << " -- pd_test_transform_multi --" << std::endl;
27184 Series<numpy::float64> s({4.0, 9.0, 16.0}, std::string("nums"));
27185 auto df = s.transform_named_list({"sqrt", "abs"});
27186 check(df.ncols() == 2, "2 columns");
27187 check(df.nrows() == 3, "3 rows");
27188 // Get sqrt column by index (first column = index 0)
27189 size_t sqrt_idx = df.get_column_index("sqrt");
27190 auto sqrt_series = df.column_to_series_f64(sqrt_idx);
27191 check(std::abs(sqrt_series[0] - 2.0) < 1e-10, "sqrt(4)==2");
27192 check(std::abs(sqrt_series[1] - 3.0) < 1e-10, "sqrt(9)==3");
27193 check(std::abs(sqrt_series[2] - 4.0) < 1e-10, "sqrt(16)==4");
27194 size_t abs_idx = df.get_column_index("abs");
27195 auto abs_series = df.column_to_series_f64(abs_idx);
transform_resolved (pd_test_5_all.cpp:98227)
98217 switch (cid) {
98218 case CbId::Int: cb = cb_int(hist); break;
98219 case CbId::Bool: cb = cb_bool(hist); break;
98220 case CbId::Float: cb = cb_float(hist); break;
98221 case CbId::Str: cb = cb_string(hist); break;
98222 case CbId::Mixed: cb = cb_mixed(hist); break;
98223 }
98224
98225 pandas::Result r;
98226 try {
98227 r = s.transform_resolved(cb, hist);
98228 } catch (const std::exception& e) {
98229 std::string tag = std::string("transform src=") + src_name(sid) +
98230 " cb=" + cb_name(cid) + " mode=" + mode_name(mid);
98231 std::cout << "[FAIL] : in f_27a_core_3094022_apply_resolved_typed_post_cb_dtype() "
98232 << tag << " unexpected exception: " << e.what() << "\n";
98233 ++pandas_tests::g_failed; ++local_fail;
98234 ++pandas_tests::g_failed; ++local_fail;
98235 ++pandas_tests::g_failed; ++local_fail;
98236 return;
98237 }
add (pd_test_1_all.cpp:4844)
4835namespace dataframe_tests {
4836 namespace dataframe_tests_arithmetic {
4837
4838 void pd_test_arithmetic_series_named_ops() {
4839 std::cout << "========= Series named ops ======================";
4840
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add (pd_test_1_all.cpp:4844)
4835namespace dataframe_tests {
4836 namespace dataframe_tests_arithmetic {
4837
4838 void pd_test_arithmetic_series_named_ops() {
4839 std::cout << "========= Series named ops ======================";
4840
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add_dateoffset (pd_test_4_all.cpp:6330)
6320 std::vector<numpy::float64> v(ns.size());
6321 for (size_t i = 0; i < ns.size(); ++i) v[i] = static_cast<numpy::float64>(ns[i]);
6322 pandas::Series<numpy::float64> s(v);
6323 s.set_dtype_override("datetime64[ns]");
6324 return s;
6325}
6326
6327void test_add_dateoffset_tick_day() {
6328 auto s = make_dt_series({0, 86400000000000LL});
6329 pandas::Day d(5);
6330 auto out = s.add_dateoffset(d);
6331 EXPECT(out.size() == 2);
6332 EXPECT(static_cast<int64_t>(out[0]) == 5LL * 86400000000000LL);
6333 EXPECT(static_cast<int64_t>(out[1]) == 6LL * 86400000000000LL);
6334 EXPECT(out.dtype_name() == "datetime64[ns]");
6335}
6336
6337void test_sub_dateoffset_calendar_monthend() {
6338 // 2024-01-31 in ns
6339 int64_t jan31 = 1706659200LL * 1000000000LL;
6340 auto s = make_dt_series({jan31});
add_dateoffset_to_timedelta (pd_test_4_all.cpp:6397)
6387}
6388
6389void test_add_dateoffset_to_timedelta_precision() {
6390 int64_t a = 1LL << 54;
6391 int64_t b = a + 1;
6392 std::vector<numpy::float64> v = {static_cast<numpy::float64>(a),
6393 static_cast<numpy::float64>(b)};
6394 pandas::Series<numpy::float64> s(v);
6395 s.set_dtype_override("timedelta64[ns]");
6396 pandas::Day d(1);
6397 auto out = s.add_dateoffset_to_timedelta(d);
6398 int64_t add = 86400000000000LL;
6399 EXPECT(static_cast<int64_t>(out[0]) == a + add);
6400 // Note: float64 cannot precisely represent (a+1); this only verifies the
6401 // int64-space computation, not full lossless storage.
6402 EXPECT(static_cast<int64_t>(out[1]) - (a + add) <= 2);
6403}
6404
6405void test_groupby_by_callable_int_index() {
6406 std::vector<numpy::float64> v = {1.0, 2.0, 3.0, 4.0};
6407 pandas::Series<numpy::float64> s(v);
add_prefix (pd_test_2_all.cpp:4)
2// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
3// dataframe_tests/pd_test_add_prefix.cpp
4// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
5#include <iostream>
6#include <stdexcept>
7#include <vector>
8#include <string>
9#include <map>
10#include "../pandas/pd_dataframe.h"
11#include "../pandas/pd_groupby.h"
12
13// CRITICAL: No using namespace directives
add_suffix (pd_test_2_all.cpp:4)
2// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
3// dataframe_tests/pd_test_add_prefix.cpp
4// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
5#include <iostream>
6#include <stdexcept>
7#include <vector>
8#include <string>
9#include <map>
10#include "../pandas/pd_dataframe.h"
11#include "../pandas/pd_groupby.h"
12
13// CRITICAL: No using namespace directives
div (pd_test_1_all.cpp:4865)
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870 }
4871
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
div (pd_test_1_all.cpp:4865)
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870 }
4871
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
divide (pd_test_3_all.cpp:555)
545 if (mul_result.size() != 4) {
546 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548 }
549 // 10*2=20
550 if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553 }
554
555 // Test divide()
556 pandas::Series<numpy::float64> div_result = s1.divide(s2);
557 if (div_result.size() != 4) {
558 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : divide() size mismatch" << std::endl;
559 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide()");
560 }
561 // 10/2=5
562 if (std::abs(div_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
563 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : divide() value mismatch" << std::endl;
564 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide() value");
565 }
divmod (pd_test_3_all.cpp:12077)
12067 auto cov_val = s1.cov(s2);
12068 if (!cov_val.has_value()) {
12069 std::cout << " [FAIL] : covariance should have a value" << std::endl;
12070 throw std::runtime_error("pd_test_series_corr_cov failed");
12071 }
12072
12073 std::cout << " -> tests passed" << std::endl;
12074}
12075
12076// ============================================================================
12077// Test 9: divmod()
12078// ============================================================================
12079void pd_test_series_divmod() {
12080 std::cout << "========= Series.divmod() ==========================";
12081
12082 std::vector<double> vals = {10.0, 20.0, 30.0};
12083 pandas::Series<double> s(vals, "test");
12084
12085 auto [quot, rem] = s.divmod(7.0);
12086
12087 // 10/7 = 1 remainder 3
divmod (pd_test_3_all.cpp:12077)
12067 auto cov_val = s1.cov(s2);
12068 if (!cov_val.has_value()) {
12069 std::cout << " [FAIL] : covariance should have a value" << std::endl;
12070 throw std::runtime_error("pd_test_series_corr_cov failed");
12071 }
12072
12073 std::cout << " -> tests passed" << std::endl;
12074}
12075
12076// ============================================================================
12077// Test 9: divmod()
12078// ============================================================================
12079void pd_test_series_divmod() {
12080 std::cout << "========= Series.divmod() ==========================";
12081
12082 std::vector<double> vals = {10.0, 20.0, 30.0};
12083 pandas::Series<double> s(vals, "test");
12084
12085 auto [quot, rem] = s.divmod(7.0);
12086
12087 // 10/7 = 1 remainder 3
dot (pd_test_1_all.cpp:22594)
22585 std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22586 return 0;
22587 }
22588
22589} // namespace dataframe_tests
22590// ------------------- pd_test_all_any.cpp (end) -----------------------------
22591
22592// ------------------- pd_test_dot.cpp (start) -----------------------------
22593// dataframe_tests/pd_test_dot.cpp
22594// Test DataFrame.dot() method - matrix multiplication
22595
22596#include <iostream>
22597#include <stdexcept>
22598#include <cmath>
22599#include "../pandas/pd_dataframe.h"
22600
22601// CRITICAL: No using namespace directives
22602
22603namespace dataframe_tests {
22604 namespace dataframe_tests_dot {
dot (pd_test_1_all.cpp:22594)
22585 std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22586 return 0;
22587 }
22588
22589} // namespace dataframe_tests
22590// ------------------- pd_test_all_any.cpp (end) -----------------------------
22591
22592// ------------------- pd_test_dot.cpp (start) -----------------------------
22593// dataframe_tests/pd_test_dot.cpp
22594// Test DataFrame.dot() method - matrix multiplication
22595
22596#include <iostream>
22597#include <stdexcept>
22598#include <cmath>
22599#include "../pandas/pd_dataframe.h"
22600
22601// CRITICAL: No using namespace directives
22602
22603namespace dataframe_tests {
22604 namespace dataframe_tests_dot {
floordiv (pd_test_1_all.cpp:4881)
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
4876 std::cout << "========= Series floordiv/mod ===================";
4877
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
floordiv (pd_test_1_all.cpp:4881)
4872 std::cout << " -> tests passed" << std::endl;
4873 }
4874
4875 void pd_test_arithmetic_series_floordiv_mod() {
4876 std::cout << "========= Series floordiv/mod ===================";
4877
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
mod (pd_test_1_all.cpp:4888)
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893 }
4894
4895 // Scalar operations
4896 auto fd_scalar = a.floordiv(2.0);
4897 passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898 if (!passed) {
mod (pd_test_1_all.cpp:4888)
4878 pandas::Series<double> a({7.0, 8.0, 9.0});
4879 pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881 auto fd = a.floordiv(b);
4882 bool passed = std::abs(fd[0] - 3.0) < 0.001; // 7 // 2 = 3
4883 if (!passed) {
4884 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886 }
4887
4888 auto m = a.mod(b);
4889 passed = std::abs(m[0] - 1.0) < 0.001; // 7 % 2 = 1
4890 if (!passed) {
4891 std::cout << " [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892 throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893 }
4894
4895 // Scalar operations
4896 auto fd_scalar = a.floordiv(2.0);
4897 passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898 if (!passed) {
mul (pd_test_1_all.cpp:4858)
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863 }
4864
4865 auto quot = a.div(b);
4866 passed = std::abs(quot[0] - 0.25) < 0.001;
4867 if (!passed) {
4868 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
multiindex (pd_test_1_all.cpp:27024)
27014 pandas::DataFrame df(data);
27015
27016 auto result = df.value_counts();
27017 auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27018
27019 if (!counts.has_multiindex()) {
27020 std::cout << " [FAIL] : expected MultiIndex" << std::endl;
27021 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27022 }
27023
27024 const auto& midx = counts.multiindex();
27025
27026 // Should have 2 levels
27027 if (midx.nlevels() != 2) {
27028 std::cout << " [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27029 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
27030 }
27031
27032 std::cout << " -> tests passed" << std::endl;
27033 }
multiply (pd_test_3_all.cpp:543)
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536 }
537 // 10-2=8
538 if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541 }
542
543 // Test multiply()
544 pandas::Series<double> mul_result = s1.multiply(s2);
545 if (mul_result.size() != 4) {
546 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548 }
549 // 10*2=20
550 if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553 }
pow (pd_test_1_all.cpp:4911)
4901 }
4902
4903 std::cout << " -> tests passed" << std::endl;
4904 }
4905
4906 void pd_test_arithmetic_series_pow() {
4907 std::cout << "========= Series pow ============================";
4908
4909 pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911 auto p = a.pow(2.0);
4912 bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913 if (!passed) {
4914 std::cout << " [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915 throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916 }
4917
4918 // Series pow Series
4919 pandas::Series<double> exp({1.0, 2.0, 0.5});
4920 auto p2 = a.pow(exp);
4921 passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001; // 3^2=9
pow (pd_test_1_all.cpp:4911)
4901 }
4902
4903 std::cout << " -> tests passed" << std::endl;
4904 }
4905
4906 void pd_test_arithmetic_series_pow() {
4907 std::cout << "========= Series pow ============================";
4908
4909 pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911 auto p = a.pow(2.0);
4912 bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913 if (!passed) {
4914 std::cout << " [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915 throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916 }
4917
4918 // Series pow Series
4919 pandas::Series<double> exp({1.0, 2.0, 0.5});
4920 auto p2 = a.pow(exp);
4921 passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001; // 3^2=9
radd (pd_test_2_all.cpp:7440)
7430 if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431 return std::abs(a - b) < tol;
7432 }
7433
7434 // Helper to get double value from DataFrame at position
7435 double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436 return df.iloc<numpy::float64>(row, col);
7437 }
7438
7439 void pd_test_radd_scalar() {
7440 std::cout << "========= radd() with scalar =====================";
7441
7442 // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443 std::map<std::string, std::vector<double>> data = {
7444 {"angles", {0.0, 3.0, 4.0}},
7445 {"degrees", {360.0, 180.0, 360.0}}
7446 };
7447 pandas::DataFrame df(data);
7448
7449 // df.radd(1) should be equivalent to 1 + df
7450 pandas::DataFrame result = df.radd(1.0);
radd (pd_test_2_all.cpp:7440)
7430 if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431 return std::abs(a - b) < tol;
7432 }
7433
7434 // Helper to get double value from DataFrame at position
7435 double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436 return df.iloc<numpy::float64>(row, col);
7437 }
7438
7439 void pd_test_radd_scalar() {
7440 std::cout << "========= radd() with scalar =====================";
7441
7442 // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443 std::map<std::string, std::vector<double>> data = {
7444 {"angles", {0.0, 3.0, 4.0}},
7445 {"degrees", {360.0, 180.0, 360.0}}
7446 };
7447 pandas::DataFrame df(data);
7448
7449 // df.radd(1) should be equivalent to 1 + df
7450 pandas::DataFrame result = df.radd(1.0);
rdiv (pd_test_2_all.cpp:7713)
7703 }
7704
7705 if (!passed) {
7706 throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707 }
7708
7709 std::cout << " -> tests passed" << std::endl;
7710 }
7711
7712 void pd_test_rdiv_scalar() {
7713 std::cout << "========= rdiv() with scalar =====================";
7714
7715 // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716 std::map<std::string, std::vector<double>> data = {
7717 {"angles", {0.0, 3.0, 4.0}},
7718 {"degrees", {360.0, 180.0, 360.0}}
7719 };
7720 pandas::DataFrame df(data);
7721
7722 // df.rdiv(10) = 10 / df
7723 pandas::DataFrame result = df.rdiv(10.0);
rdivmod (pd_test_3_all.cpp:9176)
9166 // Check: val[2]=3, no condition matches -> default 0.0
9167 if (std::abs(result[static_cast<size_t>(2)] - 0.0) > 0.001) {
9168 std::cout << " [FAIL] : in pd_test_3_all_series_case_when() : default value wrong" << std::endl;
9169 throw std::runtime_error("pd_test_3_all_series_case_when failed: default");
9170 }
9171
9172 std::cout << " -> tests passed" << std::endl;
9173}
9174
9175void pd_test_3_all_series_rdivmod() {
9176 std::cout << "========= Series.rdivmod() ========================";
9177
9178 std::vector<double> vals = {2.0, 3.0, 4.0, 5.0};
9179 pandas::Series<double> s(vals, "test");
9180
9181 // rdivmod: scalar / series -> (quotient, remainder)
9182 // 10 / 2 = 5, 10 % 2 = 0
9183 // 10 / 3 = 3, 10 % 3 = 1
9184 // 10 / 4 = 2, 10 % 4 = 2
9185 // 10 / 5 = 2, 10 % 5 = 0
9186 auto [quotients, remainders] = s.rdivmod(10.0);
rfloordiv (pd_test_2_all.cpp:7909)
7899 }
7900
7901 if (!passed) {
7902 throw std::runtime_error("pd_test_rtruediv_with_fill_value failed");
7903 }
7904
7905 std::cout << " -> tests passed" << std::endl;
7906 }
7907
7908 void pd_test_rfloordiv_scalar() {
7909 std::cout << "========= rfloordiv() with scalar ================";
7910
7911 std::map<std::string, std::vector<double>> data = {
7912 {"A", {3.0, 4.0}},
7913 {"B", {7.0, 8.0}}
7914 };
7915 pandas::DataFrame df(data);
7916
7917 // df.rfloordiv(10) = 10 // df (floor division)
7918 pandas::DataFrame result = df.rfloordiv(10.0);
rmod (pd_test_2_all.cpp:8121)
8111 }
8112
8113 if (!passed) {
8114 throw std::runtime_error("pd_test_rfloordiv_division_by_zero failed");
8115 }
8116
8117 std::cout << " -> tests passed" << std::endl;
8118 }
8119
8120 void pd_test_rmod_scalar() {
8121 std::cout << "========= rmod() with scalar =====================";
8122
8123 std::map<std::string, std::vector<double>> data = {
8124 {"A", {3.0, 4.0}}
8125 };
8126 pandas::DataFrame df(data);
8127
8128 // df.rmod(10) = 10 % df
8129 pandas::DataFrame result = df.rmod(10.0);
8130
8131 bool passed = true;
rmul (pd_test_2_all.cpp:7591)
7581 }
7582
7583 if (!passed) {
7584 throw std::runtime_error("pd_test_rsub_dataframe failed");
7585 }
7586
7587 std::cout << " -> tests passed" << std::endl;
7588 }
7589
7590 void pd_test_rmul_scalar() {
7591 std::cout << "========= rmul() with scalar =====================";
7592
7593 std::map<std::string, std::vector<double>> data = {
7594 {"A", {2.0, 3.0}},
7595 {"B", {4.0, 5.0}}
7596 };
7597 pandas::DataFrame df(data);
7598
7599 // df.rmul(10) = 10 * df
7600 pandas::DataFrame result = df.rmul(10.0);
rmul (pd_test_2_all.cpp:7591)
7581 }
7582
7583 if (!passed) {
7584 throw std::runtime_error("pd_test_rsub_dataframe failed");
7585 }
7586
7587 std::cout << " -> tests passed" << std::endl;
7588 }
7589
7590 void pd_test_rmul_scalar() {
7591 std::cout << "========= rmul() with scalar =====================";
7592
7593 std::map<std::string, std::vector<double>> data = {
7594 {"A", {2.0, 3.0}},
7595 {"B", {4.0, 5.0}}
7596 };
7597 pandas::DataFrame df(data);
7598
7599 // df.rmul(10) = 10 * df
7600 pandas::DataFrame result = df.rmul(10.0);
rpow (pd_test_2_all.cpp:8327)
8317 }
8318
8319 if (!passed) {
8320 throw std::runtime_error("pd_test_rmod_modulo_by_zero failed");
8321 }
8322
8323 std::cout << " -> tests passed" << std::endl;
8324 }
8325
8326 void pd_test_rpow_scalar() {
8327 std::cout << "========= rpow() with scalar =====================";
8328
8329 std::map<std::string, std::vector<double>> data = {
8330 {"A", {2.0, 3.0}},
8331 {"B", {0.0, 1.0}}
8332 };
8333 pandas::DataFrame df(data);
8334
8335 // df.rpow(2) = 2 ** df
8336 pandas::DataFrame result = df.rpow(2.0);
rsub (pd_test_2_all.cpp:7520)
7510 }
7511
7512 if (!passed) {
7513 throw std::runtime_error("pd_test_radd_dataframe failed");
7514 }
7515
7516 std::cout << " -> tests passed" << std::endl;
7517 }
7518
7519 void pd_test_rsub_scalar() {
7520 std::cout << "========= rsub() with scalar =====================";
7521
7522 std::map<std::string, std::vector<double>> data = {
7523 {"A", {1.0, 2.0, 3.0}},
7524 {"B", {4.0, 5.0, 6.0}}
7525 };
7526 pandas::DataFrame df(data);
7527
7528 // df.rsub(10) = 10 - df
7529 pandas::DataFrame result = df.rsub(10.0);
rsub (pd_test_2_all.cpp:7520)
7510 }
7511
7512 if (!passed) {
7513 throw std::runtime_error("pd_test_radd_dataframe failed");
7514 }
7515
7516 std::cout << " -> tests passed" << std::endl;
7517 }
7518
7519 void pd_test_rsub_scalar() {
7520 std::cout << "========= rsub() with scalar =====================";
7521
7522 std::map<std::string, std::vector<double>> data = {
7523 {"A", {1.0, 2.0, 3.0}},
7524 {"B", {4.0, 5.0, 6.0}}
7525 };
7526 pandas::DataFrame df(data);
7527
7528 // df.rsub(10) = 10 - df
7529 pandas::DataFrame result = df.rsub(10.0);
rtruediv (pd_test_2_all.cpp:7795)
7785 }
7786
7787 if (!passed) {
7788 throw std::runtime_error("pd_test_rdiv_dataframe failed");
7789 }
7790
7791 std::cout << " -> tests passed" << std::endl;
7792 }
7793
7794 void pd_test_rtruediv_scalar() {
7795 std::cout << "========= rtruediv() with scalar =================";
7796
7797 std::map<std::string, std::vector<double>> data = {
7798 {"A", {2.0, 4.0}}
7799 };
7800 pandas::DataFrame df(data);
7801
7802 // rtruediv is alias for rdiv
7803 pandas::DataFrame result = df.rtruediv(10.0);
7804
7805 bool passed = true;
sub (pd_test_1_all.cpp:4851)
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub (pd_test_1_all.cpp:4851)
4841 pandas::Series<double> a({1.0, 2.0, 3.0});
4842 pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844 auto sum = a.add(b);
4845 bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846 if (!passed) {
4847 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849 }
4850
4851 auto diff = a.sub(b);
4852 passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853 if (!passed) {
4854 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855 throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856 }
4857
4858 auto prod = a.mul(b);
4859 passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860 if (!passed) {
4861 std::cout << " [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub_dateoffset (pd_test_4_all.cpp:6342)
6332 EXPECT(static_cast<int64_t>(out[0]) == 5LL * 86400000000000LL);
6333 EXPECT(static_cast<int64_t>(out[1]) == 6LL * 86400000000000LL);
6334 EXPECT(out.dtype_name() == "datetime64[ns]");
6335 }
6336
6337 void test_sub_dateoffset_calendar_monthend() {
6338 // 2024-01-31 in ns
6339 int64_t jan31 = 1706659200LL * 1000000000LL;
6340 auto s = make_dt_series({jan31});
6341 pandas::MonthEnd me(1);
6342 auto out = s.sub_dateoffset(me);
6343 auto neg = me.negate();
6344 auto ref = s.add_dateoffset(*neg);
6345 EXPECT(out.size() == 1);
6346 EXPECT(static_cast<int64_t>(out[0]) == static_cast<int64_t>(ref[0]));
6347 EXPECT(out.dtype_name() == "datetime64[ns]");
6348 }
6349
6350 void test_sub_dateoffset_equals_add_negated() {
6351 int64_t jan31 = 1706659200LL * 1000000000LL;
6352 auto s = make_dt_series({jan31, jan31 + 86400000000000LL});
subtract (pd_test_3_all.cpp:531)
521 // ============================================================================
522
523 void pd_test_3_all_series_arithmetic() {
524 std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526 std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527 std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528 pandas::Series<double> s1(vals1, "s1");
529 pandas::Series<double> s2(vals2, "s2");
530
531 // Test subtract()
532 pandas::Series<double> sub_result = s1.subtract(s2);
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536 }
537 // 10-2=8
538 if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541 }
truediv (pd_test_3_all.cpp:524)
514 }
515
516 std::cout << " -> tests passed" << std::endl;
517 }
518
519 // ============================================================================
520 // Category 3: Series Arithmetic Operations
521 // ============================================================================
522
523 void pd_test_3_all_series_arithmetic() {
524 std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526 std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527 std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528 pandas::Series<double> s1(vals1, "s1");
529 pandas::Series<double> s2(vals2, "s2");
530
531 // Test subtract()
532 pandas::Series<double> sub_result = s1.subtract(s2);
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
truediv (pd_test_3_all.cpp:524)
514 }
515
516 std::cout << " -> tests passed" << std::endl;
517 }
518
519 // ============================================================================
520 // Category 3: Series Arithmetic Operations
521 // ============================================================================
522
523 void pd_test_3_all_series_arithmetic() {
524 std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526 std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527 std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528 pandas::Series<double> s1(vals1, "s1");
529 pandas::Series<double> s2(vals2, "s2");
530
531 // Test subtract()
532 pandas::Series<double> sub_result = s1.subtract(s2);
533 if (sub_result.size() != 4) {
534 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
compare (pd_test_1_all.cpp:13989)
13979 if (!approx_equal(std::stod(b_col.get_value_str(0)), 10.0)) {
13980 passed = false;
13981 std::cout << " [FAIL] : in pd_test_joining_update() : column B was changed" << std::endl;
13982 throw std::runtime_error("pd_test_joining_update failed: B changed");
13983 }
13984
13985 std::cout << " -> tests passed" << std::endl;
13986 }
13987
13988 // =====================================================================
13989 // compare() Tests
13990 // =====================================================================
13991
13992 void pd_test_joining_compare() {
13993 std::cout << "========= compare =====================================";
13994
13995 std::map<std::string, std::vector<double>> left_data = {
13996 {"A", {1.0, 2.0, 3.0}},
13997 {"B", {10.0, 20.0, 30.0}}
13998 };
13999 pandas::DataFrame left(left_data);
eq (pd_test_2_all.cpp:19680)
19670 std::vector<pandas::Series<numpy::float64>> cols;
19671 cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672 cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673 pandas::DataFrame df(cols, {"A", "B"});
19674
19675 pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676 s.set_index(std::make_unique<pandas::Index<std::string>>(
19677 std::vector<std::string>{"A", "B"}));
19678
19679 auto result = df.eq(s, 1);
19680
19681 check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682 check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683 check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684 check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685 }
19686
19687 // Test 5: mul scalar broadcast (verify existing behavior still works)
19688 void pd_test_broadcasting_mul_scalar() {
19689 std::cout << " -- pd_test_broadcasting_mul_scalar --" << std::endl;
eq (pd_test_2_all.cpp:19680)
19670 std::vector<pandas::Series<numpy::float64>> cols;
19671 cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672 cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673 pandas::DataFrame df(cols, {"A", "B"});
19674
19675 pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676 s.set_index(std::make_unique<pandas::Index<std::string>>(
19677 std::vector<std::string>{"A", "B"}));
19678
19679 auto result = df.eq(s, 1);
19680
19681 check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682 check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683 check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684 check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685 }
19686
19687 // Test 5: mul scalar broadcast (verify existing behavior still works)
19688 void pd_test_broadcasting_mul_scalar() {
19689 std::cout << " -- pd_test_broadcasting_mul_scalar --" << std::endl;
equals (pd_test_1_all.cpp:5866)
5856 std::cout << "========= equals ======================================";
5857
5858 pandas::CategoricalArray arr1({"a", "b", "a"});
5859 pandas::CategoricalArray arr2({"a", "b", "a"});
5860 pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862 pandas::CategoricalIndex idx1(arr1);
5863 pandas::CategoricalIndex idx2(arr2);
5864 pandas::CategoricalIndex idx3(arr3);
5865
5866 bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867 if (!passed) {
5868 std::cout << " [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869 throw std::runtime_error("pd_test_categorical_index_equals failed");
5870 }
5871
5872 std::cout << " -> tests passed" << std::endl;
5873 }
5874
5875 void pd_test_categorical_index_identical() {
5876 std::cout << "========= identical ===================================";
ge (pd_test_3_all.cpp:303)
293 }
294
295 std::cout << " -> tests passed" << std::endl;
296 }
297
298 // ============================================================================
299 // Category 2: DataFrame Comparison Operations
300 // ============================================================================
301
302 void pd_test_3_all_comparison_ops() {
303 std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305 std::map<std::string, std::vector<double>> data1 = {
306 {"A", {1.0, 2.0, 3.0}},
307 {"B", {4.0, 5.0, 6.0}}
308 };
309 std::map<std::string, std::vector<double>> data2 = {
310 {"A", {1.0, 3.0, 3.0}},
311 {"B", {4.0, 4.0, 7.0}}
312 };
313 pandas::DataFrame df1(data1);
ge (pd_test_3_all.cpp:303)
293 }
294
295 std::cout << " -> tests passed" << std::endl;
296 }
297
298 // ============================================================================
299 // Category 2: DataFrame Comparison Operations
300 // ============================================================================
301
302 void pd_test_3_all_comparison_ops() {
303 std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305 std::map<std::string, std::vector<double>> data1 = {
306 {"A", {1.0, 2.0, 3.0}},
307 {"B", {4.0, 5.0, 6.0}}
308 };
309 std::map<std::string, std::vector<double>> data2 = {
310 {"A", {1.0, 3.0, 3.0}},
311 {"B", {4.0, 4.0, 7.0}}
312 };
313 pandas::DataFrame df1(data1);
gen (pd_test_5_all.cpp:35852)
35842 double pc = pct_change_pc(a, b);
35843 double pd = pct_change_pd(a, b);
35844 pandas_tests::check(std::abs(pc - pd) < 1e-12,
35845 "case_12.formulas_within_ULP", local_fail);
35846 }
35847
35848 void bin_edge_412638_case_13_entropy_pct_change_invariance(int& local_fail) {
35849 // Generate prices via deterministic walk; compute returns by both
35850 // formulas; bin both; entropy should be IDENTICAL (bin assignments
35851 // not shifted by ULP-scale formula drift). Cycle-1 finding.
35852 std::mt19937_64 gen(42);
35853 std::normal_distribution<double> nd(0.0003, 0.02);
35854 std::vector<double> prices;
35855 prices.reserve(500);
35856 double s = 100.0;
35857 for (int i = 0; i < 500; ++i) {
35858 if (i > 0) s = s * std::exp(nd(gen));
35859 prices.push_back(s);
35860 }
35861 std::vector<double> r_pc, r_pd;
35862 for (size_t i = 1; i < prices.size(); ++i) {
gt (pd_test_3_all.cpp:344)
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348 throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349 }
350
351 // Test ge()
352 pandas::DataFrame ge_result = df1.ge(df2);
353 if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
gt (pd_test_3_all.cpp:344)
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348 throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349 }
350
351 // Test ge()
352 pandas::DataFrame ge_result = df1.ge(df2);
353 if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341 throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342 }
343
344 // Test gt()
345 pandas::DataFrame gt_result = df1.gt(df2);
346 if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334 throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335 }
336
337 // Test le()
338 pandas::DataFrame le_result = df1.le(df2);
339 if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313 pandas::DataFrame df1(data1);
314 pandas::DataFrame df2(data2);
315
316 // Test eq()
317 pandas::DataFrame eq_result = df1.eq(df2);
318 if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313 pandas::DataFrame df1(data1);
314 pandas::DataFrame df2(data2);
315
316 // Test eq()
317 pandas::DataFrame eq_result = df1.eq(df2);
318 if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320 throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321 }
322
323 // Test ne()
324 pandas::DataFrame ne_result = df1.ne(df2);
325 if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327 throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328 }
329
330 // Test lt()
331 pandas::DataFrame lt_result = df1.lt(df2);
332 if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333 std::cout << " [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
argsort (pd_test_1_all.cpp:1304)
1294 std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296 pandas::DatetimeArray arr(std::vector<std::string>{
1297 "2023-06-15",
1298 "NaT",
1299 "2023-01-01",
1300 "2023-12-31"
1301 });
1302
1303 // argsort ascending
1304 auto indices = arr.argsort(true, "last");
1305 // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
rank (pd_test_1_all.cpp:6451)
6441 // =====================================================================
6442 // Test: Rank
6443 // =====================================================================
6444 void pd_test_dataframe_rank() {
6445 std::cout << "========= rank =============================";
6446
6447 // Test Series rank with default method (average)
6448 {
6449 std::vector<double> data = {3.0, 1.0, 4.0, 1.0, 5.0};
6450 pandas::Series<double> s(data, "test");
6451 auto ranked = s.rank();
6452
6453 // Values: 3, 1, 4, 1, 5 -> Sorted: 1, 1, 3, 4, 5
6454 // Ranks (average): 1.5, 1.5, 3, 4, 5
6455 // Original positions: 3->3, 1->1.5, 4->4, 1->1.5, 5->5
6456 double r0 = std::stod(ranked.get_value_str(0)); // 3.0 -> rank 3
6457 double r1 = std::stod(ranked.get_value_str(1)); // 1.0 -> rank 1.5
6458
6459 if (std::abs(r0 - 3.0) > 1e-10) {
6460 std::cout << " [FAIL] : in pd_test_dataframe_rank() : value 3.0 should have rank 3, got " << r0 << std::endl;
6461 throw std::runtime_error("pd_test_dataframe_rank failed: value 3.0 rank");
searchsorted (pd_test_1_all.cpp:18958)
18948 // =========================================================================
18949 // Search Tests
18950 // =========================================================================
18951
18952 void pd_test_range_index_searchsorted() {
18953 std::cout << "========= searchsorted ================================ ";
18954
18955 pandas::RangeIndex ri(0, 10, 2); // [0, 2, 4, 6, 8]
18956
18957 bool passed = (ri.searchsorted(4, "left") == 2 &&
18958 ri.searchsorted(4, "right") == 3 &&
18959 ri.searchsorted(3, "left") == 2 && // 3 would go between 2 and 4
18960 ri.searchsorted(-1, "left") == 0 && // Before all
18961 ri.searchsorted(10, "left") == 5); // After all
18962
18963 if (!passed) {
18964 std::cout << " [FAIL] : searchsorted" << std::endl;
18965 throw std::runtime_error("pd_test_range_index_searchsorted failed");
18966 }
sort_index (pd_test_3_all.cpp:583)
573 // 10/2=5
574 if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575 std::cout << " [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576 throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577 }
578
579 std::cout << " -> tests passed" << std::endl;
580 }
581
582 void pd_test_3_all_series_sort_index() {
583 std::cout << "========= Series.sort_index() ========================";
584
585 // NOTE: Series.sort_index() has an implementation issue:
586 // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587 // This test verifies the function signature exists.
588 // When the implementation is fixed, this test should be updated.
589
590 std::vector<double> vals = {30.0, 10.0, 20.0};
591 pandas::Series<double> s(vals, "test");
592
593 // Verify the Series was created correctly
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
T_ (pd_test_1_all.cpp:16634)
16624 // =====================================================================
16625 // Transpose Tests
16626 // =====================================================================
16627
16628 void pd_test_ndframe_transpose() {
16629 std::cout << "========= transpose ============================================" << std::endl;
16630
16631 pandas::Series<int> s({1, 2, 3});
16632
16633 // For Series, T_() returns a copy
16634 auto transposed = s.T_();
16635 bool passed = transposed.size() == s.size();
16636 if (!passed) {
16637 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16638 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16639 }
16640
16641 passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16642 if (!passed) {
16643 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
explode (pd_test_1_all.cpp:6868)
6858 }
6859 }
6860
6861 // Test explode
6862 {
6863 std::map<std::string, std::vector<std::string>> data;
6864 data["id"] = {"1", "2"};
6865 data["tags"] = {"a,b,c", "d,e"};
6866 pandas::DataFrame df(data);
6867
6868 auto exploded = df.explode("tags");
6869 if (exploded.nrows() != 5) { // 3 + 2 = 5 rows
6870 std::cout << " [FAIL] : in pd_test_dataframe_reshape() : explode nrows != 5, got " << exploded.nrows() << std::endl;
6871 throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872 }
6873 }
6874
6875 // Test squeeze
6876 {
6877 std::map<std::string, std::vector<int>> data;
6878 data["A"] = {1};
squeeze (pd_test_1_all.cpp:6881)
6871 throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872 }
6873 }
6874
6875 // Test squeeze
6876 {
6877 std::map<std::string, std::vector<int>> data;
6878 data["A"] = {1};
6879 pandas::DataFrame df(data);
6880
6881 auto squeezed = df.squeeze();
6882 // Should return without error for 1x1 DataFrame
6883 }
6884
6885 // Test stack
6886 {
6887 std::map<std::string, std::vector<int>> data;
6888 data["A"] = {1, 2};
6889 data["B"] = {3, 4};
6890 pandas::DataFrame df(data);
swapaxes (pd_test_3_all.cpp:2276)
2266 auto sorted_desc = arr.sort_values(false, "last");
2267 if (*sorted_desc[0] != "c" || *sorted_desc[1] != "b" ||
2268 *sorted_desc[2] != "a" || sorted_desc[3].has_value()) {
2269 throw std::runtime_error("sort_values descending failed");
2270 }
2271
2272 std::cout << " -> tests passed" << std::endl;
2273 }
2274
2275 void pd_test_3_all_categorical_swapaxes() {
2276 std::cout << "========= CategoricalArray.swapaxes() =================";
2277
2278 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2279 pandas::CategoricalArray arr(values);
2280
2281 auto result = arr.swapaxes(0, 0);
2282 if (result.size() != 3) {
2283 throw std::runtime_error("swapaxes failed");
2284 }
2285
2286 bool threw = false;
to_frame (pd_test_3_all.cpp:4931)
4921 size_t usage = mi.memory_usage(true);
4922 if (usage == 0) {
4923 throw std::runtime_error("memory_usage() should return > 0");
4924 }
4925
4926 std::cout << " -> tests passed" << std::endl;
4927 }
4928
4929 void pd_test_3_all_multiindex_to_frame() {
4930 std::cout << "========= MultiIndex.to_frame() =======================";
4931
4932 std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
4933 std::vector<std::optional<std::string>> names = {"first", "second"};
4934 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
4935
4936 auto frame = mi.to_frame();
4937 if (frame.find("first") == frame.end() || frame.find("second") == frame.end()) {
4938 throw std::runtime_error("to_frame() missing columns");
4939 }
transpose (pd_test_1_all.cpp:16648)
16638 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640 }
16641
16642 passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643 if (!passed) {
16644 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646 }
16647
16648 // Test transpose() alias
16649 auto transposed2 = s.transpose();
16650 passed = transposed2.size() == s.size();
16651 if (!passed) {
16652 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653 throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654 }
16655
16656 std::cout << " -> tests passed" << std::endl;
16657 }
unstack (pd_test_3_all.cpp:1739)
1729 }
1730 if (s.size() != 3) {
1731 std::cout << " [FAIL] : in pd_test_3_all_chainable_mutators() : Case H size" << std::endl;
1732 throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case H size");
1733 }
1734
1735 std::cout << " -> tests passed" << std::endl;
1736 }
1737
1738 void pd_test_3_all_dataframe_unstack() {
1739 std::cout << "========= DataFrame.unstack() ========================";
1740
1741 std::map<std::string, std::vector<double>> data = {
1742 {"A", {1.0, 2.0, 3.0}},
1743 {"B", {4.0, 5.0, 6.0}}
1744 };
1745 pandas::DataFrame df(data);
1746
1747 // Without MultiIndex, unstack() returns self (matches pandas behavior)
1748 pandas::DataFrame result = df.unstack();
align (pd_test_1_all.cpp:14035)
14025 if (!approx_equal(a_s1, 2.0) || !approx_equal(a_o1, 99.0)) {
14026 passed = false;
14027 std::cout << " [FAIL] : in pd_test_joining_compare() : difference at row 1 not shown" << std::endl;
14028 throw std::runtime_error("pd_test_joining_compare failed: diff values");
14029 }
14030
14031 std::cout << " -> tests passed" << std::endl;
14032 }
14033
14034 // =====================================================================
14035 // align() Tests
14036 // =====================================================================
14037
14038 void pd_test_joining_align() {
14039 std::cout << "========= align =======================================";
14040
14041 std::map<std::string, std::vector<double>> left_data = {
14042 {"A", {1.0, 2.0}}
14043 };
14044 std::vector<std::string> left_idx = {"x", "y"};
14045 pandas::DataFrame left(left_data, std::make_unique<pandas::Index<std::string>>(left_idx));
aligned_binary_op (pd_test_3_all.cpp:26690)
26680void pd_test_aligned_arith_same_index() {
26681 std::cout << " same index add ... ";
26682 ::pandas::Series<::numpy::float64> s1({1.0, 2.0, 3.0}, std::string("val"));
26683 s1.set_index(std::make_unique<::pandas::Index<std::string>>(
26684 std::vector<std::string>{"a", "b", "c"}));
26685 ::pandas::Series<::numpy::float64> s2({10.0, 20.0, 30.0}, std::string("val"));
26686 s2.set_index(std::make_unique<::pandas::Index<std::string>>(
26687 std::vector<std::string>{"a", "b", "c"}));
26688
26689 auto result = s1.aligned_binary_op(s2, [](double a, double b) { return a + b; });
26690 check(result.size() == 3, "size == 3");
26691 check(result[0] == 11.0, "a -> 11");
26692 check(result[1] == 22.0, "b -> 22");
26693 check(result[2] == 33.0, "c -> 33");
26694 check(result.name() == "val", "name preserved");
26695 std::cout << "test passed" << std::endl;
26696}
26697
26698void pd_test_aligned_arith_different_index() {
26699 std::cout << " different index add ... ";
aligned_binary_op_cross (pd_test_3_all.cpp:26821)
26811void pd_test_aligned_arith_cross_type() {
26812 std::cout << " cross-type alignment (int64 x float64) ... ";
26813 ::pandas::Series<::numpy::int64> s1({1, 2, 3}, std::string("v"));
26814 s1.set_index(std::make_unique<::pandas::Index<std::string>>(
26815 std::vector<std::string>{"a", "b", "c"}));
26816 ::pandas::Series<::numpy::float64> s2({10.5, 20.5, 30.5}, std::string("v"));
26817 s2.set_index(std::make_unique<::pandas::Index<std::string>>(
26818 std::vector<std::string>{"b", "c", "d"}));
26819
26820 auto result = s1.aligned_binary_op_cross(s2, [](double a, double b) { return a + b; });
26821 check(result.size() == 4, "size == 4");
26822 check(std::isnan(result[0]), "a -> NaN");
26823 check(result[1] == 12.5, "b -> 12.5");
26824 check(result[2] == 23.5, "c -> 23.5");
26825 check(std::isnan(result[3]), "d -> NaN");
26826 std::cout << "test passed" << std::endl;
26827}
26828
26829void pd_test_aligned_arith_dtype_coercion() {
26830 // Tests dtype coercion rules matching pandas 2.x behavior for all 5-type combos.
combine (pd_test_2_all.cpp:1700)
1690 std::cout << "====================================== [OK] pd_test_between_time test suite ========================== " << std::endl;
1691 return 0;
1692 }
1693
1694} // namespace dataframe_tests
1695// ------------------- pd_test_between_time.cpp (end) -----------------------------
1696
1697// ------------------- pd_test_combine.cpp (start) -----------------------------
1698// dataframe_tests/pd_test_combine.cpp
1699// Test for DataFrame.combine() - column-wise combine with another DataFrame
1700
1701#include <iostream>
1702#include <cmath>
1703#include <stdexcept>
1704#include "../pandas/pd_dataframe.h"
1705
1706// CRITICAL: No using namespace directives
1707
1708namespace dataframe_tests {
1709 namespace dataframe_tests_combine {
combine_first (pd_test_1_all.cpp:13889)
13879 if (!approx_equal(b1, 10.0) || !approx_equal(b2, 20.0)) {
13880 passed = false;
13881 std::cout << " [FAIL] : in pd_test_joining_join_index() : matched rows wrong" << std::endl;
13882 throw std::runtime_error("pd_test_joining_join_index failed: match values");
13883 }
13884
13885 std::cout << " -> tests passed" << std::endl;
13886 }
13887
13888 // =====================================================================
13889 // combine_first() Tests
13890 // =====================================================================
13891
13892 void pd_test_joining_combine_first() {
13893 std::cout << "========= combine_first ===============================";
13894
13895 std::map<std::string, std::vector<double>> left_data = {
13896 {"A", {1.0, std::nan(""), 3.0}},
13897 {"B", {std::nan(""), 5.0, std::nan("")}}
13898 };
13899 std::vector<std::string> left_idx = {"x", "y", "z"};
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710 std::cout << "========= concat factory ==============================";
17711
17712 std::vector<int64_t> ordinals1 = {0, 1};
17713 std::vector<int64_t> ordinals2 = {2, 3};
17714 pandas::PeriodIndex idx1(ordinals1, "D");
17715 pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717 pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719 bool passed = (concatenated.size() == 4);
17720 if (!passed) {
17721 std::cout << " [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722 throw std::runtime_error("pd_test_period_index_concat failed");
17723 }
17724
17725 std::cout << " -> tests passed" << std::endl;
17726}
asfreq (pd_test_1_all.cpp:2869)
2859 std::cout << "========= PeriodArray: asfreq ======================= ";
2860
2861 // Monthly to quarterly
2862 pandas::PeriodArray arr_m(std::vector<std::string>{
2863 "2024-01",
2864 "2024-04",
2865 "2024-07",
2866 "NaT"
2867 }, "M");
2868
2869 auto arr_q = arr_m.asfreq("Q");
2870 if (arr_q.size() != 4) {
2871 std::cout << " [FAIL] : asfreq size should be 4" << std::endl;
2872 throw std::runtime_error("pd_test_period_array_asfreq failed: size");
2873 }
2874 if (arr_q.freqstr() != "Q") {
2875 std::cout << " [FAIL] : asfreq freqstr should be 'Q'" << std::endl;
2876 throw std::runtime_error("pd_test_period_array_asfreq failed: freqstr");
2877 }
2878
2879 // Check NaT is preserved
asof (pd_test_2_all.cpp:366)
356 std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357 return 0;
358 }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
asof (pd_test_2_all.cpp:366)
356 std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357 return 0;
358 }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
asof (pd_test_2_all.cpp:366)
356 std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357 return 0;
358 }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
between_time (pd_test_2_all.cpp:1154)
1144 std::cout << "====================================== [OK] pd_test_at_time test suite ========================== " << std::endl;
1145 return 0;
1146 }
1147
1148} // namespace dataframe_tests
1149// ------------------- pd_test_at_time.cpp (end) -----------------------------
1150
1151// ------------------- pd_test_between_time.cpp (start) -----------------------------
1152// dataframe_tests/pd_test_between_time.cpp
1153// Tests for DataFrame.between_time() method (pandas 2.0+ API)
1154// Selects values between particular times of day from datetime-indexed DataFrame
1155#include <iostream>
1156#include <stdexcept>
1157#include <vector>
1158#include <string>
1159#include <map>
1160#include "../pandas/pd_dataframe.h"
1161
1162// CRITICAL: No using namespace directives
diff (pd_test_1_all.cpp:5171)
5161 }
5162
5163 void pd_test_arithmetic_dataframe_diff_shift() {
5164 std::cout << "========= DataFrame diff/shift ==================";
5165
5166 std::map<std::string, std::vector<double>> data;
5167 data["A"] = {1.0, 3.0, 6.0, 10.0};
5168 pandas::DataFrame df(data);
5169
5170 // diff: [NaN, 2, 3, 4]
5171 auto d = df.diff();
5172 std::string val = d["A"].get_value_str(1);
5173 bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174 if (!passed) {
5175 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177 }
5178
5179 // First element should be NaN
5180 val = d["A"].get_value_str(0);
5181 passed = std::isnan(std::stod(val));
pct_change (pd_test_1_all.cpp:4621)
4611 throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612 }
4613
4614 std::cout << " -> tests passed" << std::endl;
4615 }
4616
4617 void pd_test_aggregation_series_pct_change() {
4618 std::cout << "========= Series pct_change =====================";
4619
4620 pandas::Series<double> s({100.0, 110.0, 121.0});
4621 auto pct = s.pct_change();
4622
4623 // First element should be NaN
4624 bool passed = std::isnan(pct[0]);
4625 if (!passed) {
4626 std::cout << " [FAIL] : in pd_test_aggregation_series_pct_change() : first element should be NaN" << std::endl;
4627 throw std::runtime_error("pd_test_aggregation_series_pct_change failed: first element should be NaN");
4628 }
4629
4630 // Second element should be 0.1 (10% increase)
4631 passed = std::abs(pct[1] - 0.1) < 0.001;
shift (pd_test_1_all.cpp:5188)
5178 // First element should be NaN
5179 val = d["A"].get_value_str(0);
5180 passed = std::isnan(std::stod(val));
5181 if (!passed) {
5182 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5183 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5184 }
5185
5186 // shift: [NaN, 1, 3, 6]
5187 auto s = df.shift();
5188 val = s["A"].get_value_str(1);
5189 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5190 if (!passed) {
5191 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5192 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5193 }
5194
5195 std::cout << " -> tests passed" << std::endl;
5196 }
to_period (pd_test_2_all.cpp:14554)
14544 std::cout << "====================================== [OK] pd_test_to_parquet test suite ========================" << std::endl;
14545 return 0;
14546 }
14547
14548} // namespace dataframe_tests
14549// ------------------- pd_test_to_parquet.cpp (end) -----------------------------
14550
14551// ------------------- pd_test_to_period.cpp (start) -----------------------------
14552// dataframe_tests/pd_test_to_period.cpp
14553// Test suite for DataFrame.to_period() method
14554
14555#include <iostream>
14556#include <stdexcept>
14557#include <vector>
14558#include <string>
14559#include <map>
14560
14561#include "../pandas/pd_dataframe.h"
14562
14563// CRITICAL: No using namespace directives
to_timestamp (pd_test_1_all.cpp:2830)
2820 void pd_test_period_array_to_timestamp() {
2821 std::cout << "========= PeriodArray: to_timestamp ======================= ";
2822
2823 pandas::PeriodArray arr(std::vector<std::string>{
2824 "2024-01",
2825 "2024-06",
2826 "NaT"
2827 }, "M");
2828
2829 // to_timestamp with start
2830 auto ts_start = arr.to_timestamp("start");
2831 if (ts_start.size() != 3) {
2832 std::cout << " [FAIL] : to_timestamp size should be 3" << std::endl;
2833 throw std::runtime_error("pd_test_period_array_to_timestamp failed: size");
2834 }
2835
2836 auto ts0 = ts_start[0];
2837 if (!ts0.has_value()) {
2838 std::cout << " [FAIL] : ts_start[0] should have value" << std::endl;
2839 throw std::runtime_error("pd_test_period_array_to_timestamp failed: ts_start[0]");
2840 }
tz_convert (pd_test_2_all.cpp:17874)
17864 std::cout << "====================================== [OK] pd_test_transform test suite ========================== " << std::endl;
17865 return 0;
17866 }
17867
17868} // namespace dataframe_tests
17869// ------------------- pd_test_transform.cpp (end) -----------------------------
17870
17871// ------------------- pd_test_tz_convert.cpp (start) -----------------------------
17872// dataframe_tests/pd_test_tz_convert.cpp
17873// Test for DataFrame.tz_convert() method
17874
17875#include <iostream>
17876#include <stdexcept>
17877#include <cmath>
17878#include "../pandas/pd_dataframe.h"
17879
17880namespace dataframe_tests {
17881 namespace dataframe_tests_tz_convert {
17882
17883 void pd_test_tz_convert_basic() {
tz_localize (pd_test_1_all.cpp:1431)
1421 "2023-06-15"
1422 });
1423
1424 // Initially should be timezone-naive
1425 if (arr.is_tz_aware()) {
1426 std::cout << " [FAIL] : array should be timezone-naive initially" << std::endl;
1427 throw std::runtime_error("pd_test_datetime_array_timezone failed: naive");
1428 }
1429
1430 // Localize to UTC
1431 auto localized = arr.tz_localize("UTC");
1432 if (!localized.is_tz_aware()) {
1433 std::cout << " [FAIL] : localized array should be timezone-aware" << std::endl;
1434 throw std::runtime_error("pd_test_datetime_array_timezone failed: localize");
1435 }
1436
1437 // Verify timezone name in dtype
1438 auto dt = localized.dtype();
1439 if (!dt.is_tz_aware()) {
1440 std::cout << " [FAIL] : dtype should be timezone-aware" << std::endl;
1441 throw std::runtime_error("pd_test_datetime_array_timezone failed: dtype tz");
to_clipboard (pd_test_2_all.cpp:10176)
10166 std::cout << "====================================== [OK] pd_test_swaplevel test suite ========================== " << std::endl;
10167 return 0;
10168 }
10169
10170} // namespace dataframe_tests
10171// ------------------- pd_test_swaplevel.cpp (end) -----------------------------
10172
10173// ------------------- pd_test_to_clipboard.cpp (start) -----------------------------
10174// pd_test_to_clipboard.cpp
10175// Tests for DataFrame.to_clipboard() method
10176
10177#include <iostream>
10178#include <string>
10179#include <vector>
10180#include <map>
10181#include <sstream>
10182#include <stdexcept>
10183#include <limits>
10184
10185#include "../pandas/pd_dataframe.h"
to_csv (pd_test_1_all.cpp:6967)
6957 void pd_test_dataframe_io() {
6958 std::cout << "========= I/O methods ======================";
6959
6960 std::map<std::string, std::vector<numpy::int64>> data;
6961 data["A"] = {1, 2, 3};
6962 data["B"] = {4, 5, 6};
6963
6964 pandas::DataFrame df(data);
6965
6966 // Test to_csv
6967 std::string csv = df.to_csv(false);
6968 if (csv.empty()) {
6969 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970 throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971 }
6972 if (csv.find("A") == std::string::npos) {
6973 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974 throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975 }
6976
6977 // Test to_json
to_dict (pd_test_1_all.cpp:13343)
13333 void pd_test_io_to_dict() {
13334 std::cout << "========= to_dict ================================";
13335
13336 std::map<std::string, std::vector<double>> data;
13337 data["A"] = {1.0, 2.0, 3.0};
13338 data["B"] = {4.0, 5.0, 6.0};
13339
13340 pandas::DataFrame df(data);
13341
13342 // Test list orientation
13343 auto dict_list = df.to_dict("list");
13344
13345 bool passed = (dict_list.count("A") > 0 && dict_list.count("B") > 0);
13346 passed = passed && (dict_list["A"].size() == 3);
13347 passed = passed && (dict_list["B"].size() == 3);
13348
13349 if (!passed) {
13350 std::cout << " [FAIL] : in pd_test_io_to_dict() : to_dict list failed" << std::endl;
13351 throw std::runtime_error("pd_test_io_to_dict failed");
13352 }
to_excel (pd_test_2_all.cpp:3427)
3417 std::cout << "====================================== [OK] pd_test_eval test suite ========================== " << std::endl;
3418 return 0;
3419 }
3420
3421} // namespace dataframe_tests
3422// ------------------- pd_test_eval.cpp (end) -----------------------------
3423
3424// ------------------- pd_test_excel.cpp (start) -----------------------------
3425// dataframe_tests/pd_test_excel.cpp
3426// Test file for DataFrame.to_excel() method
3427
3428#include <iostream>
3429#include <fstream>
3430#include <stdexcept>
3431#include <map>
3432#include <vector>
3433#include <string>
3434#include <cmath>
3435#include <limits>
to_feather (pd_test_2_all.cpp:11158)
11148 std::cout << "====================================== [OK] pd_test_to_dict test suite ==========================" << std::endl;
11149 return 0;
11150 }
11151
11152} // namespace dataframe_tests
11153// ------------------- pd_test_to_dict.cpp (end) -----------------------------
11154
11155// ------------------- pd_test_to_feather.cpp (start) -----------------------------
11156// dataframe_tests/pd_test_to_feather.cpp
11157// Comprehensive tests for DataFrame.to_feather() method (pandas-compatible)
11158
11159#include <iostream>
11160#include <fstream>
11161#include <sstream>
11162#include <stdexcept>
11163#include <vector>
11164#include <map>
11165#include <string>
11166#include <cmath>
11167#include <limits>
to_hdf (pd_test_2_all.cpp:11613)
11603 std::cout << "====================================== [OK] pd_test_to_feather test suite ==========================" << std::endl;
11604 return 0;
11605 }
11606
11607} // namespace dataframe_tests
11608// ------------------- pd_test_to_feather.cpp (end) -----------------------------
11609
11610// ------------------- pd_test_to_hdf.cpp (start) -----------------------------
11611// dataframe_tests/pd_test_to_hdf.cpp
11612// Tests for DataFrame.to_hdf() method - Updated for real HDF5 format
11613
11614#include <iostream>
11615#include <stdexcept>
11616#include <map>
11617#include <vector>
11618#include <string>
11619#include <fstream>
11620#include <cstdio>
11621
11622#include "../pandas/pd_dataframe.h"
to_json (pd_test_1_all.cpp:6978)
6968 if (csv.empty()) {
6969 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970 throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971 }
6972 if (csv.find("A") == std::string::npos) {
6973 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974 throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975 }
6976
6977 // Test to_json
6978 std::string json = df.to_json("columns");
6979 if (json.empty()) {
6980 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_json empty" << std::endl;
6981 throw std::runtime_error("pd_test_dataframe_io failed: to_json empty");
6982 }
6983 if (json.find("{") == std::string::npos) {
6984 std::cout << " [FAIL] : in pd_test_dataframe_io() : to_json not JSON" << std::endl;
6985 throw std::runtime_error("pd_test_dataframe_io failed: to_json not JSON");
6986 }
6987
6988 // Test to_string
to_latex (pd_test_2_all.cpp:9446)
9436 void pd_test_styler_to_latex() {
9437 std::cout << "========= to_latex =================================";
9438
9439 std::map<std::string, std::vector<double>> data = {
9440 {"A", {1.0, 2.0, 3.0}},
9441 {"B", {4.0, 5.0, 6.0}}
9442 };
9443 pandas::DataFrame df(data);
9444
9445 auto styler = df.style();
9446 std::string latex = styler.to_latex();
9447
9448 if (!contains(latex, "\\begin{tabular}")) {
9449 std::cout << " [FAIL] : in pd_test_styler_to_latex() : did not produce tabular environment" << std::endl;
9450 throw std::runtime_error("pd_test_styler_to_latex failed: did not produce tabular environment");
9451 }
9452 if (!contains(latex, "\\end{tabular}")) {
9453 std::cout << " [FAIL] : in pd_test_styler_to_latex() : did not close tabular environment" << std::endl;
9454 throw std::runtime_error("pd_test_styler_to_latex failed: did not close tabular environment");
9455 }
to_latex (pd_test_2_all.cpp:9446)
9436 void pd_test_styler_to_latex() {
9437 std::cout << "========= to_latex =================================";
9438
9439 std::map<std::string, std::vector<double>> data = {
9440 {"A", {1.0, 2.0, 3.0}},
9441 {"B", {4.0, 5.0, 6.0}}
9442 };
9443 pandas::DataFrame df(data);
9444
9445 auto styler = df.style();
9446 std::string latex = styler.to_latex();
9447
9448 if (!contains(latex, "\\begin{tabular}")) {
9449 std::cout << " [FAIL] : in pd_test_styler_to_latex() : did not produce tabular environment" << std::endl;
9450 throw std::runtime_error("pd_test_styler_to_latex failed: did not produce tabular environment");
9451 }
9452 if (!contains(latex, "\\end{tabular}")) {
9453 std::cout << " [FAIL] : in pd_test_styler_to_latex() : did not close tabular environment" << std::endl;
9454 throw std::runtime_error("pd_test_styler_to_latex failed: did not close tabular environment");
9455 }
to_latex (pd_test_2_all.cpp:9446)
9436 void pd_test_styler_to_latex() {
9437 std::cout << "========= to_latex =================================";
9438
9439 std::map<std::string, std::vector<double>> data = {
9440 {"A", {1.0, 2.0, 3.0}},
9441 {"B", {4.0, 5.0, 6.0}}
9442 };
9443 pandas::DataFrame df(data);
9444
9445 auto styler = df.style();
9446 std::string latex = styler.to_latex();
9447
9448 if (!contains(latex, "\\begin{tabular}")) {
9449 std::cout << " [FAIL] : in pd_test_styler_to_latex() : did not produce tabular environment" << std::endl;
9450 throw std::runtime_error("pd_test_styler_to_latex failed: did not produce tabular environment");
9451 }
9452 if (!contains(latex, "\\end{tabular}")) {
9453 std::cout << " [FAIL] : in pd_test_styler_to_latex() : did not close tabular environment" << std::endl;
9454 throw std::runtime_error("pd_test_styler_to_latex failed: did not close tabular environment");
9455 }
to_list (pd_test_1_all.cpp:10247)
10237 std::cout << " -> tests passed" << std::endl;
10238}
10239
10240void pd_test_extension_index_to_list() {
10241 std::cout << "========= to_list =========================";
10242
10243 pandas::CategoricalArray arr({"x", "y", "z"});
10244 pandas::CategoricalIndex idx(arr);
10245
10246 auto list = idx.to_list();
10247
10248 bool passed = (list.size() == 3 &&
10249 list[0].has_value() && *list[0] == "x" &&
10250 list[1].has_value() && *list[1] == "y" &&
10251 list[2].has_value() && *list[2] == "z");
10252 if (!passed) {
10253 std::cout << " [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10254 throw std::runtime_error("pd_test_extension_index_to_list failed");
10255 }
to_markdown (pd_test_1_all.cpp:13466)
13456 void pd_test_io_to_markdown() {
13457 std::cout << "========= to_markdown ============================";
13458
13459 std::map<std::string, std::vector<double>> data;
13460 data["X"] = {10.0, 20.0};
13461 data["Y"] = {30.0, 40.0};
13462
13463 pandas::DataFrame df(data);
13464
13465 std::string md = df.to_markdown();
13466
13467 // Check for markdown table elements
13468 bool has_pipe = (md.find("|") != std::string::npos);
13469 bool has_separator = (md.find("---") != std::string::npos);
13470
13471 bool passed = has_pipe && has_separator;
13472
13473 if (!passed) {
13474 std::cout << " [FAIL] : in pd_test_io_to_markdown() : invalid markdown format" << std::endl;
13475 throw std::runtime_error("pd_test_io_to_markdown failed");
to_markdown (pd_test_1_all.cpp:13466)
13456 void pd_test_io_to_markdown() {
13457 std::cout << "========= to_markdown ============================";
13458
13459 std::map<std::string, std::vector<double>> data;
13460 data["X"] = {10.0, 20.0};
13461 data["Y"] = {30.0, 40.0};
13462
13463 pandas::DataFrame df(data);
13464
13465 std::string md = df.to_markdown();
13466
13467 // Check for markdown table elements
13468 bool has_pipe = (md.find("|") != std::string::npos);
13469 bool has_separator = (md.find("---") != std::string::npos);
13470
13471 bool passed = has_pipe && has_separator;
13472
13473 if (!passed) {
13474 std::cout << " [FAIL] : in pd_test_io_to_markdown() : invalid markdown format" << std::endl;
13475 throw std::runtime_error("pd_test_io_to_markdown failed");
to_markdown (pd_test_1_all.cpp:13466)
13456 void pd_test_io_to_markdown() {
13457 std::cout << "========= to_markdown ============================";
13458
13459 std::map<std::string, std::vector<double>> data;
13460 data["X"] = {10.0, 20.0};
13461 data["Y"] = {30.0, 40.0};
13462
13463 pandas::DataFrame df(data);
13464
13465 std::string md = df.to_markdown();
13466
13467 // Check for markdown table elements
13468 bool has_pipe = (md.find("|") != std::string::npos);
13469 bool has_separator = (md.find("---") != std::string::npos);
13470
13471 bool passed = has_pipe && has_separator;
13472
13473 if (!passed) {
13474 std::cout << " [FAIL] : in pd_test_io_to_markdown() : invalid markdown format" << std::endl;
13475 throw std::runtime_error("pd_test_io_to_markdown failed");
to_numpy (pd_test_1_all.cpp:16764)
16754 // =====================================================================
16755 // to_numpy Tests
16756 // =====================================================================
16757
16758 void pd_test_ndframe_to_numpy() {
16759 std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761 pandas::Series<int> s({10, 20, 30});
16762
16763 auto arr = s.to_numpy();
16764
16765 bool passed = arr.getSize() == 3;
16766 if (!passed) {
16767 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768 throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769 }
16770
16771 passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772 if (!passed) {
16773 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_numpy (pd_test_1_all.cpp:16764)
16754 // =====================================================================
16755 // to_numpy Tests
16756 // =====================================================================
16757
16758 void pd_test_ndframe_to_numpy() {
16759 std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761 pandas::Series<int> s({10, 20, 30});
16762
16763 auto arr = s.to_numpy();
16764
16765 bool passed = arr.getSize() == 3;
16766 if (!passed) {
16767 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768 throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769 }
16770
16771 passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772 if (!passed) {
16773 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_numpy (pd_test_1_all.cpp:16764)
16754 // =====================================================================
16755 // to_numpy Tests
16756 // =====================================================================
16757
16758 void pd_test_ndframe_to_numpy() {
16759 std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761 pandas::Series<int> s({10, 20, 30});
16762
16763 auto arr = s.to_numpy();
16764
16765 bool passed = arr.getSize() == 3;
16766 if (!passed) {
16767 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768 throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769 }
16770
16771 passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772 if (!passed) {
16773 std::cout << " [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_orc (pd_test_2_all.cpp:13769)
13759 std::cout << "====================================== [OK] pd_test_to_markdown test suite ========================== " << std::endl;
13760 return 0;
13761 }
13762
13763} // namespace dataframe_tests
13764// ------------------- pd_test_to_markdown.cpp (end) -----------------------------
13765
13766// ------------------- pd_test_to_orc.cpp (start) -----------------------------
13767// dataframe_tests/pd_test_to_orc.cpp
13768// Tests for DataFrame.to_orc() method
13769
13770#include <iostream>
13771#include <stdexcept>
13772#include <map>
13773#include <vector>
13774#include <string>
13775#include <fstream>
13776#include <cstdio>
13777
13778#include "../pandas/pd_dataframe.h"
to_parquet (pd_test_2_all.cpp:14117)
14107 std::cout << "====================================== [OK] pd_test_to_orc test suite ==========================" << std::endl;
14108 return 0;
14109 }
14110
14111} // namespace dataframe_tests
14112// ------------------- pd_test_to_orc.cpp (end) -----------------------------
14113
14114// ------------------- pd_test_to_parquet.cpp (start) -----------------------------
14115// dataframe_tests/pd_test_to_parquet.cpp
14116// Tests for DataFrame.to_parquet() method
14117
14118#include <iostream>
14119#include <stdexcept>
14120#include <map>
14121#include <vector>
14122#include <string>
14123#include <fstream>
14124#include <cstdio>
14125
14126#include "../pandas/pd_dataframe.h"
to_pickle (pd_test_2_all.cpp:14906)
14896 std::cout << "====================================== [OK] pd_test_to_period test suite ========================== " << std::endl;
14897 return 0;
14898 }
14899
14900} // namespace dataframe_tests
14901// ------------------- pd_test_to_period.cpp (end) -----------------------------
14902
14903// ------------------- pd_test_to_pickle.cpp (start) -----------------------------
14904// dataframe_tests/pd_test_to_pickle.cpp
14905// Tests for DataFrame.to_pickle() method
14906
14907#include <iostream>
14908#include <stdexcept>
14909#include <map>
14910#include <vector>
14911#include <string>
14912#include <fstream>
14913#include <cstdio>
14914
14915#include "../pandas/pd_dataframe.h"
to_sql (pd_test_2_all.cpp:15576)
15566 std::cout << "====================================== [OK] pd_test_to_records test suite ========================== " << std::endl;
15567 return 0;
15568 }
15569
15570} // namespace dataframe_tests
15571// ------------------- pd_test_to_records.cpp (end) -----------------------------
15572
15573// ------------------- pd_test_to_sql.cpp (start) -----------------------------
15574// dataframe_tests/pd_test_to_sql.cpp
15575// Tests for DataFrame.to_sql() method
15576
15577#include <iostream>
15578#include <stdexcept>
15579#include <map>
15580#include <vector>
15581#include <string>
15582#include <fstream>
15583#include <sstream>
15584#include <cstdio>
to_stata (pd_test_2_all.cpp:16133)
16123 std::cout << "====================================== [OK] pd_test_to_sql test suite ==========================" << std::endl;
16124 return 0;
16125 }
16126
16127} // namespace dataframe_tests
16128// ------------------- pd_test_to_sql.cpp (end) -----------------------------
16129
16130// ------------------- pd_test_to_stata.cpp (start) -----------------------------
16131// pd_test_to_stata.cpp
16132// Tests for DataFrame.to_stata() method
16133
16134#include <iostream>
16135#include <fstream>
16136#include <string>
16137#include <vector>
16138#include <map>
16139#include <cstring>
16140#include <stdexcept>
16141
16142#include "../pandas/pd_dataframe.h"
to_string (pd_test_1_all.cpp:2693)
2683 pandas::PeriodArray arr_m(std::vector<std::string>{
2684 "2020-01",
2685 "NaT",
2686 "2025-06"
2687 }, "M");
2688
2689 // Year
2690 auto years = arr_m.year();
2691 auto y0 = years[0];
2692 if (!y0.has_value() || y0.value() != 2020) {
2693 std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695 }
2696
2697 auto y1 = years[1];
2698 if (y1.has_value()) {
2699 std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701 }
2702
2703 auto y2 = years[2];
to_string_full (pd_test_5_all.cpp:64951)
64941static pandas::Series<numpy::float64> make_s(
64942 const std::vector<double>& v,
64943 const std::optional<std::string>& name = std::nullopt) {
64944 return pandas::Series<numpy::float64>(v, name);
64945}
64946
64947static const double PINF = std::numeric_limits<double>::infinity();
64948static const double NINF = -std::numeric_limits<double>::infinity();
64949static const double DNAN = std::numeric_limits<double>::quiet_NaN();
64950
64951// Convenience: call to_string_full() with index=true, length=false,
64952// dtype=true, name=true, then strip the dtype trailer here too so the result
64953// matches the captured output of `pd.Series(...).to_string()`, which OMITS
64954// that trailer (Python's `Series.to_string()` defaults to `dtype=False`).
64955static std::string call_to_string_full(
64956 const pandas::Series<numpy::float64>& s) {
64957 std::string raw = s.to_string_full(
64958 /*buf=*/nullptr,
64959 /*na_rep=*/"NaN",
64960 /*float_format=*/"",
64961 /*header=*/false,
to_string_vector (pd_test_1_all.cpp:10871)
10861 std::cout << " -> tests passed" << std::endl;
10862}
10863
10864void pd_test_extension_index_to_string_vector() {
10865 std::cout << "========= to_string_vector =========================";
10866
10867 pandas::CategoricalArray arr({"a", std::nullopt, "c"});
10868 pandas::CategoricalIndex idx(arr);
10869
10870 auto str_vec = idx.to_string_vector();
10871
10872 bool passed = (str_vec.size() == 3 &&
10873 str_vec[0] == "a" && str_vec[1] == "NA" && str_vec[2] == "c");
10874 if (!passed) {
10875 std::cout << " [FAIL] : in pd_test_extension_index_to_string_vector() : to_string_vector check failed" << std::endl;
10876 throw std::runtime_error("pd_test_extension_index_to_string_vector failed");
10877 }
10878
10879 std::cout << " -> tests passed" << std::endl;
10880}
to_xarray (pd_test_2_all.cpp:16928)
16918 std::cout << "====================================== [OK] pd_test_to_timestamp test suite ========================== " << std::endl;
16919 return 0;
16920 }
16921
16922} // namespace dataframe_tests
16923// ------------------- pd_test_to_timestamp.cpp (end) -----------------------------
16924
16925// ------------------- pd_test_to_xarray.cpp (start) -----------------------------
16926// dataframe_tests/pd_test_to_xarray.cpp
16927// Test for DataFrame.to_xarray() - Convert DataFrame to xarray Dataset
16928
16929#include <iostream>
16930#include <stdexcept>
16931#include <string>
16932#include <vector>
16933#include <map>
16934#include <memory>
16935#include <cmath>
16936
16937#include "../pandas/pd_dataframe.h"
tolist (pd_test_3_all.cpp:2300)
2290 threw = true;
2291 }
2292 if (!threw) {
2293 throw std::runtime_error("swapaxes should throw for invalid axes");
2294 }
2295
2296 std::cout << " -> tests passed" << std::endl;
2297}
2298
2299void pd_test_3_all_categorical_to_list() {
2300 std::cout << "========= CategoricalArray.to_list()/tolist() =========";
2301
2302 std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "c"};
2303 pandas::CategoricalArray arr(values);
2304
2305 auto list = arr.to_list();
2306 if (list.size() != 4 || *list[0] != "a" || *list[1] != "b" ||
2307 list[2].has_value() || *list[3] != "c") {
2308 throw std::runtime_error("to_list failed");
2309 }
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
astype_dtype (pd_test_5_all.cpp:43633)
43623 "0 a\n"
43624 "1 b\n"
43625 "2 c";
43626 check_case("dtype_extension_dt_complex_fallback_925116_case_6",
43627 df, actual, expected, "string", local_fail);
43628}
43629
43630void f_dtype_extension_dt_complex_fallback_925116_case_7_series_string_astype_string_drops_override(int& local_fail) {
43631 std::cout << "-- case_7_series_string_astype_string_drops_override\n";
43632 pandas::Series<std::string> s({"a", "b", "c"});
43633 auto r_box = s.astype_dtype("string");
43634 auto* r = dynamic_cast<pandas::Series<std::string>*>(r_box.get());
43635 if (r == nullptr) {
43636 pandas_tests::check(false, "case_7.astype_returned_non_string_series", local_fail);
43637 return;
43638 }
43639 pandas::DataFrame df = r->to_frame(std::optional<std::string>("v"));
43640 std::string actual = df.to_string();
43641
43642 std::cout << " src_dtype=" << show_dtype(s)
43643 << " astype_result_dtype=" << show_dtype(*r) << "\n";
bool_ (pd_test_1_all.cpp:9120)
9110void pd_test_datetime_mixin_array_constructor() {
9111 std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113 // Create DatetimeArray with some values
9114 numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115 data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2001
9116 data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2017
9117 data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond)); // ~2020
9118
9119 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120 mask.setElementAt({0}, numpy::bool_(false));
9121 mask.setElementAt({1}, numpy::bool_(false));
9122 mask.setElementAt({2}, numpy::bool_(false));
9123
9124 pandas::DatetimeArray arr(data, mask);
9125 pandas::DatetimeTDMixin idx(arr, "timestamps");
9126
9127 bool passed = (idx.size() == 3 && !idx.empty() &&
9128 idx.name().has_value() && *idx.name() == "timestamps" &&
9129 idx.inferred_type() == "datetime");
9130 if (!passed) {
bool_vec_to_cond (pd_test_3_all.cpp:26108)
26098 auto result = s.where(cond, other);
26099 if (!result.name().has_value() || result.name().value() != "my_series") {
26100 throw std::runtime_error("Name not preserved");
26101 }
26102 std::cout << "PASSED" << std::endl;
26103}
26104
26105void pd_test_where_series_bool_vec() {
26106 std::cout << " bool_vec_to_cond conversion... ";
26107 std::vector<bool> bvec = {true, false, true, false, true};
26108 auto cond_arr = ::pandas::Series<::numpy::float64>::bool_vec_to_cond(bvec);
26109 if (cond_arr.getSize() != 5) {
26110 throw std::runtime_error("Size mismatch");
26111 }
26112 for (size_t i = 0; i < 5; ++i) {
26113 if (cond_arr.getElementAt({i}) != bvec[i]) {
26114 throw std::runtime_error("Value mismatch at index " + std::to_string(i));
26115 }
26116 }
26117 std::cout << "PASSED" << std::endl;
26118}
convert_dtypes (pd_test_1_all.cpp:27317)
27307 void pd_test_convert_dtypes_integer_strings() {
27308 std::cout << "========= convert_dtypes: integer strings ============";
27309
27310 // Create DataFrame with string column containing integers
27311 std::map<std::string, std::vector<std::string>> data;
27312 data["a"] = {"1", "2", "3", "4", "5"};
27313
27314 pandas::DataFrame df(data);
27315
27316 // Convert dtypes
27317 pandas::DataFrame converted = df.convert_dtypes();
27318
27319 // After conversion, dtype should be Int64 (plain int64 is also accepted below)
27320 pandas::Series<std::string> dtypes_after = converted.dtypes();
27321 std::string dtype_a = dtypes_after[static_cast<size_t>(0)];
27322
27323 // Verify the dtype was converted to Int64 (nullable integer, per pandas convert_dtypes behavior)
27324 bool passed = (dtype_a == "Int64" || dtype_a == "int64");
27325 if (!passed) {
27326 std::cout << " [FAIL] : in pd_test_convert_dtypes_integer_strings() : expected Int64, got " << dtype_a << std::endl;
27327 throw std::runtime_error("pd_test_convert_dtypes_integer_strings failed: dtype mismatch");
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793 std::cout << "========= copy ========================================";
5794
5795 pandas::CategoricalArray arr({"a", "b", "c"});
5796 pandas::CategoricalIndex idx(arr, "original");
5797
5798 pandas::CategoricalIndex copied = idx.copy();
5799
5800 bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801 copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802 if (!passed) {
5803 std::cout << " [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804 throw std::runtime_error("pd_test_categorical_index_copy failed");
5805 }
5806
5807 std::cout << " -> tests passed" << std::endl;
5808}
copy_series_metadata_from (pd_test_3_all.cpp:27251)
27241// Test 9: copy_series_metadata_from
27242void pd_test_copy_series_metadata() {
27243 std::cout << " -- pd_test_copy_series_metadata --" << std::endl;
27244 Series<numpy::float64> source({1.0, 2.0, 3.0}, std::string("src"));
27245 source.set_index(std::make_unique<Index<std::string>>(
27246 std::vector<std::string>{"a", "b", "c"}));
27247 source.set_dtype_override("int64");
27248
27249 Series<numpy::float64> target({10.0, 20.0, 30.0});
27250 target.copy_series_metadata_from(source);
27251 check(target.name() == "src", "name copied");
27252 check(target.index().size() == 3, "index copied");
27253 check(target.dtype_name() == "int64", "dtype_override copied");
27254}
27255
27256int pd_test_apply_inference_main() {
27257 std::cout << "====================================== pd_test_apply_inference ========================== " << std::endl;
27258 g_pass = 0;
27259 g_fail = 0;
27260 try {
infer_objects (pd_test_1_all.cpp:27595)
27585 // Create DataFrame with string column containing integers
27586 std::map<std::string, std::vector<std::string>> data;
27587 data["A"] = {"1", "2", "3", "4", "5"};
27588
27589 pandas::DataFrame df(data);
27590
27591 // Before inference, dtype should be string/object
27592 std::string before_dtype = df["A"].dtype_name();
27593
27594 // Apply infer_objects
27595 pandas::DataFrame result = df.infer_objects();
27596
27597 // After inference, dtype should be int64
27598 std::string after_dtype = result["A"].dtype_name();
27599
27600 bool passed = (after_dtype == "int64");
27601 if (!passed) {
27602 std::cout << " [FAIL] : in pd_test_infer_objects_integer_column() : expected int64, got " << after_dtype << std::endl;
27603 throw std::runtime_error("pd_test_infer_objects_integer_column failed");
27604 }
view (pd_test_3_all.cpp:2147)
2137 throw std::runtime_error("memory_usage shallow too small");
2138 }
2139 if (deep < shallow) {
2140 throw std::runtime_error("memory_usage deep should be >= shallow");
2141 }
2142
2143 std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147 std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150 pandas::CategoricalArray arr(values);
2151
2152 auto raveled = arr.ravel();
2153 if (raveled.size() != 3 || !raveled.equals(arr)) {
2154 throw std::runtime_error("ravel failed");
2155 }
2156
2157 auto viewed = arr.view();
items (pd_test_1_all.cpp:16554)
16544 // =====================================================================
16545 // Iteration Tests (items, keys)
16546 // =====================================================================
16547
16548 void pd_test_ndframe_items_keys() {
16549 std::cout << "========= items/keys ===========================================" << std::endl;
16550
16551 pandas::Series<int> s({10, 20, 30});
16552
16553 // Test items()
16554 std::vector<std::string> collected_keys;
16555 std::vector<int> collected_values;
16556
16557 s.items([&](const std::string& key, int value) {
16558 collected_keys.push_back(key);
16559 collected_values.push_back(value);
16560 });
16561
16562 bool passed = collected_keys.size() == 3;
16563 if (!passed) {
items (pd_test_1_all.cpp:16554)
16544 // =====================================================================
16545 // Iteration Tests (items, keys)
16546 // =====================================================================
16547
16548 void pd_test_ndframe_items_keys() {
16549 std::cout << "========= items/keys ===========================================" << std::endl;
16550
16551 pandas::Series<int> s({10, 20, 30});
16552
16553 // Test items()
16554 std::vector<std::string> collected_keys;
16555 std::vector<int> collected_values;
16556
16557 s.items([&](const std::string& key, int value) {
16558 collected_keys.push_back(key);
16559 collected_values.push_back(value);
16560 });
16561
16562 bool passed = collected_keys.size() == 3;
16563 if (!passed) {
keys (pd_test_1_all.cpp:16319)
16309 }
16310
16311 // Test default value
16312 passed = attrs.get<int>("missing", 99) == 99;
16313 if (!passed) {
16314 std::cout << " [FAIL] : in pd_test_ndframe_attrs() : default value" << std::endl;
16315 throw std::runtime_error("pd_test_ndframe_attrs failed: default value");
16316 }
16317
16318 // Test keys
16319 auto keys = attrs.keys();
16320 passed = keys.size() == 3;
16321 if (!passed) {
16322 std::cout << " [FAIL] : in pd_test_ndframe_attrs() : keys()" << std::endl;
16323 throw std::runtime_error("pd_test_ndframe_attrs failed: keys()");
16324 }
16325
16326 // Test remove
16327 passed = attrs.remove("count") && !attrs.contains("count");
16328 if (!passed) {
16329 std::cout << " [FAIL] : in pd_test_ndframe_attrs() : remove" << std::endl;
duplicated (pd_test_1_all.cpp:10583)
10573 std::cout << " -> tests passed" << std::endl;
10574}
10575
10576void pd_test_extension_index_duplicated() {
10577 std::cout << "========= duplicated =========================";
10578
10579 pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10580 pandas::CategoricalIndex idx(arr);
10581
10582 auto dup_mask = idx.duplicated("first");
10583
10584 bool passed = (dup_mask.getElementAt({0}) == false &&
10585 dup_mask.getElementAt({1}) == false &&
10586 dup_mask.getElementAt({2}) == true &&
10587 dup_mask.getElementAt({3}) == false &&
10588 dup_mask.getElementAt({4}) == true);
10589 if (!passed) {
10590 std::cout << " [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10591 throw std::runtime_error("pd_test_extension_index_duplicated failed");
10592 }
isin (pd_test_1_all.cpp:5938)
5928 std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932 std::cout << "========= inherited isin ==============================";
5933
5934 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935 pandas::CategoricalIndex idx(arr);
5936
5937 std::vector<std::string> values = {"a", "c"};
5938 numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940 bool passed = (mask.getSize() == 4 &&
5941 mask.getElementAt({0}) == true && // a
5942 mask.getElementAt({1}) == false && // b
5943 mask.getElementAt({2}) == true && // c
5944 mask.getElementAt({3}) == false); // d
5945 if (!passed) {
5946 std::cout << " [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947 throw std::runtime_error("pd_test_categorical_index_isin failed");
5948 }
unique (pd_test_1_all.cpp:1345)
1335 pandas::DatetimeArray arr(std::vector<std::string>{
1336 "2023-01-01",
1337 "2023-06-15",
1338 "2023-01-01",
1339 "NaT",
1340 "2023-06-15",
1341 "NaT"
1342 });
1343
1344 // unique
1345 auto uniq = arr.unique();
1346 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347 if (uniq.size() != 3) {
1348 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350 }
1351
1352 // factorize
1353 auto [codes, uniques] = arr.factorize();
1354 // Codes for NaT should be -1
1355 if (codes.getElementAt({3}) != -1) {
is_na_at (pd_test_5_all.cpp:35205)
35195 pandas::DataFrame df;
35196 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197 pandas_tests::check(df["X"].get_value_bool(0) == true, "case_3.idx0_true", local_fail);
35198 pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199 pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false", local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203 pandas::DataFrame df;
35204 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205 pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206 pandas_tests::check(df["X"].is_na_at(1) == true, "case_4.idx1_is_na", local_fail);
35207 pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
35208}
35209
35210void bool_nullable_826495_case_5_fillna_preserves_dtype(int& local_fail) {
35211 pandas::DataFrame df;
35212 df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35213 pandas_tests::check(df["X"].dtype_name() == "boolean", "case_5.pre_dtype", local_fail);
35214 auto df_filled = df.fillna(1.0);
35215 pandas_tests::check(df_filled["X"].dtype_name() == "boolean",
GlobalUnlock (pd_test_2_all.cpp:10220)
10210 return "";
10211 }
10212
10213 char* pszText = static_cast<char*>(GlobalLock(hData));
10214 if (pszText == nullptr) {
10215 CloseClipboard();
10216 return "";
10217 }
10218
10219 std::string text(pszText);
10220 GlobalUnlock(hData);
10221 CloseClipboard();
10222
10223 return text;
10224 }
10225#else
10226 std::string get_clipboard_text() {
10227 // Non-Windows: just return empty (can't easily read clipboard)
10228 return "";
10229 }
10230#endif
abs (pd_test_1_all.cpp:283)
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
all_values_whole_number (pd_test_5_all.cpp:30090)
30080 !src_map_ov.empty() ? src_map_ov : src_ser_dt;
30081 bool is_int_like =
30082 (src_effective.find("int") != std::string::npos ||
30083 src_effective.find("uint") != std::string::npos);
30084 bool comb_has_col = combined.has_column(flat);
30085 bool comb_hasnans = false, comb_allwhole = false;
30086 std::string comb_dt = "<missing>";
30087 if (comb_has_col) {
30088 const pandas::NDFrameBase& c = combined[flat];
30089 comb_hasnans = c.hasnans();
30090 comb_allwhole = c.all_values_whole_number();
30091 comb_dt = c.dtype_name();
30092 }
30093 bool would_apply = is_int_like && comb_has_col &&
30094 !comb_hasnans && comb_allwhole;
30095 std::cout << tag << " flat=" << flat
30096 << " src_effective=" << (src_effective.empty() ? "<empty>" : src_effective)
30097 << " is_int_like=" << is_int_like
30098 << " comb_dt=" << comb_dt
30099 << " comb_hasnans=" << comb_hasnans
30100 << " comb_allwhole=" << comb_allwhole
argmax (pd_test_1_all.cpp:1323)
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327 }
1328
1329 std::cout << " -> tests passed" << std::endl;
1330 }
1331
1332 void pd_test_datetime_array_unique() {
1333 std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
autocorr (pd_test_3_all.cpp:11904)
11894 auto result2 = s.asof("2020-01-05");
11895 if (!result2.has_value() || std::abs(*result2 - 4.0) > 0.001) {
11896 std::cout << " [FAIL] : asof after all dates incorrect" << std::endl;
11897 throw std::runtime_error("pd_test_series_asof failed");
11898 }
11899
11900 std::cout << " -> tests passed" << std::endl;
11901}
11902
11903// ============================================================================
11904// Test 3: autocorr()
11905// ============================================================================
11906void pd_test_series_autocorr() {
11907 std::cout << "========= Series.autocorr() ========================";
11908
11909 // Create a simple series with known autocorrelation
11910 std::vector<double> vals = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
11911 pandas::Series<double> s(vals, "test");
11912
11913 auto result = s.autocorr(1);
11914 if (!result.has_value()) {
axes (pd_test_1_all.cpp:16602)
16592 // =====================================================================
16593 // Axes Tests
16594 // =====================================================================
16595
16596 void pd_test_ndframe_axes() {
16597 std::cout << "========= axes =================================================" << std::endl;
16598
16599 pandas::Series<double> s({1.0, 2.0, 3.0});
16600
16601 auto axes = s.axes();
16602
16603 bool passed = axes.size() == 1;
16604 if (!passed) {
16605 std::cout << " [FAIL] : in pd_test_ndframe_axes() : axes count" << std::endl;
16606 throw std::runtime_error("pd_test_ndframe_axes failed: axes count");
16607 }
16608
16609 passed = axes[0]->size() == 3;
16610 if (!passed) {
16611 std::cout << " [FAIL] : in pd_test_ndframe_axes() : axis size" << std::endl;
between (pd_test_1_all.cpp:19258)
19248 throw std::runtime_error("pd_test_series_comparison failed: greater than");
19249 }
19250
19251 auto eq2 = s == 2;
19252 passed = eq2[1] == true && eq2[0] == false;
19253 if (!passed) {
19254 std::cout << " [FAIL] : in pd_test_series_comparison() : equals failed" << std::endl;
19255 throw std::runtime_error("pd_test_series_comparison failed: equals");
19256 }
19257
19258 auto between_result = s.between(2, 4);
19259 passed = between_result[0] == false && between_result[1] == true && between_result[3] == true;
19260 if (!passed) {
19261 std::cout << " [FAIL] : in pd_test_series_comparison() : between failed" << std::endl;
19262 throw std::runtime_error("pd_test_series_comparison failed: between");
19263 }
19264
19265 std::cout << " -> tests passed" << std::endl;
19266 }
19267
19268 void pd_test_series_transformations() {
canonical_dtype_name (pd_test_5_all.cpp:86367)
86357void case_1_series_complex_dtype_name(int& local_fail) {
86358 std::cout << "-- case_1_series_complex_dtype_name\n";
86359 pandas::Series<cdouble> s({cdouble(1.0, 2.0), cdouble(3.0, 4.0)});
86360 const std::string dt = s.dtype_name();
86361 pandas_tests::check(dt == "complex128",
86362 "case_1_series_complex_dtype_name.dtype_is_complex128_got_" + dt, local_fail);
86363}
86364
86365void case_2_series_canonical_dtype_name(int& local_fail) {
86366 std::cout << "-- case_2_series_canonical_dtype_name\n";
86367 const std::string canon = pandas::Series<cdouble>::canonical_dtype_name();
86368 pandas_tests::check(canon == "complex128",
86369 "case_2_canonical_dtype_name_is_complex128_got_" + canon, local_fail);
86370}
86371
86372void case_3_empty_series_complex_dtype(int& local_fail) {
86373 std::cout << "-- case_3_empty_series_complex_dtype\n";
86374 pandas::Series<cdouble> s(std::vector<cdouble>{});
86375 const std::string dt = s.dtype_name();
86376 pandas_tests::check(dt == "complex128",
86377 "case_3_empty_series_complex_dtype.dtype_is_complex128_got_" + dt, local_fail);
case_when (pd_test_3_all.cpp:9129)
9119 }
9120
9121 std::cout << " -> tests passed" << std::endl;
9122}
9123
9124// ============================================================================
9125// Category 35: Plan 08 - Series case_when and rdivmod
9126// ============================================================================
9127
9128void pd_test_3_all_series_case_when() {
9129 std::cout << "========= Series.case_when() ======================";
9130
9131 std::vector<double> vals = {1.0, 2.0, 3.0, 4.0, 5.0};
9132 pandas::Series<double> s(vals, "test");
9133
9134 // Create conditions
9135 numpy::NDArray<numpy::bool_> cond1({5});
9136 numpy::NDArray<numpy::bool_> cond2({5});
9137 for (size_t i = 0; i < 5; ++i) {
9138 cond1.setElementAt({i}, numpy::bool_(vals[i] < 2.0)); // val < 2
9139 cond2.setElementAt({i}, numpy::bool_(vals[i] > 4.0)); // val > 4
cat (pd_test_3_all.cpp:16259)
16249 }
16250
16251 std::cout << " -> tests passed" << std::endl;
16252}
16253
16254void pd_test_categorical_fillna_params() {
16255 std::cout << "========= CategoricalArray fillna params =============";
16256
16257 // Create CategoricalArray using vector constructor with optional values
16258 std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "a"};
16259 pandas::CategoricalArray cat(values);
16260
16261 // Test fillna with method and limit parameters (should compile and work)
16262 auto result = cat.fillna("b", "", std::nullopt, true);
16263
16264 bool passed = (result.size() == 4);
16265 if (!passed) {
16266 std::cout << " [FAIL] : in pd_test_categorical_fillna_params() : fillna failed" << std::endl;
16267 throw std::runtime_error("pd_test_categorical_fillna_params failed");
16268 }
cat_ordered (pd_test_2_all.cpp:20373)
20363 std::vector<std::string> svals = {"a", "b", "a", "c"};
20364 auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365 cs->set_dtype_override("category");
20366 cs->set_cat_categories({"a", "b", "c"});
20367 cs->set_cat_ordered(true);
20368 df.insert(0, "cat", std::move(cs), true);
20369
20370 auto s = df.get_column_as_string_series("cat");
20371 check(s.dtype_name() == "category", "cat dtype");
20372 check(s.has_cat_categories(), "cat has_categories");
20373 check(s.cat_ordered() == true, "cat ordered");
20374 auto cats = s.get_cat_categories();
20375 check(cats.size() == 3, "cat categories size");
20376 std::set<std::string> cat_set(cats.begin(), cats.end());
20377 check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381 std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
20382
20383 // Test DatetimeIndex freq propagation
ceil (pd_test_1_all.cpp:4949)
4939 throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4940 }
4941
4942 auto f = a.floor();
4943 passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4944 if (!passed) {
4945 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4946 throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4947 }
4948
4949 auto c = a.ceil();
4950 passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4951 if (!passed) {
4952 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
4953 throw std::runtime_error("pd_test_arithmetic_series_round failed: ceil failed");
4954 }
4955
4956 // Round with decimals
4957 pandas::Series<double> b({1.234, 2.567, 3.891});
4958 auto r2 = b.round(2);
4959 passed = std::abs(r2[0] - 1.23) < 0.001 && std::abs(r2[1] - 2.57) < 0.001;
clear_cache (pd_test_1_all.cpp:19413)
19403 s.mean();
19404 s.min();
19405 s.max();
19406
19407 passed = s.has_cached_values() == true;
19408 if (!passed) {
19409 std::cout << " [FAIL] : in pd_test_series_cache() : cache not populated" << std::endl;
19410 throw std::runtime_error("pd_test_series_cache failed: cache not populated");
19411 }
19412
19413 s.clear_cache();
19414 passed = s.has_cached_values() == false;
19415 if (!passed) {
19416 std::cout << " [FAIL] : in pd_test_series_cache() : cache not cleared" << std::endl;
19417 throw std::runtime_error("pd_test_series_cache failed: cache not cleared");
19418 }
19419
19420 std::cout << " -> tests passed" << std::endl;
19421 }
19422
19423 void pd_test_series_string_repr() {
clip (pd_test_1_all.cpp:5099)
5089 throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs failed");
5090 }
5091
5092 val = a["A"].get_value_str(2);
5093 passed = std::abs(std::stod(val) - 3.0) < 0.001;
5094 if (!passed) {
5095 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : abs for -3 failed" << std::endl;
5096 throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs for -3 failed");
5097 }
5098
5099 auto c = df.clip(-2.0, 2.0);
5100 val = c["A"].get_value_str(2);
5101 passed = std::abs(std::stod(val) - (-2.0)) < 0.001; // -3 clipped to -2
5102 if (!passed) {
5103 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : clip lower failed" << std::endl;
5104 throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: clip lower failed");
5105 }
5106
5107 val = c["A"].get_value_str(3);
5108 passed = std::abs(std::stod(val) - 2.0) < 0.001; // 4 clipped to 2
5109 if (!passed) {
clone (pd_test_1_all.cpp:5776)
5767 std::cout << " -> tests passed" << std::endl;
5768}
5769
5770void pd_test_categorical_index_clone() {
5771 std::cout << "========= clone =======================================";
5772
5773 pandas::CategoricalArray arr({"p", "q", "r"});
5774 pandas::CategoricalIndex idx(arr, "original");
5775
5776 std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5777
5778 bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5779 cloned->name() == idx.name());
5780 if (!passed) {
5781 std::cout << " [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5782 throw std::runtime_error("pd_test_categorical_index_clone failed");
5783 }
5784
5785 std::cout << " -> tests passed" << std::endl;
5786}
corr (pd_test_1_all.cpp:4655)
4645 }
4646
4647 void pd_test_aggregation_dataframe_corr() {
4648 std::cout << "========= DataFrame corr ========================";
4649
4650 std::map<std::string, std::vector<double>> data;
4651 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652 data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0}; // Perfect correlation
4653 pandas::DataFrame df(data);
4654
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
cov (pd_test_1_all.cpp:4690)
4680 std::cout << " -> tests passed" << std::endl;
4681 }
4682
4683 void pd_test_aggregation_dataframe_cov() {
4684 std::cout << "========= DataFrame cov =========================";
4685
4686 std::map<std::string, std::vector<double>> data;
4687 data["A"] = {1.0, 2.0, 3.0};
4688 pandas::DataFrame df(data);
4689
4690 auto cov_df = df.cov();
4691
4692 // Check dimensions
4693 bool passed = cov_df.nrows() == 1 && cov_df.ncols() == 1;
4694 if (!passed) {
4695 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_cov() : cov should be 1x1" << std::endl;
4696 throw std::runtime_error("pd_test_aggregation_dataframe_cov failed: cov should be 1x1");
4697 }
4698
4699 // Var(A) = 1.0 with ddof=1
4700 std::string aa = cov_df["A"].get_value_str(0);
datetime_array (pd_test_extension_array.cpp:255)
245 }
246 pandas::Series<numpy::datetime64> s(dt_vec);
247 s.set_datetime_array(ea);
248 // to_frame() must propagate the EA into the resulting DataFrame's column.
249 pandas::DataFrame df = s.to_frame(std::optional<std::string>{"d"});
250 check(df.has_column("d"), "to_frame: column 'd' present");
251 auto& col = df["d"];
252 auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
253 check(col_dt != nullptr, "to_frame: column dynamic_casts to Series<datetime64>");
254 if (col_dt) {
255 const auto& da_opt = col_dt->datetime_array();
256 check(da_opt.has_value() && *da_opt,
257 "to_frame: column has datetime_array EA populated");
258 if (da_opt.has_value() && *da_opt) {
259 // Pointer equality: the SAME shared_ptr should propagate (no clone).
260 check((*da_opt).get() == ea.get(),
261 "to_frame: datetime_array shared_ptr identity preserved");
262 }
263 }
264 return g_errors - errors_before;
265}
datetime_ns_to_string (pd_test_3_all.cpp:27760)
27750 }
27751
27752 if (fail == 0) std::cout << " OK" << std::endl;
27753}
27754
27755void pd_test_astype_datetime_to_string() {
27756 std::cout << " -- pd_test_astype_datetime_to_string --" << std::endl;
27757 int fail = 0;
27758
27759 double ns_val = 1577836800000000000.0;
27760 std::string formatted = pandas::Series<numpy::float64>::datetime_ns_to_string(ns_val);
27761 if (formatted != "2020-01-01") {
27762 std::cout << " FAIL: expected '2020-01-01', got '" << formatted << "'" << std::endl;
27763 fail++;
27764 }
27765
27766 double ns_val2 = 1577836800000000000.0 + 12*3600000000000.0 + 30*60000000000.0 + 45*1000000000.0;
27767 std::string formatted2 = pandas::Series<numpy::float64>::datetime_ns_to_string(ns_val2);
27768 if (formatted2 != "2020-01-01 12:30:45") {
27769 std::cout << " FAIL: expected '2020-01-01 12:30:45', got '" << formatted2 << "'" << std::endl;
27770 fail++;
dt (pd_test_3_all.cpp:18239)
18229 if (offset.freqstr() != "D") {
18230 std::cout << " [FAIL] : Day freqstr() failed" << std::endl;
18231 throw std::runtime_error("pd_test_day_offset: freqstr() failed");
18232 }
18233 if (offset.name() != "Day") {
18234 std::cout << " [FAIL] : Day name() failed" << std::endl;
18235 throw std::runtime_error("pd_test_day_offset: name() failed");
18236 }
18237
18238 // Test apply
18239 numpy::datetime64 dt("2020-01-15");
18240 auto result = offset.apply(dt);
18241 std::tm tm = result.toTm();
18242 if (tm.tm_mday != 20) {
18243 std::cout << " [FAIL] : Day apply() failed, got day " << tm.tm_mday << std::endl;
18244 throw std::runtime_error("pd_test_day_offset: apply() failed");
18245 }
18246
18247 std::cout << " -> tests passed" << std::endl;
18248}
dt (pd_test_3_all.cpp:18239)
18229 if (offset.freqstr() != "D") {
18230 std::cout << " [FAIL] : Day freqstr() failed" << std::endl;
18231 throw std::runtime_error("pd_test_day_offset: freqstr() failed");
18232 }
18233 if (offset.name() != "Day") {
18234 std::cout << " [FAIL] : Day name() failed" << std::endl;
18235 throw std::runtime_error("pd_test_day_offset: name() failed");
18236 }
18237
18238 // Test apply
18239 numpy::datetime64 dt("2020-01-15");
18240 auto result = offset.apply(dt);
18241 std::tm tm = result.toTm();
18242 if (tm.tm_mday != 20) {
18243 std::cout << " [FAIL] : Day apply() failed, got day " << tm.tm_mday << std::endl;
18244 throw std::runtime_error("pd_test_day_offset: apply() failed");
18245 }
18246
18247 std::cout << " -> tests passed" << std::endl;
18248}
dtype_name (pd_test_1_all.cpp:10104)
10094}
10095
10096void pd_test_extension_index_array_constructor() {
10097 std::cout << "========= array constructor =========================";
10098
10099 pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
10100 pandas::CategoricalIndex idx(arr, "fruits");
10101
10102 bool passed = (idx.size() == 4 && !idx.empty() &&
10103 idx.name().has_value() && *idx.name() == "fruits" &&
10104 idx.dtype_name() == "category");
10105 if (!passed) {
10106 std::cout << " [FAIL] : in pd_test_extension_index_array_constructor() : array constructor check failed" << std::endl;
10107 throw std::runtime_error("pd_test_extension_index_array_constructor failed");
10108 }
10109
10110 std::cout << " -> tests passed" << std::endl;
10111}
10112
10113void pd_test_extension_index_copy_constructor() {
10114 std::cout << "========= copy constructor =========================";
dtype_name_full (pd_test_5_all.cpp:26384)
26374 pandas::DataFrame df;
26375 df.add_column<std::string>("group", {"A", "A", "B"});
26376 df.add_column<bool>("flag", {true, false, true});
26377 // Promote the column's dtype override to the PandasPython-origin sub-type.
26378 df.set_column_dtype("flag", "object:bool");
26379
26380 // Pre-check: dtype_name strips the colon, dtype_name_full keeps it.
26381 pandas_tests::check(df["flag"].dtype_name() == "object",
26382 "b21.pre: df[flag].dtype_name()==object (got '" +
26383 df["flag"].dtype_name() + "')", local_fail);
26384 pandas_tests::check(df["flag"].dtype_name_full() == "object:bool",
26385 "b21.pre: df[flag].dtype_name_full()==object:bool (got '" +
26386 df["flag"].dtype_name_full() + "')", local_fail);
26387
26388 auto gg = df.groupby("group").get_group("A");
26389
26390 // FIX VERIFIED: Option 2 via iloc_rows + take_indices preserves the
26391 // dtype_override ("object:bool"); dtype_name() strips the colon and
26392 // returns "object".
26393 std::string gg_dt = gg["flag"].dtype_name();
26394 std::string gg_dt_full = gg["flag"].dtype_name_full();
dtype_override (pd_test_5_all.cpp:26391)
26381 pandas_tests::check(df["flag"].dtype_name() == "object",
26382 "b21.pre: df[flag].dtype_name()==object (got '" +
26383 df["flag"].dtype_name() + "')", local_fail);
26384 pandas_tests::check(df["flag"].dtype_name_full() == "object:bool",
26385 "b21.pre: df[flag].dtype_name_full()==object:bool (got '" +
26386 df["flag"].dtype_name_full() + "')", local_fail);
26387
26388 auto gg = df.groupby("group").get_group("A");
26389
26390 // FIX VERIFIED: Option 2 via iloc_rows + take_indices preserves the
26391 // dtype_override ("object:bool"); dtype_name() strips the colon and
26392 // returns "object".
26393 std::string gg_dt = gg["flag"].dtype_name();
26394 std::string gg_dt_full = gg["flag"].dtype_name_full();
26395 pandas_tests::check(gg_dt == "object",
26396 "b21.gg[flag].dtype_name()==object (FIX VERIFIED; got '" +
26397 gg_dt + "')", local_fail);
26398 pandas_tests::check(gg_dt_full == "object:bool",
26399 "b21.gg[flag].dtype_name_full()==object:bool (FIX VERIFIED; got '" +
26400 gg_dt_full + "')", local_fail);
26401}
empty (pd_test_1_all.cpp:941)
932#include "../pandas/pd_config.h"
933
934namespace dataframe_tests {
935
936namespace dataframe_tests_config {
937
938 void pd_test_config_version() {
939 std::cout << "========= df_config: version info ======================= ";
940 const char* version = pandas::DataFrameInfo::version();
941 if (version == nullptr || std::string(version).empty()) {
942 std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
943 throw std::runtime_error("pd_test_config_version failed: version is null or empty");
944 }
945 std::cout << "-> tests passed" << std::endl;
946 }
947
948 void pd_test_config_na_repr() {
949 std::cout << "========= df_config: NA representation ======================= ";
950 const char* na_repr = pandas::DataFrameConfig::get_na_repr();
951 if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1344 // unique
1345 auto uniq = arr.unique();
1346 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347 if (uniq.size() != 3) {
1348 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350 }
1351
1352 // factorize
1353 auto [codes, uniques] = arr.factorize();
1354 // Codes for NaT should be -1
1355 if (codes.getElementAt({3}) != -1) {
1356 std::cout << " [FAIL] : factorize: NaT code should be -1" << std::endl;
1357 throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1358 }
1359 // Same values should have same codes
1360 if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1361 std::cout << " [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1362 throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1363 }
file (pd_test_2_all.cpp:3463)
3453 {"C", {100, 200, 300, 400, 500}}
3454 };
3455
3456 pandas::DataFrame df(data);
3457
3458 // Export to Excel
3459 std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460 df.to_excel(filepath);
3461
3462 // Verify file was created
3463 std::ifstream file(filepath, std::ios::binary);
3464 if (!file.good()) {
3465 std::cout << " [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466 throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467 }
3468
3469 // Check file size is reasonable (valid XLSX should be > 1KB)
3470 file.seekg(0, std::ios::end);
3471 auto size = file.tellg();
3472 if (size < 1000) {
3473 std::cout << " [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
file (pd_test_2_all.cpp:3463)
3453 {"C", {100, 200, 300, 400, 500}}
3454 };
3455
3456 pandas::DataFrame df(data);
3457
3458 // Export to Excel
3459 std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460 df.to_excel(filepath);
3461
3462 // Verify file was created
3463 std::ifstream file(filepath, std::ios::binary);
3464 if (!file.good()) {
3465 std::cout << " [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466 throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467 }
3468
3469 // Check file size is reasonable (valid XLSX should be > 1KB)
3470 file.seekg(0, std::ios::end);
3471 auto size = file.tellg();
3472 if (size < 1000) {
3473 std::cout << " [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
file (pd_test_2_all.cpp:3463)
3453 {"C", {100, 200, 300, 400, 500}}
3454 };
3455
3456 pandas::DataFrame df(data);
3457
3458 // Export to Excel
3459 std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460 df.to_excel(filepath);
3461
3462 // Verify file was created
3463 std::ifstream file(filepath, std::ios::binary);
3464 if (!file.good()) {
3465 std::cout << " [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466 throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467 }
3468
3469 // Check file size is reasonable (valid XLSX should be > 1KB)
3470 file.seekg(0, std::ios::end);
3471 auto size = file.tellg();
3472 if (size < 1000) {
3473 std::cout << " [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
filter (pd_test_3_all.cpp:2805)
2795 threw = true;
2796 }
2797 if (!threw) {
2798 throw std::runtime_error("bool_() should throw for multi-element DataFrame");
2799 }
2800
2801 std::cout << " -> tests passed" << std::endl;
2802}
2803
2804void pd_test_3_all_df_filter() {
2805 std::cout << "========= DataFrame.filter() =============================";
2806
2807 std::map<std::string, std::vector<double>> data = {
2808 {"col_a", {1.0, 2.0, 3.0}},
2809 {"col_b", {4.0, 5.0, 6.0}},
2810 {"other", {7.0, 8.0, 9.0}}
2811 };
2812 pandas::DataFrame df(data);
2813
2814 // Test filter by items
2815 pandas::DataFrame filtered_items = df.filter({"col_a", "col_b"});
filter_by_bool_mask (pd_test_5_all.cpp:92595)
92585 std::vector<std::vector<double>> data = {{1.0, 2.0}};
92586 std::vector<std::string> cols = {"x", "y"};
92587 return pandas::DataFrame::from_records(data, cols);
92588}
92589
92590void case_1_filter_mask_f64() {
92591 std::cout << "-- case_1_filter_mask_f64()\n";
92592 int local_fail = 0;
92593 auto s = make_f64_series_5();
92594 auto mask = make_mask_TFTFT();
92595 auto result = s.filter_by_bool_mask(mask);
92596 pandas_tests::check(result.size() == 3,
92597 "f_test_25_filter_mask_f64_393211.size_eq_3", local_fail);
92598 pandas_tests::check(result.iat(0) == 1.0,
92599 "f_test_25_filter_mask_f64_393211.iat0_eq_1", local_fail);
92600 pandas_tests::check(result.iat(1) == 3.0,
92601 "f_test_25_filter_mask_f64_393211.iat1_eq_3", local_fail);
92602 pandas_tests::check(result.iat(2) == 5.0,
92603 "f_test_25_filter_mask_f64_393211.iat2_eq_5", local_fail);
92604}
filter_by_bool_series (pd_test_5_all.cpp:92669)
92659 pandas_tests::check(threw,
92660 "f_test_25_filter_mask_length_mismatch_throws_604411.value_error",
92661 local_fail);
92662}
92663
92664void case_6_filter_bool_series_happy() {
92665 std::cout << "-- case_6_filter_bool_series_happy()\n";
92666 int local_fail = 0;
92667 auto s = make_f64_series_5();
92668 auto cond = make_bool_series_TFTFT();
92669 auto result = s.filter_by_bool_series(cond);
92670 pandas_tests::check(result.size() == 3,
92671 "f_test_25_filter_bool_series_happy_490201.size_eq_3", local_fail);
92672 pandas_tests::check(result.iat(0) == 1.0,
92673 "f_test_25_filter_bool_series_happy_490201.iat0_eq_1", local_fail);
92674 pandas_tests::check(result.iat(1) == 3.0,
92675 "f_test_25_filter_bool_series_happy_490201.iat1_eq_3", local_fail);
92676}
92677
92678void case_7_filter_bool_series_mismatch_throws() {
92679 std::cout << "-- case_7_filter_bool_series_mismatch_throws()\n";
floor (pd_test_1_all.cpp:4942)
4933 pandas::Series<double> a({1.4, 2.5, 3.6, -1.4, -2.5});
4934
4935 auto r = a.round();
4936 bool passed = std::abs(r[0] - 1.0) < 0.001 && std::abs(r[2] - 4.0) < 0.001;
4937 if (!passed) {
4938 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : round failed" << std::endl;
4939 throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4940 }
4941
4942 auto f = a.floor();
4943 passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4944 if (!passed) {
4945 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4946 throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4947 }
4948
4949 auto c = a.ceil();
4950 passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4951 if (!passed) {
4952 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
func (pd_test_3_all.cpp:13837)
13827// ============================================================================
13828// Read Stubs Tests (verify they throw correctly)
13829// ============================================================================
13830
13831void pd_test_top_level_read_stubs() {
13832 std::cout << "========= read_* stubs ================================";
13833
13834 // Test that read functions throw as expected (they are stubs)
13835 auto test_throws = [](const std::string& name, auto func) {
13836 try {
13837 func();
13838 std::cout << " [FAIL] : " << name << " should throw" << std::endl;
13839 return false;
13840 } catch (const std::exception&) {
13841 return true;
13842 }
13843 };
13844
13845 bool all_passed = true;
13846 all_passed &= test_throws("read_clipboard", []() { pandas::read_clipboard(); });
13847 all_passed &= test_throws("read_excel", []() { pandas::read_excel("test.xlsx"); });
has_cached_values (pd_test_1_all.cpp:19395)
19385 }
19386
19387 std::cout << " -> tests passed" << std::endl;
19388 }
19389
19390 void pd_test_series_cache() {
19391 std::cout << "========= cache management =========================================";
19392
19393 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
19394
19395 bool passed = s.has_cached_values() == false;
19396 if (!passed) {
19397 std::cout << " [FAIL] : in pd_test_series_cache() : initial cache not empty" << std::endl;
19398 throw std::runtime_error("pd_test_series_cache failed: initial cache not empty");
19399 }
19400
19401 // Trigger cache
19402 s.sum();
19403 s.mean();
19404 s.min();
19405 s.max();
has_cat_categories (pd_test_2_all.cpp:20372)
20362 pandas::DataFrame df;
20363 std::vector<std::string> svals = {"a", "b", "a", "c"};
20364 auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365 cs->set_dtype_override("category");
20366 cs->set_cat_categories({"a", "b", "c"});
20367 cs->set_cat_ordered(true);
20368 df.insert(0, "cat", std::move(cs), true);
20369
20370 auto s = df.get_column_as_string_series("cat");
20371 check(s.dtype_name() == "category", "cat dtype");
20372 check(s.has_cat_categories(), "cat has_categories");
20373 check(s.cat_ordered() == true, "cat ordered");
20374 auto cats = s.get_cat_categories();
20375 check(cats.size() == 3, "cat categories size");
20376 std::set<std::string> cat_set(cats.begin(), cats.end());
20377 check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381 std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
has_mask (pd_test_3_all.cpp:27708)
27699 auto* bool_s = dynamic_cast<pandas::Series<numpy::bool_>*>(result.get());
27700 if (!bool_s) {
27701 std::cout << " FAIL: expected Series<bool_>" << std::endl;
27702 fail++;
27703 } else {
27704 if (bool_s->dtype_name() != "boolean") {
27705 std::cout << " FAIL: dtype should be boolean, got " << bool_s->dtype_name() << std::endl;
27706 fail++;
27707 }
27708 if (!bool_s->has_mask()) {
27709 std::cout << " FAIL: should have mask for NA" << std::endl;
27710 fail++;
27711 } else {
27712 if (!bool_s->mask_at(2)) {
27713 std::cout << " FAIL: position 2 should be masked (NA)" << std::endl;
27714 fail++;
27715 }
27716 }
27717 }
has_multiindex (pd_test_1_all.cpp:27019)
27010 std::map<std::string, std::vector<std::string>> data = {
27011 {"A", {"a", "a", "b", "b"}},
27012 {"B", {"x", "x", "y", "y"}}
27013 };
27014 pandas::DataFrame df(data);
27015
27016 auto result = df.value_counts();
27017 auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27018
27019 if (!counts.has_multiindex()) {
27020 std::cout << " [FAIL] : expected MultiIndex" << std::endl;
27021 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27022 }
27023
27024 const auto& midx = counts.multiindex();
27025
27026 // Should have 2 levels
27027 if (midx.nlevels() != 2) {
27028 std::cout << " [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27029 throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
hasnans (pd_test_1_all.cpp:5363)
5353void pd_test_categorical_index_from_codes() {
5354 std::cout << "========= from_codes =================================";
5355
5356 std::vector<numpy::int32> codes = {0, 1, 0, 2, -1}; // -1 = NA
5357 std::vector<std::string> categories = {"low", "medium", "high"};
5358
5359 pandas::CategoricalIndex idx = pandas::CategoricalIndex::from_codes(codes, categories, true, "level");
5360
5361 bool passed = (idx.size() == 5 && idx.num_categories() == 3 &&
5362 idx.ordered() && idx.name().has_value() && *idx.name() == "level" &&
5363 idx.hasnans()); // has NA from code -1
5364 if (!passed) {
5365 std::cout << " [FAIL] : in pd_test_categorical_index_from_codes()" << std::endl;
5366 throw std::runtime_error("pd_test_categorical_index_from_codes failed");
5367 }
5368
5369 std::cout << " -> tests passed" << std::endl;
5370}
5371
5372void pd_test_categorical_index_simple_new() {
5373 std::cout << "========= _simple_new =================================";
index (pd_test_1_all.cpp:6680)
6670 void pd_test_dataframe_index_ops() {
6671 std::cout << "========= index operations =================";
6672
6673 // Test set_axis (rows)
6674 {
6675 std::map<std::string, std::vector<int>> data;
6676 data["A"] = {1, 2, 3};
6677 pandas::DataFrame df(data);
6678
6679 auto renamed = df.set_axis({"x", "y", "z"}, 0);
6680 std::string idx0 = renamed.index().get_value_str(0);
6681 if (idx0 != "x") {
6682 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6683 throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
6684 }
6685 }
6686
6687 // Test set_axis (columns)
6688 {
6689 std::map<std::string, std::vector<int>> data;
6690 data["A"] = {1, 2};
index_mut (pd_test_5_all.cpp:40329)
40319 pandas_tests::check(r.string_na_sentinel_disabled() == true,
40320 "case4.string_na_sentinel_disabled_propagates", local_fail);
40321
40322 std::cout << " source flag=" << s.string_na_sentinel_disabled()
40323 << " result flag=" << r.string_na_sentinel_disabled() << "\n";
40324}
40325
40326void case_5_index_name_propagates(int& local_fail) {
40327 std::cout << "----- case_5_index_name_propagates -----\n";
40328 auto s = make_series_3<std::int64_t>({10, 20, 30});
40329 s.index_mut().set_name(std::optional<std::string>("idx_name"));
40330
40331 auto r = s.reindex({"0", "1", "2"});
40332
40333 auto src_name = s.index().name();
40334 auto res_name = r.index().name();
40335 pandas_tests::check(res_name.has_value(),
40336 "case5.index_name_present_after_reindex", local_fail);
40337 pandas_tests::check(res_name.has_value() && *res_name == "idx_name",
40338 "case5.index_name_value_is_idx_name", local_fail);
info (pd_test_1_all.cpp:7122)
7112 }
7113 if (!empty_params_error) {
7114 std::cout << " [FAIL] : select_dtypes empty params should throw" << std::endl;
7115 throw std::runtime_error("pd_test_dataframe_select_dtypes failed: empty params error");
7116 }
7117
7118 std::cout << " -> tests passed" << std::endl;
7119 }
7120
7121 // =====================================================================
7122 // Test: info() method
7123 // =====================================================================
7124 void pd_test_dataframe_info() {
7125 std::cout << "========= info ========================";
7126
7127 // Test basic info() with stringstream
7128 std::map<std::string, std::vector<int>> data = {
7129 {"A", {1, 2, 3, 4, 5}},
7130 {"B", {10, 20, 30, 40, 50}},
7131 {"C", {100, 200, 300, 400, 500}}
7132 };
item (pd_test_3_all.cpp:3712)
3703 // Test is_interval (always false for base Index)
3704 if (int_idx.is_interval()) {
3705 throw std::runtime_error("base Index should not be interval");
3706 }
3707
3708 std::cout << " -> tests passed" << std::endl;
3709}
3710
3711void pd_test_3_all_index_item() {
3712 std::cout << "========= Index.item() =============================";
3713
3714 pandas::Index<numpy::int64> idx1({42});
3715 numpy::int64 val = idx1.item();
3716
3717 if (val != 42) {
3718 throw std::runtime_error("item() should return 42");
3719 }
3720
3721 // Test error for size != 1
3722 pandas::Index<numpy::int64> idx2({1, 2, 3});
memcpy (pd_test_5_all.cpp:33658)
33648 }
33649 std::cout << " -> tests passed" << std::endl;
33650}
33651
33652// --- f_test_formatter_to_chars_9.cpp ---
33653
33654namespace f_test_formatter_to_chars_9_ns {
33655
33656static double bits_to_double(std::uint64_t bits) {
33657 double v;
33658 std::memcpy(&v, &bits, sizeof(v));
33659 return v;
33660}
33661
33662static int format_current(char* buf, std::size_t bufsz, int digits, double v) {
33663 if (digits < 0 || !std::isfinite(v)) {
33664 return std::snprintf(buf, bufsz, "%.*f", digits, v);
33665 }
33666 long double scale = 1.0L;
33667 for (int k = 0; k < digits; ++k) scale *= 10.0L;
33668 long double scaled = static_cast<long double>(v) * scale;
memory_usage (pd_test_1_all.cpp:27063)
27053 }
27054
27055 std::cout << "====================================== [OK] pd_test_value_counts test suite ========================== " << std::endl;
27056 return 0;
27057 }
27058
27059} // namespace dataframe_tests
27060// ------------------- pd_test_value_counts.cpp (end) -----------------------------
27061
27062// ------------------- pd_test_memory_usage.cpp (start) -----------------------------
27063// Tests for DataFrame.memory_usage() - pandas-compatible memory usage reporting
27064
27065namespace dataframe_tests {
27066 namespace dataframe_tests_memory_usage {
27067
27068 void pd_test_memory_usage_basic() {
27069 std::cout << "========= basic memory_usage =======================";
27070
27071 // Create a simple DataFrame with multiple columns
27072 std::map<std::string, std::vector<double>> data;
27073 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
mixed_tz_array (pd_test_extension_array.cpp:287)
277 dt_vec.push_back(numpy::datetime64(t->value(), numpy::DateTimeUnit::Nanosecond));
278 }
279 pandas::Series<numpy::datetime64> s(dt_vec);
280 s.set_mixed_tz_array(mta);
281 pandas::DataFrame df = s.to_frame(std::optional<std::string>{"m"});
282 check(df.has_column("m"), "to_frame mixed-tz: column 'm' present");
283 auto& col = df["m"];
284 auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
285 check(col_dt != nullptr, "to_frame mixed-tz: column is Series<datetime64>");
286 if (col_dt) {
287 const auto& mta_opt = col_dt->mixed_tz_array();
288 check(mta_opt.has_value() && *mta_opt,
289 "to_frame mixed-tz: column has mixed_tz_array EA populated");
290 if (mta_opt.has_value() && *mta_opt) {
291 check((*mta_opt).get() == mta.get(),
292 "to_frame mixed-tz: shared_ptr identity preserved");
293 }
294 }
295 return g_errors - errors_before;
296}
name (pd_test_1_all.cpp:295)
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
287
288 std::cout << " -> tests passed" << std::endl;
289 }
290
291 void pd_test_boolean_array_dtype() {
292 std::cout << "========= BooleanArray: dtype ======================= ";
293
294 pandas::BooleanArray arr;
295 if (arr.dtype().name() != "boolean") {
296 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298 }
299
300 if (arr.dtype().kind() != "b") {
301 std::cout << " [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302 throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303 }
304
305 std::cout << " -> tests passed" << std::endl;
nbytes (pd_test_1_all.cpp:6214)
6204 }
6205
6206 // Test empty DataFrame
6207 pandas::DataFrame empty_df;
6208 if (!empty_df.empty()) {
6209 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210 throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211 }
6212
6213 // Test nbytes > 0 for non-empty
6214 if (df.nbytes() == 0) {
6215 std::cout << " [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216 throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217 }
6218
6219 // Test columns index
6220 if (df.columns().size() != 3) {
6221 std::cout << " [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222 throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223 }
ndim (pd_test_1_all.cpp:6195)
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
6199
6200 // Test empty
6201 if (df.empty()) {
6202 std::cout << " [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203 throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204 }
ravel (pd_test_3_all.cpp:2147)
2137 throw std::runtime_error("memory_usage shallow too small");
2138 }
2139 if (deep < shallow) {
2140 throw std::runtime_error("memory_usage deep should be >= shallow");
2141 }
2142
2143 std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147 std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150 pandas::CategoricalArray arr(values);
2151
2152 auto raveled = arr.ravel();
2153 if (raveled.size() != 3 || !raveled.equals(arr)) {
2154 throw std::runtime_error("ravel failed");
2155 }
2156
2157 auto viewed = arr.view();
repeat (pd_test_3_all.cpp:2166)
2157 auto viewed = arr.view();
2158 if (viewed.size() != 3 || !viewed.equals(arr)) {
2159 throw std::runtime_error("view failed");
2160 }
2161
2162 std::cout << " -> tests passed" << std::endl;
2163}
2164
2165void pd_test_3_all_categorical_repeat() {
2166 std::cout << "========= CategoricalArray.repeat() ===================";
2167
2168 std::vector<std::optional<std::string>> values = {"a", "b"};
2169 pandas::CategoricalArray arr(values);
2170
2171 auto result = arr.repeat(3);
2172 if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2173 *result[3] != "b" || *result[5] != "b") {
2174 throw std::runtime_error("repeat scalar failed");
2175 }
repr (pd_test_1_all.cpp:10906)
10896 std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900 std::cout << "========= repr =========================";
10901
10902 pandas::CategoricalArray arr({"a", "b", "c"});
10903 // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904 pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906 std::string repr_str = idx.repr();
10907
10908 bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909 if (!passed) {
10910 std::cout << " [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911 throw std::runtime_error("pd_test_extension_index_repr failed");
10912 }
10913
10914 std::cout << " -> tests passed" << std::endl;
10915}
resolve_multiindex_level (pd_test_3_all.cpp:23441)
23431 if (gb.group_keys_order().size() != 2)
23432 throw std::runtime_error("expected 2 groups");
23433 auto sums = gb.sum();
23434 if (sums[0] != 40.0 || sums[1] != 20.0)
23435 throw std::runtime_error("sum mismatch");
23436
23437 std::cout << " -> tests passed" << std::endl;
23438}
23439
23440void pd_test_groupby_resolve_level() {
23441 std::cout << "========= resolve_multiindex_level() ==================";
23442
23443 pandas::Series<numpy::float64> s({1.0, 2.0});
23444 std::vector<std::vector<std::string>> level_values = {{"a", "b"}, {"x", "y"}};
23445 std::vector<std::optional<std::string>> level_names = {"first", "second"};
23446 auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23447 s.set_multiindex(mi);
23448
23449 if (s.resolve_multiindex_level("first") != 0 || s.resolve_multiindex_level("second") != 1)
23450 throw std::runtime_error("string level resolution failed");
23451 if (s.resolve_multiindex_level(0) != 0 || s.resolve_multiindex_level(-1) != 1)
resolve_multiindex_level (pd_test_3_all.cpp:23441)
23431 if (gb.group_keys_order().size() != 2)
23432 throw std::runtime_error("expected 2 groups");
23433 auto sums = gb.sum();
23434 if (sums[0] != 40.0 || sums[1] != 20.0)
23435 throw std::runtime_error("sum mismatch");
23436
23437 std::cout << " -> tests passed" << std::endl;
23438}
23439
23440void pd_test_groupby_resolve_level() {
23441 std::cout << "========= resolve_multiindex_level() ==================";
23442
23443 pandas::Series<numpy::float64> s({1.0, 2.0});
23444 std::vector<std::vector<std::string>> level_values = {{"a", "b"}, {"x", "y"}};
23445 std::vector<std::optional<std::string>> level_names = {"first", "second"};
23446 auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23447 s.set_multiindex(mi);
23448
23449 if (s.resolve_multiindex_level("first") != 0 || s.resolve_multiindex_level("second") != 1)
23450 throw std::runtime_error("string level resolution failed");
23451 if (s.resolve_multiindex_level(0) != 0 || s.resolve_multiindex_level(-1) != 1)
result (pd_test_1_all.cpp:15406)
15396 data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397 data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400 mask.setElementAt({0}, numpy::bool_(false));
15401 mask.setElementAt({1}, numpy::bool_(false));
15402
15403 pandas::DatetimeArray arr(data, mask);
15404 pandas::DatetimeIndexBase idx(arr, "original");
15405
15406 // Create join result (int64 values)
15407 numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408 join_result.setElementAt({0}, numpy::int64(500LL));
15409 join_result.setElementAt({1}, numpy::int64(600LL));
15410 join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412 auto new_idx = idx._from_join_target(join_result);
15413
15414 bool passed = (new_idx.size() == 3 &&
15415 new_idx.name().has_value() && *new_idx.name() == "original");
15416 if (!passed) {
result (pd_test_1_all.cpp:15406)
15396 data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397 data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400 mask.setElementAt({0}, numpy::bool_(false));
15401 mask.setElementAt({1}, numpy::bool_(false));
15402
15403 pandas::DatetimeArray arr(data, mask);
15404 pandas::DatetimeIndexBase idx(arr, "original");
15405
15406 // Create join result (int64 values)
15407 numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408 join_result.setElementAt({0}, numpy::int64(500LL));
15409 join_result.setElementAt({1}, numpy::int64(600LL));
15410 join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412 auto new_idx = idx._from_join_target(join_result);
15413
15414 bool passed = (new_idx.size() == 3 &&
15415 new_idx.name().has_value() && *new_idx.name() == "original");
15416 if (!passed) {
round (pd_test_1_all.cpp:1688)
1678 void pd_test_floating_array_rounding() {
1679 std::cout << "========= FloatingArray: rounding ======================= ";
1680
1681 pandas::FloatingArray<double> arr({
1682 std::optional<double>(1.234),
1683 std::optional<double>(2.567),
1684 std::nullopt
1685 });
1686
1687 auto rounded = arr.round(2);
1688 if (std::abs(rounded[0].value() - 1.23) > 0.001 ||
1689 std::abs(rounded[1].value() - 2.57) > 0.001) {
1690 std::cout << " [FAIL] : in pd_test_floating_array_rounding() : round(2)" << std::endl;
1691 throw std::runtime_error("pd_test_floating_array_rounding failed: round(2)");
1692 }
1693
1694 if (!rounded.is_na(2)) {
1695 std::cout << " [FAIL] : in pd_test_floating_array_rounding() : round should preserve NA" << std::endl;
1696 throw std::runtime_error("pd_test_floating_array_rounding failed: NA preservation");
1697 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
set_cat_categories (pd_test_2_all.cpp:20366)
20356 check(sub.columns().get_value_str(0) == "col", "dup col0 name");
20357 check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361 std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362 pandas::DataFrame df;
20363 std::vector<std::string> svals = {"a", "b", "a", "c"};
20364 auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365 cs->set_dtype_override("category");
20366 cs->set_cat_categories({"a", "b", "c"});
20367 cs->set_cat_ordered(true);
20368 df.insert(0, "cat", std::move(cs), true);
20369
20370 auto s = df.get_column_as_string_series("cat");
20371 check(s.dtype_name() == "category", "cat dtype");
20372 check(s.has_cat_categories(), "cat has_categories");
20373 check(s.cat_ordered() == true, "cat ordered");
20374 auto cats = s.get_cat_categories();
20375 check(cats.size() == 3, "cat categories size");
20376 std::set<std::string> cat_set(cats.begin(), cats.end());
set_cat_ordered (pd_test_2_all.cpp:20367)
20357 check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361 std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362 pandas::DataFrame df;
20363 std::vector<std::string> svals = {"a", "b", "a", "c"};
20364 auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365 cs->set_dtype_override("category");
20366 cs->set_cat_categories({"a", "b", "c"});
20367 cs->set_cat_ordered(true);
20368 df.insert(0, "cat", std::move(cs), true);
20369
20370 auto s = df.get_column_as_string_series("cat");
20371 check(s.dtype_name() == "category", "cat dtype");
20372 check(s.has_cat_categories(), "cat has_categories");
20373 check(s.cat_ordered() == true, "cat ordered");
20374 auto cats = s.get_cat_categories();
20375 check(cats.size() == 3, "cat categories size");
20376 std::set<std::string> cat_set(cats.begin(), cats.end());
20377 check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
set_datetime_array (pd_test_extension_array.cpp:247)
237 pandas::Timestamp(1577836800000000000LL),
238 pandas::Timestamp(1609459200000000000LL),
239 };
240 auto ea = std::make_shared<pandas::DatetimeArray>(
241 pandas::DatetimeArray::from_timestamps(ts, /*uniform_tz=*/""));
242 std::vector<numpy::datetime64> dt_vec;
243 for (const auto& t : ts) {
244 dt_vec.push_back(numpy::datetime64(t->value(), numpy::DateTimeUnit::Nanosecond));
245 }
246 pandas::Series<numpy::datetime64> s(dt_vec);
247 s.set_datetime_array(ea);
248 // to_frame() must propagate the EA into the resulting DataFrame's column.
249 pandas::DataFrame df = s.to_frame(std::optional<std::string>{"d"});
250 check(df.has_column("d"), "to_frame: column 'd' present");
251 auto& col = df["d"];
252 auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
253 check(col_dt != nullptr, "to_frame: column dynamic_casts to Series<datetime64>");
254 if (col_dt) {
255 const auto& da_opt = col_dt->datetime_array();
256 check(da_opt.has_value() && *da_opt,
257 "to_frame: column has datetime_array EA populated");
set_dtype_override (pd_test_2_all.cpp:20225)
20215 std::vector<numpy::float64> vals = {1.0, 2.0, 3.0};
20216 df.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals, "A"), true);
20217
20218 auto t = df.classify_column_access("A");
20219 check(t == pandas::DataFrame::ColumnAccessType::NumericColumn, "float64 -> NumericColumn");
20220
20221 // int64 column
20222 pandas::DataFrame df2;
20223 std::vector<numpy::int64> ivals = {10, 20, 30};
20224 auto iseries = std::make_unique<pandas::Series<numpy::int64>>(ivals, "B");
20225 iseries->set_dtype_override("int64");
20226 df2.insert(0, "B", std::move(iseries), true);
20227 auto t2 = df2.classify_column_access("B");
20228 check(t2 == pandas::DataFrame::ColumnAccessType::NumericColumn, "int64 -> NumericColumn");
20229}
20230
20231void pd_test_getitem_dispatch_classify_bool() {
20232 std::cout << "pd_test_getitem_dispatch_classify_bool" << std::endl;
20233 pandas::DataFrame df;
20234 std::vector<numpy::bool_> bvals = {true, false, true};
20235 df.insert(0, "flag", std::make_unique<pandas::Series<numpy::bool_>>(bvals, "flag"), true);
set_freq (pd_test_1_all.cpp:8254)
8244void pd_test_datetime_mixin_set_freq() {
8245 std::cout << "========= set_freq ====================================";
8246
8247 std::vector<std::optional<numpy::datetime64>> values = {
8248 numpy::datetime64(0LL, numpy::DateTimeUnit::Nanosecond)
8249 };
8250 pandas::DatetimeArray arr(values);
8251 pandas::DatetimeMixinIndex idx(arr);
8252
8253 idx.set_freq("D");
8254 auto f = idx.freq();
8255
8256 bool passed = (f.has_value() && *f == "D");
8257 if (!passed) {
8258 std::cout << " [FAIL] : in pd_test_datetime_mixin_set_freq()" << std::endl;
8259 throw std::runtime_error("pd_test_datetime_mixin_set_freq failed");
8260 }
8261
8262 std::cout << " -> tests passed" << std::endl;
8263}
set_mask (pd_test_3_all.cpp:25879)
25869 std::cout << " PASSED" << std::endl;
25870}
25871
25872void pd_test_cat_constructor_helpers_mask() {
25873 std::cout << "========= cat_constructor_helpers mask ==================" << std::endl;
25874 ::pandas::Series<std::string> s(std::vector<std::string>{"x", "y", "z"});
25875 ::numpy::NDArray<::numpy::bool_> mask(std::vector<size_t>{3});
25876 mask.setElementAt({0}, ::numpy::bool_(false));
25877 mask.setElementAt({1}, ::numpy::bool_(true));
25878 mask.setElementAt({2}, ::numpy::bool_(false));
25879 s.set_mask(mask);
25880 auto result = ::pandas::series_to_optional_string_vector(s);
25881 if (result.size() != 3) throw std::runtime_error("Expected size 3");
25882 if (!result[0].has_value()) throw std::runtime_error("Expected value at index 0");
25883 if (result[1].has_value()) throw std::runtime_error("Expected nullopt at index 1 (masked)");
25884 if (!result[2].has_value()) throw std::runtime_error("Expected value at index 2");
25885 std::cout << " PASSED" << std::endl;
25886}
25887
25888void pd_test_cat_constructor_helpers_empty() {
25889 std::cout << "========= cat_constructor_helpers empty =================" << std::endl;
set_mixed_tz_array (pd_test_extension_array.cpp:280)
270 pandas::Timestamp(1577836800000000000LL, "UTC"),
271 pandas::Timestamp(1609459200000000000LL, "US/Eastern"),
272 };
273 auto mta = std::make_shared<pandas::MixedTzDatetimeArray>(
274 pandas::MixedTzDatetimeArray::from_timestamps(ts));
275 std::vector<numpy::datetime64> dt_vec;
276 for (const auto& t : ts) {
277 dt_vec.push_back(numpy::datetime64(t->value(), numpy::DateTimeUnit::Nanosecond));
278 }
279 pandas::Series<numpy::datetime64> s(dt_vec);
280 s.set_mixed_tz_array(mta);
281 pandas::DataFrame df = s.to_frame(std::optional<std::string>{"m"});
282 check(df.has_column("m"), "to_frame mixed-tz: column 'm' present");
283 auto& col = df["m"];
284 auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
285 check(col_dt != nullptr, "to_frame mixed-tz: column is Series<datetime64>");
286 if (col_dt) {
287 const auto& mta_opt = col_dt->mixed_tz_array();
288 check(mta_opt.has_value() && *mta_opt,
289 "to_frame mixed-tz: column has mixed_tz_array EA populated");
290 if (mta_opt.has_value() && *mta_opt) {
set_multiindex (pd_test_2_all.cpp:20409)
20399 check(s.get_freq().value() == "D", "freq value D");
20400 }
20401
20402 // Test MultiIndex propagation
20403 pandas::DataFrame df2;
20404 std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405 df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406 std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407 std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408 auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409 df2.set_multiindex(mi);
20410
20411 auto s2 = df2.extract_column_as_numeric_series("A");
20412 check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418 std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419 dataframe_tests_getitem_dispatch::g_pass = 0;
set_name (pd_test_1_all.cpp:11798)
11788 throw std::runtime_error("pd_test_index_vector_constructor failed");
11789 }
11790
11791 std::cout << " -> tests passed" << std::endl;
11792 }
11793
11794 void pd_test_index_copy_constructor() {
11795 std::cout << "========= copy constructor ============================";
11796
11797 pandas::Index<numpy::int64> idx1{1, 2, 3};
11798 idx1.set_name("original");
11799
11800 pandas::Index<numpy::int64> idx2(idx1);
11801
11802 bool passed = (idx2.size() == 3);
11803 passed = passed && (idx2.name().value() == "original");
11804 passed = passed && idx2.equals(idx1);
11805
11806 if (!passed) {
11807 std::cout << " [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808 throw std::runtime_error("pd_test_index_copy_constructor failed");
shape (pd_test_1_all.cpp:6188)
6178 std::cout << "========= properties =======================";
6179
6180 std::map<std::string, std::vector<numpy::float64>> data;
6181 data["A"] = {1.0, 2.0, 3.0, 4.0};
6182 data["B"] = {5.0, 6.0, 7.0, 8.0};
6183 data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185 pandas::DataFrame df(data);
6186
6187 // Test shape
6188 auto shape = df.shape();
6189 if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190 std::cout << " [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191 throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192 }
6193
6194 // Test ndim
6195 if (df.ndim() != 2) {
6196 std::cout << " [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197 throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198 }
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17 void pd_test_boolean_array_constructors() {
18 std::cout << "========= BooleanArray: constructors ======================= ";
19
20 // Default constructor
21 pandas::BooleanArray arr1;
22 if (arr1.size() != 0) {
23 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24 throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25 }
26
27 // Initializer list constructor
28 pandas::BooleanArray arr2({
29 std::optional<bool>(true),
30 std::optional<bool>(false),
31 std::nullopt,
32 std::optional<bool>(true)
sparse (pd_test_3_all.cpp:20627)
20617#include <cmath>
20618
20619#include "../pandas/pd_series.h"
20620
20621// CRITICAL: No using namespace directives
20622
20623namespace dataframe_tests {
20624namespace dataframe_tests_sparse_accessor {
20625
20626// ============================================================================
20627// Test sparse().density() and sparse().npoints()
20628// ============================================================================
20629
20630void pd_test_sparse_density_npoints() {
20631 std::cout << "========= Series.sparse().density/npoints() =============";
20632
20633 // Create a series with some zeros (sparse values)
20634 pandas::Series<numpy::float64> s({0.0, 1.0, 0.0, 2.0, 0.0, 3.0});
20635
20636 auto sparse = s.sparse(0.0); // 0.0 is the fill value
str (pd_test_1_all.cpp:7137)
7127 // Test basic info() with stringstream
7128 std::map<std::string, std::vector<int>> data = {
7129 {"A", {1, 2, 3, 4, 5}},
7130 {"B", {10, 20, 30, 40, 50}},
7131 {"C", {100, 200, 300, 400, 500}}
7132 };
7133 pandas::DataFrame df(data);
7134
7135 std::ostringstream oss;
7136 df.info(oss);
7137 std::string output = oss.str();
7138
7139 // Verify key components
7140 if (output.find("<class 'pandas.core.frame.DataFrame'>") == std::string::npos) {
7141 std::cout << " [FAIL] : info missing class name" << std::endl;
7142 throw std::runtime_error("pd_test_dataframe_info failed: missing class name");
7143 }
7144 if (output.find("RangeIndex:") == std::string::npos) {
7145 std::cout << " [FAIL] : info missing RangeIndex" << std::endl;
7146 throw std::runtime_error("pd_test_dataframe_info failed: missing RangeIndex");
7147 }
truncate (pd_test_1_all.cpp:20467)
20457 std::vector<std::string> dates = {
20458 "2020-01-01",
20459 "2020-01-02",
20460 "2020-01-03",
20461 "2020-01-04",
20462 "2020-01-05"
20463 };
20464 df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20465
20466 // Truncate to keep only dates from 2020-01-02 to 2020-01-04
20467 pandas::DataFrame result = df.truncate("2020-01-02", "2020-01-04");
20468
20469 bool passed = (result.nrows() == 3);
20470
20471 if (!passed) {
20472 std::cout << " [FAIL] : in pd_test_timeseries_truncate() : expected 3 rows, got "
20473 << result.nrows() << std::endl;
20474 throw std::runtime_error("pd_test_timeseries_truncate failed");
20475 }
20476
20477 std::cout << " -> tests passed" << std::endl;
values (pd_test_1_all.cpp:364)
354 pandas::CategoricalArray arr1;
355 if (arr1.size() != 0) {
356 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357 throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358 }
359 if (arr1.ordered()) {
360 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361 throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362 }
363
364 // Constructor from values (infer categories)
365 std::vector<std::optional<std::string>> values = {
366 std::optional<std::string>("a"),
367 std::optional<std::string>("b"),
368 std::optional<std::string>("a"),
369 std::optional<std::string>("c")
370 };
371 pandas::CategoricalArray arr2(values);
372 if (arr2.size() != 4) {
373 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374 throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");