TimedeltaIndex#
-
class pandas::TimedeltaIndex#
Index class whose labels are timedelta (duration) values, held as `numpy::timedelta64` elements, for use as axis labels in pandas data structures.
Example#
#include <pandas/pandas.h>
using namespace pandas;
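// Create a TimedeltaIndex holding 1, 2 and 3 days (values are nanosecond counts)
constexpr int64_t k_ns_per_day = 86400000000000LL;
std::vector<numpy::timedelta64> values;
values.push_back(numpy::timedelta64(1 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
values.push_back(numpy::timedelta64(2 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
values.push_back(numpy::timedelta64(3 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
TimedeltaIndex idx(values);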
size_t len = idx.size();
Constructors#
| Signature | Location | Example |
|---|---|---|
| | pd_timedelta_index.h:105 | |
| | pd_timedelta_index.h:115 | |
| | pd_timedelta_index.h:125 | |
| | pd_timedelta_index.h:143 | |
| | pd_timedelta_index.h:154 | |
| | pd_timedelta_index.h:161 | |
Indexing / Selection#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | numpy::NDArray<numpy::int64> | pd_timedelta_index.h:1646 | |
| | numpy::NDArray<numpy::int64> | pd_timedelta_index.h:1697 | |
| | | pd_timedelta_index.h:1731 | |
| | TimedeltaIndex | pd_timedelta_index.h:1793 | |
| | int64_t | pd_timedelta_index.h:1806 | |
| | std::optional<size_t> | pd_timedelta_index.h:1829 | |
| | size_t | pd_timedelta_index.h:1853 | |
| | std::string | pd_timedelta_index.h:829 | |
| | std::string | pd_timedelta_index.h:852 | |
| | TimedeltaIndex | pd_timedelta_index.h:2752 | |
| | TimedeltaIndex | pd_timedelta_index.h:3139 | |
| | TimedeltaIndex | pd_timedelta_index.h:3164 | |
Data Manipulation#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | TimedeltaIndex | pd_timedelta_index.h:1354 | |
| | TimedeltaIndex | pd_timedelta_index.h:1402 | |
| | TimedeltaIndex | pd_timedelta_index.h:1473 | |
| | TimedeltaIndex | pd_timedelta_index.h:1485 | |
| | TimedeltaIndex | pd_timedelta_index.h:1954 | |
| | | pd_timedelta_index.h:2415 | |
| | TimedeltaIndex | pd_timedelta_index.h:580 | |
| | TimedeltaIndex | pd_timedelta_index.h:2550 | |
| | TimedeltaIndex | pd_timedelta_index.h:2566 | |
Missing Data#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | TimedeltaIndex | pd_timedelta_index.h:1602 | |
| | BooleanArray | pd_timedelta_index.h:2136 | |
| | BooleanArray | pd_timedelta_index.h:2149 | |
| | BooleanArray | pd_timedelta_index.h:2157 | |
| | BooleanArray | pd_timedelta_index.h:2170 | |
Statistics#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | std::optional<numpy::timedelta64> | pd_timedelta_index.h:390 | |
| | std::optional<numpy::timedelta64> | pd_timedelta_index.h:2276 | |
| | std::optional<numpy::timedelta64> | pd_timedelta_index.h:357 | |
| | size_t | pd_timedelta_index.h:2330 | |
| | std::optional<numpy::timedelta64> | pd_timedelta_index.h:507 | |
| | std::optional<numpy::timedelta64> | pd_timedelta_index.h:423 | |
Aggregation#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | std::unordered_map<int64_t, std::vector<size_t>> | pd_timedelta_index.h:1889 | |
| | TimedeltaIndex | pd_timedelta_index.h:2241 | |
Comparison#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | bool | pd_timedelta_index.h:1560 | |
Sorting#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | numpy::NDArray<numpy::int64> | pd_timedelta_index.h:1005 | |
| | size_t | pd_timedelta_index.h:2501 | |
| | TimedeltaIndex | pd_timedelta_index.h:2668 | |
Reshaping#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | TimedeltaIndex | pd_timedelta_index.h:3003 | |
| | | pd_timedelta_index.h:1156 | |
| | FrameData | pd_timedelta_index.h:2905 | |
| | std::vector<int64_t> | pd_timedelta_index.h:2798 | |
| | TimedeltaIndex | pd_timedelta_index.h:2996 | |
Combining#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | TimedeltaIndex | pd_timedelta_index.h:976 | |
| | TimedeltaIndex | pd_timedelta_index.h:985 | |
| | static void | pd_timedelta_index.h:596 | |
| | | pd_timedelta_index.h:655 | |
| | | pd_timedelta_index.h:675 | |
| | static TimedeltaIndex | pd_timedelta_index.h:956 | |
| | TimedeltaIndex | pd_timedelta_index.h:2210 | |
Time Series#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | int64_t | pd_timedelta_index.h:1025 | |
| | std::vector<int64_t> | pd_timedelta_index.h:1050 | |
| | TimedeltaIndex | pd_timedelta_index.h:1260 | |
| | TimedeltaIndex | pd_timedelta_index.h:1337 | |
| | TimedeltaIndex | pd_timedelta_index.h:1308 | |
| | TimedeltaIndex | pd_timedelta_index.h:342 | |
I/O#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | TimedeltaIndex | pd_timedelta_index.h:2784 | |
| | std::vector<std::optional<numpy::timedelta64>> | pd_timedelta_index.h:2821 | |
| | std::vector<std::optional<PyTimedelta>> | pd_timedelta_index.h:267 | |
| | SeriesData | pd_timedelta_index.h:2964 | |
| | std::vector<std::optional<numpy::timedelta64>> | pd_timedelta_index.h:2859 | |
| | std::string | pd_timedelta_index.h:682 | |
| | std::vector<std::optional<numpy::timedelta64>> | pd_timedelta_index.h:2828 | |
Conversion#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | AsTypeResult | pd_timedelta_index.h:1121 | |
| | numpy::NDArray<numpy::int64> | pd_timedelta_index.h:1070 | |
| | TimedeltaIndex | pd_timedelta_index.h:565 | |
| | TimedeltaIndex | pd_timedelta_index.h:1940 | |
| | TimedeltaIndex | pd_timedelta_index.h:3125 | |
Set Operations#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | BooleanArray | pd_timedelta_index.h:1499 | |
| | TimedeltaIndex | pd_timedelta_index.h:1984 | |
| | BooleanArray | pd_timedelta_index.h:2103 | |
| | TimedeltaIndex | pd_timedelta_index.h:2727 | |
| | TimedeltaIndex | pd_timedelta_index.h:3017 | |
| | TimedeltaIndex | pd_timedelta_index.h:3048 | |
| | TimedeltaIndex | pd_timedelta_index.h:3064 | |
Type Checking#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | bool | pd_timedelta_index.h:2029 | |
| | bool | pd_timedelta_index.h:2041 | |
| | bool | pd_timedelta_index.h:2049 | |
| | bool | pd_timedelta_index.h:2057 | |
| | bool | pd_timedelta_index.h:2065 | |
| | bool | pd_timedelta_index.h:2073 | |
| | bool | pd_timedelta_index.h:2081 | |
| | bool | pd_timedelta_index.h:2089 | |
Other Methods#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | bool | pd_timedelta_index.h:909 | |
| | bool | pd_timedelta_index.h:931 | |
| | int64_t | pd_timedelta_index.h:482 | |
| | int64_t | pd_timedelta_index.h:458 | |
| | TimedeltaArray | pd_timedelta_index.h:134 | |
| | TimedeltaArray | pd_timedelta_index.h:147 | |
| | TimedeltaIndex | pd_timedelta_index.h:330 | |
| | TimedeltaIndex | pd_timedelta_index.h:316 | |
| | std::unique_ptr<IndexBase> | pd_timedelta_index.h:552 | |
| | TimedeltaComponents | pd_timedelta_index.h:250 | |
| | IntegerArray<numpy::int64> | pd_timedelta_index.h:218 | |
| | TimedeltaIndex | pd_timedelta_index.h:1221 | |
| | TimedeltaIndex | pd_timedelta_index.h:1240 | |
| | std::string | pd_timedelta_index.h:201 | |
| | std::pair<numpy::NDArray<numpy::int64>, TimedeltaIndex> | pd_timedelta_index.h:1587 | |
| | TimedeltaIndex | pd_timedelta_index.h:300 | |
| | std::vector<std::string> | pd_timedelta_index.h:1621 | |
| | static std::string | pd_timedelta_index.h:623 | |
| | std::string | pd_timedelta_index.h:608 | |
| | bool | pd_timedelta_index.h:1915 | |
| | bool | pd_timedelta_index.h:1928 | |
| | std::string | pd_timedelta_index.h:741 | |
| | std::string | pd_timedelta_index.h:193 | |
| | TimedeltaIndex | pd_timedelta_index.h:2013 | |
| | numpy::timedelta64 | pd_timedelta_index.h:2182 | |
| | size_t | pd_timedelta_index.h:2317 | |
| | std::string | pd_timedelta_index.h:809 | |
| | IntegerArray<numpy::int64> | pd_timedelta_index.h:234 | |
| | IntegerArray<numpy::int64> | pd_timedelta_index.h:242 | |
| | TimedeltaIndex | pd_timedelta_index.h:2363 | |
| | std::vector<std::optional<numpy::timedelta64>> | pd_timedelta_index.h:2391 | |
| | TimedeltaIndex | pd_timedelta_index.h:2451 | |
| | TimedeltaIndex | pd_timedelta_index.h:2471 | |
| | std::string | pd_timedelta_index.h:820 | |
| | std::vector<std::optional<bool>> | pd_timedelta_index.h:1501 | |
| | TimedeltaIndex | pd_timedelta_index.h:284 | |
| | IntegerArray<numpy::int64> | pd_timedelta_index.h:226 | |
| | void | pd_timedelta_index.h:206 | |
| | TimedeltaIndex | pd_timedelta_index.h:2586 | |
| | std::vector<size_t> | pd_timedelta_index.h:2630 | |
| | std::pair<size_t, size_t> | pd_timedelta_index.h:2603 | |
| | TimedeltaIndex | pd_timedelta_index.h:2656 | |
| | StringMethods<TimedeltaIndex> | pd_timedelta_index.h:892 | |
| | pandas::Timedelta | pd_timedelta_index.h:1831 | |
| | IndexTypeId | pd_timedelta_index.h:556 | |
| | TimedeltaIndex | pd_timedelta_index.h:3197 | |
| | TimedeltaIndex | pd_timedelta_index.h:3233 | |
Code Examples#
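The following examples are extracted from the test suite. Each listing is titled with the method name and the test file location it was taken from, and every line is prefixed with its original line number in that test file. Several listings exercise a same-named method on a different class (for example `CategoricalIndex`, `MultiIndex`, or `DataFrame`) rather than on `TimedeltaIndex` itself, so treat them as illustrations of the call shape, not as TimedeltaIndex-specific usage.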
TimedeltaIndex (pd_test_5_all.cpp:91483)
91473 values.push_back(numpy::datetime64(k_ns_2024_01_03, numpy::DateTimeUnit::Nanosecond));
91474 return pandas::DatetimeIndex(values);
91475}
91476
91477static pandas::TimedeltaIndex make_td_index_3() {
91478 constexpr int64_t k_ns_per_day = 86400000000000LL;
91479 std::vector<numpy::timedelta64> values;
91480 values.push_back(numpy::timedelta64(1 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91481 values.push_back(numpy::timedelta64(2 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91482 values.push_back(numpy::timedelta64(3 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91483 return pandas::TimedeltaIndex(values);
91484}
91485
91486static pandas::PeriodIndex make_period_index_3() {
91487 std::vector<std::string> periods = {"2020-01", "2020-02", "2020-03"};
91488 return pandas::PeriodIndex(periods, "M");
91489}
91490
91491static pandas::MultiIndex make_multi_index_mixed() {
91492 std::vector<std::vector<std::string>> arrays = {
91493 {"a", "b", "b"},
TimedeltaIndex (pd_test_5_all.cpp:91483)
91473 values.push_back(numpy::datetime64(k_ns_2024_01_03, numpy::DateTimeUnit::Nanosecond));
91474 return pandas::DatetimeIndex(values);
91475}
91476
91477static pandas::TimedeltaIndex make_td_index_3() {
91478 constexpr int64_t k_ns_per_day = 86400000000000LL;
91479 std::vector<numpy::timedelta64> values;
91480 values.push_back(numpy::timedelta64(1 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91481 values.push_back(numpy::timedelta64(2 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91482 values.push_back(numpy::timedelta64(3 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91483 return pandas::TimedeltaIndex(values);
91484}
91485
91486static pandas::PeriodIndex make_period_index_3() {
91487 std::vector<std::string> periods = {"2020-01", "2020-02", "2020-03"};
91488 return pandas::PeriodIndex(periods, "M");
91489}
91490
91491static pandas::MultiIndex make_multi_index_mixed() {
91492 std::vector<std::vector<std::string>> arrays = {
91493 {"a", "b", "b"},
TimedeltaIndex (pd_test_5_all.cpp:91483)
91473 values.push_back(numpy::datetime64(k_ns_2024_01_03, numpy::DateTimeUnit::Nanosecond));
91474 return pandas::DatetimeIndex(values);
91475}
91476
91477static pandas::TimedeltaIndex make_td_index_3() {
91478 constexpr int64_t k_ns_per_day = 86400000000000LL;
91479 std::vector<numpy::timedelta64> values;
91480 values.push_back(numpy::timedelta64(1 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91481 values.push_back(numpy::timedelta64(2 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91482 values.push_back(numpy::timedelta64(3 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91483 return pandas::TimedeltaIndex(values);
91484}
91485
91486static pandas::PeriodIndex make_period_index_3() {
91487 std::vector<std::string> periods = {"2020-01", "2020-02", "2020-03"};
91488 return pandas::PeriodIndex(periods, "M");
91489}
91490
91491static pandas::MultiIndex make_multi_index_mixed() {
91492 std::vector<std::vector<std::string>> arrays = {
91493 {"a", "b", "b"},
TimedeltaIndex (pd_test_5_all.cpp:91483)
91473 values.push_back(numpy::datetime64(k_ns_2024_01_03, numpy::DateTimeUnit::Nanosecond));
91474 return pandas::DatetimeIndex(values);
91475}
91476
91477static pandas::TimedeltaIndex make_td_index_3() {
91478 constexpr int64_t k_ns_per_day = 86400000000000LL;
91479 std::vector<numpy::timedelta64> values;
91480 values.push_back(numpy::timedelta64(1 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91481 values.push_back(numpy::timedelta64(2 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91482 values.push_back(numpy::timedelta64(3 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91483 return pandas::TimedeltaIndex(values);
91484}
91485
91486static pandas::PeriodIndex make_period_index_3() {
91487 std::vector<std::string> periods = {"2020-01", "2020-02", "2020-03"};
91488 return pandas::PeriodIndex(periods, "M");
91489}
91490
91491static pandas::MultiIndex make_multi_index_mixed() {
91492 std::vector<std::vector<std::string>> arrays = {
91493 {"a", "b", "b"},
TimedeltaIndex (pd_test_5_all.cpp:91483)
91473 values.push_back(numpy::datetime64(k_ns_2024_01_03, numpy::DateTimeUnit::Nanosecond));
91474 return pandas::DatetimeIndex(values);
91475}
91476
91477static pandas::TimedeltaIndex make_td_index_3() {
91478 constexpr int64_t k_ns_per_day = 86400000000000LL;
91479 std::vector<numpy::timedelta64> values;
91480 values.push_back(numpy::timedelta64(1 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91481 values.push_back(numpy::timedelta64(2 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91482 values.push_back(numpy::timedelta64(3 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91483 return pandas::TimedeltaIndex(values);
91484}
91485
91486static pandas::PeriodIndex make_period_index_3() {
91487 std::vector<std::string> periods = {"2020-01", "2020-02", "2020-03"};
91488 return pandas::PeriodIndex(periods, "M");
91489}
91490
91491static pandas::MultiIndex make_multi_index_mixed() {
91492 std::vector<std::vector<std::string>> arrays = {
91493 {"a", "b", "b"},
TimedeltaIndex (pd_test_5_all.cpp:91483)
91473 values.push_back(numpy::datetime64(k_ns_2024_01_03, numpy::DateTimeUnit::Nanosecond));
91474 return pandas::DatetimeIndex(values);
91475}
91476
91477static pandas::TimedeltaIndex make_td_index_3() {
91478 constexpr int64_t k_ns_per_day = 86400000000000LL;
91479 std::vector<numpy::timedelta64> values;
91480 values.push_back(numpy::timedelta64(1 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91481 values.push_back(numpy::timedelta64(2 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91482 values.push_back(numpy::timedelta64(3 * k_ns_per_day, numpy::DateTimeUnit::Nanosecond));
91483 return pandas::TimedeltaIndex(values);
91484}
91485
91486static pandas::PeriodIndex make_period_index_3() {
91487 std::vector<std::string> periods = {"2020-01", "2020-02", "2020-03"};
91488 return pandas::PeriodIndex(periods, "M");
91489}
91490
91491static pandas::MultiIndex make_multi_index_mixed() {
91492 std::vector<std::vector<std::string>> arrays = {
91493 {"a", "b", "b"},
get_indexer (pd_test_1_all.cpp:10332)
10322void pd_test_extension_index_get_indexer() {
10323 std::cout << "========= get_indexer =========================";
10324
10325 pandas::CategoricalArray arr1({"a", "b", "c", "d"});
10326 pandas::CategoricalIndex idx1(arr1);
10327
10328 pandas::CategoricalArray arr2({"b", "d", "x"});
10329 pandas::CategoricalIndex idx2(arr2);
10330
10331 auto indexer = idx1.get_indexer(idx2);
10332
10333 bool passed = (indexer.getSize() == 3 &&
10334 indexer.getElementAt({0}) == 1 &&
10335 indexer.getElementAt({1}) == 3 &&
10336 indexer.getElementAt({2}) == -1);
10337 if (!passed) {
10338 std::cout << " [FAIL] : in pd_test_extension_index_get_indexer() : get_indexer check failed" << std::endl;
10339 throw std::runtime_error("pd_test_extension_index_get_indexer failed");
10340 }
get_indexer_for (pd_test_3_all.cpp:716)
706// ============================================================================
707// Category 6: Index Indexer Methods
708// ============================================================================
709
710void pd_test_3_all_index_indexers() {
711 std::cout << "========= Index.get_indexer_for/non_unique/slice_indexer() ";
712
713 std::vector<std::string> vals = {"a", "b", "c", "d", "e"};
714 pandas::Index<std::string> idx(vals);
715
716 // Test get_indexer_for()
717 std::vector<std::string> target = {"b", "d", "f"}; // "f" doesn't exist
718 numpy::NDArray<numpy::int64> indexer = idx.get_indexer_for(target);
719 if (indexer.getSize() != 3) {
720 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_for size mismatch" << std::endl;
721 throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_for size");
722 }
723 // "b" is at index 1
724 if (indexer.getElementAt({0}) != 1) {
725 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : 'b' should be at index 1" << std::endl;
726 throw std::runtime_error("pd_test_3_all_index_indexers failed: 'b' index");
get_indexer_non_unique (pd_test_3_all.cpp:739)
729 if (indexer.getElementAt({1}) != 3) {
730 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : 'd' should be at index 3" << std::endl;
731 throw std::runtime_error("pd_test_3_all_index_indexers failed: 'd' index");
732 }
733 // "f" doesn't exist -> -1
734 if (indexer.getElementAt({2}) != -1) {
735 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : 'f' should be -1" << std::endl;
736 throw std::runtime_error("pd_test_3_all_index_indexers failed: 'f' index");
737 }
738
739 // Test get_indexer_non_unique()
740 std::vector<std::string> target2 = {"a", "c", "z"}; // "z" doesn't exist
741 pandas::Index<std::string> target_idx(target2);
742 auto [indexer2, missing] = idx.get_indexer_non_unique(target_idx);
743
744 if (indexer2.getSize() < 2) {
745 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_non_unique size too small" << std::endl;
746 throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_non_unique size");
747 }
748
749 // Test slice_indexer()
get_level_values (pd_test_3_all.cpp:4524)
4514 }
4515
4516 std::cout << " -> tests passed" << std::endl;
4517}
4518
4519void pd_test_3_all_interval_index_get_level_values_droplevel() {
4520 std::cout << "========= IntervalIndex.get_level_values/droplevel() ";
4521
4522 pandas::IntervalIndex64 idx = pandas::IntervalIndex64::from_breaks({0, 10, 20, 30});
4523
4524 // get_level_values(0) should work
4525 pandas::IntervalIndex64 level_vals = idx.get_level_values(0);
4526 if (level_vals.size() != idx.size()) {
4527 throw std::runtime_error("get_level_values(0) size mismatch");
4528 }
4529
4530 // get_level_values(1) should throw
4531 bool threw = false;
4532 try {
4533 idx.get_level_values(1);
4534 } catch (const std::out_of_range&) {
get_loc (pd_test_1_all.cpp:10281)
10271 bool passed = (idx.contains("apple") && idx.contains("banana") && !idx.contains("grape"));
10272 if (!passed) {
10273 std::cout << " [FAIL] : in pd_test_extension_index_contains() : contains check failed" << std::endl;
10274 throw std::runtime_error("pd_test_extension_index_contains failed");
10275 }
10276
10277 std::cout << " -> tests passed" << std::endl;
10278}
10279
10280void pd_test_extension_index_get_loc_unique() {
10281 std::cout << "========= get_loc (unique) =========================";
10282
10283 pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284 pandas::CategoricalIndex idx(arr);
10285
10286 auto loc_apple = idx.get_loc("apple");
10287 auto loc_banana = idx.get_loc("banana");
10288 auto loc_cherry = idx.get_loc("cherry");
10289
10290 bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291 std::get<size_t>(loc_banana) == 1 &&
get_loc_string (pd_test_3_all.cpp:28108)
28098 vals.push_back(numpy::timedelta64(ns, numpy::DateTimeUnit::Nanosecond));
28099 }
28100 return pandas::TimedeltaArray(vals);
28101}
28102
28103void pd_test_getitem_timedelta_str_lookup() {
28104 std::cout << " -- pd_test_getitem_timedelta_str_lookup --" << std::endl;
28105 int fail = 0;
28106 auto tda = ge_make_tda({1 * GE_NS_PER_DAY, 2 * GE_NS_PER_DAY, 3 * GE_NS_PER_DAY});
28107 pandas::TimedeltaIndex tdi(tda);
28108 auto pos = tdi.get_loc_string("2 days");
28109 if (!pos.has_value()) { std::cout << " FAIL: '2 days' not found" << std::endl; fail++; }
28110 else if (*pos != 1) { std::cout << " FAIL: expected pos=1, got " << *pos << std::endl; fail++; }
28111 if (fail == 0) std::cout << " OK" << std::endl;
28112 if (fail) throw std::runtime_error("pd_test_getitem_timedelta_str_lookup failed");
28113}
28114
28115void pd_test_getitem_timedelta_str_not_found() {
28116 std::cout << " -- pd_test_getitem_timedelta_str_not_found --" << std::endl;
28117 int fail = 0;
28118 auto tda = ge_make_tda({1 * GE_NS_PER_DAY});
get_slice_bound (pd_test_3_all.cpp:3644)
3634 formatted = idx.format(custom_formatter);
3635
3636 if (formatted[0] != "val:1") {
3637 throw std::runtime_error("custom formatter failed");
3638 }
3639
3640 std::cout << " -> tests passed" << std::endl;
3641}
3642
3643void pd_test_3_all_index_get_slice_bound() {
3644 std::cout << "========= Index.get_slice_bound() ==================";
3645
3646 pandas::Index<numpy::int64> idx({10, 20, 30, 40, 50});
3647
3648 // Exact match, left side
3649 size_t bound = idx.get_slice_bound(30, "left");
3650 if (bound != 2) {
3651 throw std::runtime_error("left bound for 30 should be 2");
3652 }
3653
3654 // Exact match, right side
get_string (pd_test_3_all.cpp:27746)
27736 }
27737 }
27738
27739 pandas::Series<numpy::int64> si({10, 20, 30}, "ints");
27740 auto result2 = si.astype("str");
27741 auto* str_s2 = dynamic_cast<pandas::Series<std::string>*>(result2.get());
27742 if (!str_s2) {
27743 std::cout << " FAIL: expected Series<string> from int" << std::endl;
27744 fail++;
27745 } else {
27746 if (str_s2->get_string(0) != "10") {
27747 std::cout << " FAIL: expected '10', got '" << str_s2->get_string(0) << "'" << std::endl;
27748 fail++;
27749 }
27750 }
27751
27752 if (fail == 0) std::cout << " OK" << std::endl;
27753}
27754
27755void pd_test_astype_datetime_to_string() {
27756 std::cout << " -- pd_test_astype_datetime_to_string --" << std::endl;
get_value_str (pd_test_1_all.cpp:4665)
4655 auto corr_df = df.corr();
4656
4657 // Check dimensions
4658 bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659 if (!passed) {
4660 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662 }
4663
4664 // Diagonal should be 1.0
4665 std::string aa = corr_df["A"].get_value_str(0);
4666 passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667 if (!passed) {
4668 std::cout << " [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669 throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670 }
4671
4672 // A-B correlation should be 1.0 (perfect correlation)
4673 std::string ab = corr_df["B"].get_value_str(0);
4674 passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675 if (!passed) {
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897 std::cout << "========= inherited take ==============================";
5898
5899 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900 pandas::CategoricalIndex idx(arr);
5901
5902 std::vector<size_t> indices = {0, 2, 3};
5903 pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905 bool passed = (taken.size() == 3);
5906 if (!passed) {
5907 std::cout << " [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908 throw std::runtime_error("pd_test_categorical_index_take failed");
5909 }
5910
5911 std::cout << " -> tests passed" << std::endl;
5912}
where (pd_test_1_all.cpp:22018)
22008 data["B"] = {5.0, 6.0, 7.0, 8.0};
22009 pandas::DataFrame df(data);
22010
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where (pd_test_1_all.cpp:22018)
22008 data["B"] = {5.0, 6.0, 7.0, 8.0};
22009 pandas::DataFrame df(data);
22010
22011 // Create condition DataFrame (values > 2)
22012 std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013 cond_data["A"] = {false, false, true, true}; // 1<=2, 2<=2, 3>2, 4>2
22014 cond_data["B"] = {true, true, true, true}; // all >2
22015 pandas::DataFrame cond(cond_data);
22016
22017 // Apply where with replacement value -1
22018 pandas::DataFrame result = df.where(cond, -1.0);
22019
22020 // Get column index for A - it's sorted alphabetically in std::map
22021 size_t col_a_idx = df.get_column_index("A");
22022 size_t col_b_idx = df.get_column_index("B");
22023
22024 bool passed = true;
22025 std::string error_msg;
22026
22027 // Check A column values
22028 std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
drop (pd_test_1_all.cpp:6558)
6548 if (df.ncols() != 2) {
6549 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550 throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551 }
6552 if (!popped) {
6553 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554 throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555 }
6556
6557 // Test drop columns
6558 auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559 if (dropped.ncols() != 1) {
6560 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562 }
6563
6564 // Test rename
6565 auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566 if (!renamed.has_column("X")) {
6567 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568 throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
drop_duplicates (pd_test_1_all.cpp:6639)
6629 }
6630 }
6631
6632 // Test drop_duplicates
6633 {
6634 std::map<std::string, std::vector<numpy::int64>> dup_data;
6635 dup_data["A"] = {1, 1, 2, 2};
6636 dup_data["B"] = {1, 1, 2, 3};
6637 pandas::DataFrame df_dup(dup_data);
6638
6639 auto deduped = df_dup.drop_duplicates();
6640 // Rows 0 and 1 are duplicates (A=1, B=1), so should have 3 rows
6641 if (deduped.nrows() != 3) {
6642 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : drop_duplicates nrows != 3, got " << deduped.nrows() << std::endl;
6643 throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644 }
6645 }
6646
6647 // Test assign
6648 {
6649 std::map<std::string, std::vector<numpy::int64>> assign_data;
droplevel (pd_test_1_all.cpp:14428)
14418 void pd_test_multiindex_droplevel() {
14419 std::cout << "========= droplevel =================================== ";
14420
14421 std::vector<std::vector<std::string>> arrays = {
14422 {"a", "a", "b"},
14423 {"x", "y", "z"},
14424 {"1", "2", "3"}
14425 };
14426
14427 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428 pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430 bool passed = true;
14431
14432 if (dropped.nlevels() != 2) {
14433 std::cout << " [FAIL] : nlevels should be 2 after drop" << std::endl;
14434 passed = false;
14435 }
14436
14437 // Check remaining levels
14438 auto tup = dropped[0];
dropna (pd_test_1_all.cpp:531)
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
insert (pd_test_1_all.cpp:12028)
12018 }
12019
12020 std::cout << " -> tests passed" << std::endl;
12021 }
12022
12023 void pd_test_index_insert_delete() {
12024 std::cout << "========= insert and delete ===========================";
12025
12026 pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028 auto inserted = idx.insert(2, 3);
12029 bool passed = (inserted.size() == 5);
12030 passed = passed && (inserted[2] == 3);
12031
12032 auto deleted = inserted.delete_(2);
12033 passed = passed && (deleted.size() == 4);
12034 passed = passed && deleted.equals(idx);
12035
12036 if (!passed) {
12037 std::cout << " [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038 throw std::runtime_error("pd_test_index_insert_delete failed");
reindex (pd_test_1_all.cpp:6708)
6698 }
6699 }
6700
6701 // Test reindex rows
6702 {
6703 std::map<std::string, std::vector<double>> data;
6704 data["A"] = {1.0, 2.0, 3.0};
6705 pandas::DataFrame df(data);
6706 df = df.set_axis({"x", "y", "z"}, 0);
6707
6708 auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709 if (reindexed.nrows() != 3) {
6710 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712 }
6713 // 'w' should have NaN
6714 std::string val = reindexed["A"].get_value_str(2);
6715 if (!std::isnan(std::stod(val))) {
6716 std::cout << " [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717 throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718 }
rename (pd_test_1_all.cpp:5816)
5806 std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810 std::cout << "========= rename ======================================";
5811
5812 pandas::CategoricalArray arr({"x", "y"});
5813 pandas::CategoricalIndex idx(arr, "old_name");
5814
5815 pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817 bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818 renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819 if (!passed) {
5820 std::cout << " [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821 throw std::runtime_error("pd_test_categorical_index_rename failed");
5822 }
5823
5824 std::cout << " -> tests passed" << std::endl;
5825}
set_names (pd_test_1_all.cpp:14519)
14509 std::cout << "-> tests passed" << std::endl;
14510 }
14511
14512 void pd_test_multiindex_set_names() {
14513 std::cout << "========= set_names =================================== ";
14514
14515 std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
14516 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14517
14518 std::vector<std::optional<std::string>> new_names = {"level_a", "level_b"};
14519 pandas::MultiIndex named = mi.set_names(new_names);
14520
14521 bool passed = (named.names()[0] == "level_a" && named.names()[1] == "level_b");
14522
14523 if (!passed) {
14524 std::cout << " [FAIL] : names not set correctly" << std::endl;
14525 throw std::runtime_error("pd_test_multiindex_set_names failed");
14526 }
14527
14528 std::cout << "-> tests passed" << std::endl;
14529 }
set_names (pd_test_1_all.cpp:14519)
14509 std::cout << "-> tests passed" << std::endl;
14510 }
14511
14512 void pd_test_multiindex_set_names() {
14513 std::cout << "========= set_names =================================== ";
14514
14515 std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
14516 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14517
14518 std::vector<std::optional<std::string>> new_names = {"level_a", "level_b"};
14519 pandas::MultiIndex named = mi.set_names(new_names);
14520
14521 bool passed = (named.names()[0] == "level_a" && named.names()[1] == "level_b");
14522
14523 if (!passed) {
14524 std::cout << " [FAIL] : names not set correctly" << std::endl;
14525 throw std::runtime_error("pd_test_multiindex_set_names failed");
14526 }
14527
14528 std::cout << "-> tests passed" << std::endl;
14529 }
fillna (pd_test_1_all.cpp:537)
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535 }
536
537 // Test fillna (fill with existing category)
538 pandas::CategoricalArray filled = arr.fillna("a"); // 'a' is in categories
539 if (filled.has_na()) {
540 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541 throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542 }
543
544 std::cout << " -> tests passed" << std::endl;
545 }
546
547 void pd_test_categorical_array_add_categories() {
isna (pd_test_1_all.cpp:524)
514 throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515 }
516
517 // Test count (non-NA)
518 if (arr.count() != 2) {
519 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520 throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521 }
522
523 // Test isna array
524 numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525 if (na_mask.getSize() != 4) {
526 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527 throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528 }
529
530 // Test dropna
531 pandas::CategoricalArray dropped = arr.dropna();
532 if (dropped.size() != 2) {
533 std::cout << " [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534 throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
676 // Index 0: 1.0 -> not null
677 if (isnull_result.getElementAt({0})) {
678 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679 throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680 }
681 // Index 1: NaN -> null
notna (pd_test_1_all.cpp:6595)
6585 if (!na_mask.getElementAt({2, 1})) {
6586 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588 }
6589 // Row 0, col 0 should NOT be NA
6590 if (na_mask.getElementAt({0, 0})) {
6591 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592 throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593 }
6594
6595 auto notna_mask = df_na.notna();
6596 if (notna_mask.getElementAt({1, 0})) {
6597 std::cout << " [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598 throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599 }
6600 }
6601
6602 // Test fillna
6603 {
6604 std::map<std::string, std::vector<numpy::float64>> float_data;
6605 float_data["X"] = {1.0, std::nan(""), 3.0};
notnull (pd_test_3_all.cpp:665)
655 }
656
657 std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665 std::cout << "========= Index.isnull/notnull() =====================";
666
667 // Test with float index (can have NaN)
668 std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669 pandas::Index<double> idx(vals);
670
671 numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672 if (isnull_result.getSize() != 4) {
673 std::cout << " [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674 throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675 }
max (pd_test_1_all.cpp:771)
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
median (pd_test_1_all.cpp:20910)
20900 throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901 }
20902
20903 std::cout << " -> tests passed" << std::endl;
20904 }
20905
20906 void pd_test_expanding_median() {
20907 std::cout << "========= Expanding median ======================";
20908
20909 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910 auto result = s.expanding().median();
20911
20912 // Expanding median: 1, 1.5, 2, 2.5, 3
20913 bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914 std::abs(result[1] - 1.5) < 0.001 &&
20915 std::abs(result[2] - 2.0) < 0.001 &&
20916 std::abs(result[3] - 2.5) < 0.001 &&
20917 std::abs(result[4] - 3.0) < 0.001;
20918 if (!passed) {
20919 std::cout << " [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920 throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754 }
755
756 void pd_test_categorical_array_ordered_operations() {
757 std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759 std::vector<std::string> cats = {"low", "medium", "high"};
760 std::vector<numpy::int32> codes = {0, 2, 1, 0, -1}; // low, high, medium, low, NA
761 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true); // ordered
762
763 // Test min
764 std::optional<std::string> min_val = arr.min();
765 if (!min_val.has_value() || *min_val != "low") {
766 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
nunique (pd_test_1_all.cpp:10604)
10594 std::cout << " -> tests passed" << std::endl;
10595}
10596
10597void pd_test_extension_index_nunique() {
10598 std::cout << "========= nunique =========================";
10599
10600 pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10601 pandas::CategoricalIndex idx(arr);
10602
10603 bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10604 if (!passed) {
10605 std::cout << " [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10606 throw std::runtime_error("pd_test_extension_index_nunique failed");
10607 }
10608
10609 std::cout << " -> tests passed" << std::endl;
10610}
10611
10612void pd_test_extension_index_factorize() {
10613 std::cout << "========= factorize =========================";
std (pd_test_1_all.cpp:4526)
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519 namespace dataframe_tests_aggregation {
4520
4521 void pd_test_aggregation_series_sem() {
4522 std::cout << "========= Series sem ============================";
4523
4524 pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525 auto sem_val = s.sem();
4526 // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527 bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528 if (!passed) {
4529 std::cout << " [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530 throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531 }
4532
4533 std::cout << " -> tests passed" << std::endl;
4534 }
4535
4536 void pd_test_aggregation_series_quantile() {
sum (pd_test_1_all.cpp:276)
266 }
267
268 // Test sum/mean
269 pandas::BooleanArray arr({
270 std::optional<bool>(true),
271 std::optional<bool>(false),
272 std::optional<bool>(true),
273 std::optional<bool>(true)
274 });
275
276 auto s = arr.sum();
277 if (!s.has_value() || s.value() != 3) {
278 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279 throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280 }
281
282 auto m = arr.mean();
283 if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285 throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286 }
groupby (pd_test_1_all.cpp:11495)
11485 std::cout << "========= GroupBy basic =========================";
11486
11487 // Create DataFrame with category column
11488 std::map<std::string, std::vector<double>> data = {
11489 {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490 {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491 };
11492 pandas::DataFrame df(data);
11493
11494 // Test groupby
11495 auto grouped = df.groupby("category");
11496
11497 bool passed = grouped.ngroups() == 2;
11498 if (!passed) {
11499 std::cout << " [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500 throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501 }
11502
11503 std::cout << " -> tests passed" << std::endl;
11504 }
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833 std::cout << "========= map =========================================";
5834
5835 pandas::CategoricalArray arr({"yes", "no", "yes"});
5836 pandas::CategoricalIndex idx(arr);
5837
5838 std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839 pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841 bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842 !mapped.has_category("yes") && !mapped.has_category("no"));
5843 if (!passed) {
5844 std::cout << " [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845 throw std::runtime_error("pd_test_categorical_index_map failed");
5846 }
5847
5848 std::cout << " -> tests passed" << std::endl;
5849}
equals (pd_test_1_all.cpp:5866)
5856 std::cout << "========= equals ======================================";
5857
5858 pandas::CategoricalArray arr1({"a", "b", "a"});
5859 pandas::CategoricalArray arr2({"a", "b", "a"});
5860 pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862 pandas::CategoricalIndex idx1(arr1);
5863 pandas::CategoricalIndex idx2(arr2);
5864 pandas::CategoricalIndex idx3(arr3);
5865
5866 bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867 if (!passed) {
5868 std::cout << " [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869 throw std::runtime_error("pd_test_categorical_index_equals failed");
5870 }
5871
5872 std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876 std::cout << "========= identical ===================================";
argsort (pd_test_1_all.cpp:1304)
1294 std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296 pandas::DatetimeArray arr(std::vector<std::string>{
1297 "2023-06-15",
1298 "NaT",
1299 "2023-01-01",
1300 "2023-12-31"
1301 });
1302
1303 // argsort ascending
1304 auto indices = arr.argsort(true, "last");
1305 // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
searchsorted (pd_test_1_all.cpp:18958)
18948 // =========================================================================
18949 // Search Tests
18950 // =========================================================================
18951
18952 void pd_test_range_index_searchsorted() {
18953 std::cout << "========= searchsorted ================================ ";
18954
18955 pandas::RangeIndex ri(0, 10, 2); // [0, 2, 4, 6, 8]
18956
18957 bool passed = (ri.searchsorted(4, "left") == 2 &&
18958 ri.searchsorted(4, "right") == 3 &&
18959 ri.searchsorted(3, "left") == 2 && // 3 would go between 2 and 4
18960 ri.searchsorted(-1, "left") == 0 && // Before all
18961 ri.searchsorted(10, "left") == 5); // After all
18962
18963 if (!passed) {
18964 std::cout << " [FAIL] : searchsorted" << std::endl;
18965 throw std::runtime_error("pd_test_range_index_searchsorted failed");
18966 }
sort_values (pd_test_1_all.cpp:6408)
6398 void pd_test_dataframe_sorting() {
6399 std::cout << "========= sorting ==========================";
6400
6401 std::map<std::string, std::vector<numpy::float64>> data;
6402 data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403 data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405 pandas::DataFrame df(data);
6406
6407 // Test sort_values ascending
6408 auto sorted_asc = df.sort_values("A", true);
6409 // First value should be smallest (1.0)
6410 std::string first_val = sorted_asc["A"].get_value_str(0);
6411 if (std::stod(first_val) != 1.0) {
6412 std::cout << " [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413 throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414 }
6415
6416 // Test sort_values descending
6417 auto sorted_desc = df.sort_values("A", false);
6418 first_val = sorted_desc["A"].get_value_str(0);
T (pd_test_1_all.cpp:128)
118 throw std::runtime_error("pd_test_boolean_array_kleene_and failed: NA & F");
119 }
120
121 std::cout << " -> tests passed" << std::endl;
122 }
123
124 void pd_test_boolean_array_kleene_or() {
125 std::cout << "========= BooleanArray: Kleene OR ======================= ";
126
127 // Kleene OR truth table:
128 // T | T = T, T | F = T, T | NA = T (True dominates)
129 // F | T = T, F | F = F, F | NA = NA
130 // NA | T = T, NA | F = NA, NA | NA = NA
131
132 pandas::BooleanArray t({std::optional<bool>(true)});
133 pandas::BooleanArray f({std::optional<bool>(false)});
134 pandas::BooleanArray na({std::nullopt});
135
136 // T | NA = T (True dominates)
137 auto tna = (t | na);
138 if (!tna[0].has_value() || !tna[0].value()) {
TimedeltaArray (pd_test_3_all.cpp:27945)
27935static pandas::TimedeltaArray make_tda(const std::vector<double>& ns_values) {
27936 std::vector<std::optional<numpy::timedelta64>> vals;
27937 vals.reserve(ns_values.size());
27938 for (double ns : ns_values) {
27939 if (std::isnan(ns)) {
27940 vals.push_back(std::nullopt);
27941 } else {
27942 vals.push_back(numpy::timedelta64(static_cast<int64_t>(ns), numpy::DateTimeUnit::Nanosecond));
27943 }
27944 }
27945 return pandas::TimedeltaArray(vals);
27946}
27947
27948void pd_test_td_decompose_positive() {
27949 std::cout << " -- pd_test_td_decompose_positive --" << std::endl;
27950 int fail = 0;
27951 constexpr int64_t NS = 1000000000LL;
27952 int64_t total_ns = (1 * 86400 + 2 * 3600 + 3 * 60 + 4) * NS;
27953 auto tda = make_tda({static_cast<double>(total_ns)});
27954 auto days = tda.days();
27955 auto secs = tda.seconds();
to_frame (pd_test_3_all.cpp:4931)
4921 size_t usage = mi.memory_usage(true);
4922 if (usage == 0) {
4923 throw std::runtime_error("memory_usage() should return > 0");
4924 }
4925
4926 std::cout << " -> tests passed" << std::endl;
4927}
4928
4929void pd_test_3_all_multiindex_to_frame() {
4930 std::cout << "========= MultiIndex.to_frame() =======================";
4931
4932 std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
4933 std::vector<std::optional<std::string>> names = {"first", "second"};
4934 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
4935
4936 auto frame = mi.to_frame();
4937 if (frame.find("first") == frame.end() || frame.find("second") == frame.end()) {
4938 throw std::runtime_error("to_frame() missing columns");
4939 }
transpose (pd_test_1_all.cpp:16648)
16638 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640 }
16641
16642 passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643 if (!passed) {
16644 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645 throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646 }
16647
16648 // Test transpose() alias
16649 auto transposed2 = s.transpose();
16650 passed = transposed2.size() == s.size();
16651 if (!passed) {
16652 std::cout << " [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653 throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654 }
16655
16656 std::cout << " -> tests passed" << std::endl;
16657 }
append (pd_test_1_all.cpp:10650)
10640 std::cout << "========= append =========================";
10641
10642 // Use same categories for both arrays (required by CategoricalArray::concat)
10643 std::vector<std::string> cats = {"a", "b", "c", "d"};
10644 pandas::CategoricalArray arr1({"a", "b"}, cats);
10645 pandas::CategoricalIndex idx1(arr1);
10646
10647 pandas::CategoricalArray arr2({"c", "d"}, cats);
10648 pandas::CategoricalIndex idx2(arr2);
10649
10650 auto appended = idx1.append(idx2);
10651
10652 bool passed = (appended.size() == 4);
10653 if (!passed) {
10654 std::cout << " [FAIL] : in pd_test_extension_index_append() : append check failed" << std::endl;
10655 throw std::runtime_error("pd_test_extension_index_append failed");
10656 }
10657
10658 std::cout << " -> tests passed" << std::endl;
10659}
append (pd_test_1_all.cpp:10650)
10640 std::cout << "========= append =========================";
10641
10642 // Use same categories for both arrays (required by CategoricalArray::concat)
10643 std::vector<std::string> cats = {"a", "b", "c", "d"};
10644 pandas::CategoricalArray arr1({"a", "b"}, cats);
10645 pandas::CategoricalIndex idx1(arr1);
10646
10647 pandas::CategoricalArray arr2({"c", "d"}, cats);
10648 pandas::CategoricalIndex idx2(arr2);
10649
10650 auto appended = idx1.append(idx2);
10651
10652 bool passed = (appended.size() == 4);
10653 if (!passed) {
10654 std::cout << " [FAIL] : in pd_test_extension_index_append() : append check failed" << std::endl;
10655 throw std::runtime_error("pd_test_extension_index_append failed");
10656 }
10657
10658 std::cout << " -> tests passed" << std::endl;
10659}
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710 std::cout << "========= concat factory ==============================";
17711
17712 std::vector<int64_t> ordinals1 = {0, 1};
17713 std::vector<int64_t> ordinals2 = {2, 3};
17714 pandas::PeriodIndex idx1(ordinals1, "D");
17715 pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717 pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719 bool passed = (concatenated.size() == 4);
17720 if (!passed) {
17721 std::cout << " [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722 throw std::runtime_error("pd_test_period_index_concat failed");
17723 }
17724
17725 std::cout << " -> tests passed" << std::endl;
17726}
join (pd_test_1_all.cpp:12353)
12343 std::cout << " -> tests passed" << std::endl;
12344 }
12345
12346 void pd_test_index_join() {
12347 std::cout << "========= join ========================================";
12348
12349 pandas::Index<numpy::int64> idx1{1, 2, 3};
12350 pandas::Index<numpy::int64> idx2{2, 3, 4};
12351
12352 auto [inner_joined, left_idx, right_idx] = idx1.join(idx2, "inner");
12353 bool passed = (inner_joined.size() == 2); // {2, 3}
12354
12355 auto [outer_joined, ol_idx, or_idx] = idx1.join(idx2, "outer");
12356 passed = passed && (outer_joined.size() == 4); // {1, 2, 3, 4}
12357
12358 if (!passed) {
12359 std::cout << " [FAIL] : in pd_test_index_join() : join failed" << std::endl;
12360 throw std::runtime_error("pd_test_index_join failed");
12361 }
asof (pd_test_2_all.cpp:366)
356 std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357 return 0;
358 }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
asof_locs (pd_test_3_all.cpp:3557)
3547 throw std::runtime_error("all() should be true for empty index");
3548 }
3549 if (empty_idx.any()) {
3550 throw std::runtime_error("any() should be false for empty index");
3551 }
3552
3553 std::cout << " -> tests passed" << std::endl;
3554}
3555
3556void pd_test_3_all_index_asof() {
3557 std::cout << "========= Index.asof()/asof_locs() =================";
3558
3559 // Test with monotonically increasing index
3560 pandas::Index<numpy::int64> idx({10, 20, 30, 40, 50});
3561
3562 // Exact match
3563 auto result = idx.asof(30);
3564 if (!result.has_value() || result.value() != 30) {
3565 throw std::runtime_error("asof() exact match should return 30");
3566 }
diff (pd_test_1_all.cpp:5171)
5161 }
5162
5163 void pd_test_arithmetic_dataframe_diff_shift() {
5164 std::cout << "========= DataFrame diff/shift ==================";
5165
5166 std::map<std::string, std::vector<double>> data;
5167 data["A"] = {1.0, 3.0, 6.0, 10.0};
5168 pandas::DataFrame df(data);
5169
5170 // diff: [NaN, 2, 3, 4]
5171 auto d = df.diff();
5172 std::string val = d["A"].get_value_str(1);
5173 bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174 if (!passed) {
5175 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177 }
5178
5179 // First element should be NaN
5180 val = d["A"].get_value_str(0);
5181 passed = std::isnan(std::stod(val));
difference (pd_test_1_all.cpp:10718)
10708 std::cout << "========= difference =========================";
10709
10710 // Use same categories for both arrays
10711 std::vector<std::string> cats = {"a", "b", "c", "d"};
10712 pandas::CategoricalArray arr1({"a", "b", "c", "d"}, cats);
10713 pandas::CategoricalIndex idx1(arr1);
10714
10715 pandas::CategoricalArray arr2({"b", "d"}, cats);
10716 pandas::CategoricalIndex idx2(arr2);
10717
10718 auto diff = idx1.difference(idx2);
10719
10720 bool passed = (diff.size() == 2 &&
10721 diff.contains("a") && diff.contains("c") &&
10722 !diff.contains("b") && !diff.contains("d"));
10723 if (!passed) {
10724 std::cout << " [FAIL] : in pd_test_extension_index_difference() : difference check failed" << std::endl;
10725 throw std::runtime_error("pd_test_extension_index_difference failed");
10726 }
10727
10728 std::cout << " -> tests passed" << std::endl;
shift (pd_test_1_all.cpp:5188)
5178 // First element should be NaN
5179 val = d["A"].get_value_str(0);
5180 passed = std::isnan(std::stod(val));
5181 if (!passed) {
5182 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5183 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5184 }
5185
5186 // shift: [NaN, 1, 3, 6]
5187 auto s = df.shift();
5188 val = s["A"].get_value_str(1);
5189 passed = std::abs(std::stod(val) - 1.0) < 0.001;
5190 if (!passed) {
5191 std::cout << " [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5192 throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5193 }
5194
5195 std::cout << " -> tests passed" << std::endl;
5196 }
to_flat_index (pd_test_1_all.cpp:14733)
14723 void pd_test_multiindex_to_flat_index() {
14724 std::cout << "========= to_flat_index =============================== ";
14725
14726 std::vector<std::vector<std::string>> arrays = {
14727 {"a", "b"},
14728 {"x", "y"}
14729 };
14730
14731 pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14732 auto flat = mi.to_flat_index();
14733
14734 bool passed = (flat.size() == 2 &&
14735 flat[0][0] == "a" && flat[0][1] == "x" &&
14736 flat[1][0] == "b" && flat[1][1] == "y");
14737
14738 if (!passed) {
14739 std::cout << " [FAIL] : to_flat_index incorrect" << std::endl;
14740 throw std::runtime_error("pd_test_multiindex_to_flat_index failed");
14741 }
to_list (pd_test_1_all.cpp:10247)
10237 std::cout << " -> tests passed" << std::endl;
10238}
10239
10240void pd_test_extension_index_to_list() {
10241 std::cout << "========= to_list =========================";
10242
10243 pandas::CategoricalArray arr({"x", "y", "z"});
10244 pandas::CategoricalIndex idx(arr);
10245
10246 auto list = idx.to_list();
10247
10248 bool passed = (list.size() == 3 &&
10249 list[0].has_value() && *list[0] == "x" &&
10250 list[1].has_value() && *list[1] == "y" &&
10251 list[2].has_value() && *list[2] == "z");
10252 if (!passed) {
10253 std::cout << " [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10254 throw std::runtime_error("pd_test_extension_index_to_list failed");
10255 }
to_pytimedelta (pd_test_3_all.cpp:5952)
5942 }
5943
5944 std::cout << " -> tests passed" << std::endl;
5945}
5946
5947// ============================================================================
5948// Category 21: Timedelta Plan 9 - Missing Functions
5949// ============================================================================
5950
5951void pd_test_3_all_timedelta_to_pytimedelta() {
5952 std::cout << "========= Timedelta.to_pytimedelta() =================";
5953
5954 // Test PyTimedelta struct
5955 pandas::PyTimedelta td1;
5956 if (td1.days != 0 || td1.seconds != 0 || td1.microseconds != 0) {
5957 throw std::runtime_error("PyTimedelta default constructor failed");
5958 }
5959
5960 pandas::PyTimedelta td2(5, 3600, 500000);
5961 if (td2.days != 5 || td2.seconds != 3600 || td2.microseconds != 500000) {
5962 throw std::runtime_error("PyTimedelta parameterized constructor failed");
to_series (pd_test_3_all.cpp:5788)
5778 throw std::runtime_error("to_frame use_index should be false when index=false");
5779 }
5780 if (frame3.column_name != "0") {
5781 throw std::runtime_error("to_frame column_name should be '0' when no name");
5782 }
5783
5784 std::cout << " -> tests passed" << std::endl;
5785}
5786
5787void pd_test_3_all_period_index_to_series() {
5788 std::cout << "========= PeriodIndex.to_series() =====================";
5789
5790 pandas::PeriodIndex idx = make_period_index({1, 2, 3}, "M").rename("periods");
5791
5792 // Test to_series() with default parameters
5793 pandas::PeriodIndex::SeriesData series = idx.to_series();
5794
5795 // values should have same size
5796 if (series.values.size() != 3) {
5797 throw std::runtime_error("to_series values size should be 3");
5798 }
to_string (pd_test_1_all.cpp:2693)
2683 pandas::PeriodArray arr_m(std::vector<std::string>{
2684 "2020-01",
2685 "NaT",
2686 "2025-06"
2687 }, "M");
2688
2689 // Year
2690 auto years = arr_m.year();
2691 auto y0 = years[0];
2692 if (!y0.has_value() || y0.value() != 2020) {
2693 std::cout << " [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695 }
2696
2697 auto y1 = years[1];
2698 if (y1.has_value()) {
2699 std::cout << " [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700 throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701 }
2702
2703 auto y2 = years[2];
tolist (pd_test_3_all.cpp:2300)
2290 threw = true;
2291 }
2292 if (!threw) {
2293 throw std::runtime_error("swapaxes should throw for invalid axes");
2294 }
2295
2296 std::cout << " -> tests passed" << std::endl;
2297}
2298
2299void pd_test_3_all_categorical_to_list() {
2300 std::cout << "========= CategoricalArray.to_list()/tolist() =========";
2301
2302 std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "c"};
2303 pandas::CategoricalArray arr(values);
2304
2305 auto list = arr.to_list();
2306 if (list.size() != 4 || *list[0] != "a" || *list[1] != "b" ||
2307 list[2].has_value() || *list[3] != "c") {
2308 throw std::runtime_error("to_list failed");
2309 }
astype (pd_test_1_all.cpp:21292)
21282 std::cout << "========= astype all columns to float64 =============";
21283
21284 // Create DataFrame with int64 columns
21285 std::map<std::string, std::vector<numpy::int64>> data;
21286 data["A"] = {1, 2, 3, 4, 5};
21287 data["B"] = {10, 20, 30, 40, 50};
21288
21289 pandas::DataFrame df(data);
21290
21291 // Convert all columns to float64
21292 pandas::DataFrame df_float = df.astype("float64");
21293
21294 // Verify dtype changed
21295 pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297 bool passed = true;
21298 if (dtypes[static_cast<size_t>(0)] != "float64") {
21299 std::cout << " [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300 passed = false;
21301 }
21302 if (dtypes[static_cast<size_t>(1)] != "float64") {
astype_int64 (pd_test_3_all.cpp:5178)
5168 auto locs = idx.asof_locs({3, 4, 10});
5169 if (locs.size() != 3) {
5170 throw std::runtime_error("asof_locs should return 3 locations");
5171 }
5172
5173 std::cout << " -> tests passed" << std::endl;
5174 }
5175
5176 void pd_test_3_all_period_index_astype() {
5177 std::cout << "========= PeriodIndex.astype_int64() ==================";
5178
5179 pandas::PeriodIndex idx = make_period_index({0, 1, 2}, "D");
5180 auto int_arr = idx.astype_int64();
5181
5182 if (int_arr.getSize() != 3) {
5183 throw std::runtime_error("astype_int64 should return 3 elements");
5184 }
5185 if (int_arr.getElementAt({0}) != 0 || int_arr.getElementAt({1}) != 1) {
5186 throw std::runtime_error("astype_int64 should preserve ordinal values");
5187 }
copy (pd_test_1_all.cpp:5798)
5788 // ============================================================================
5789 // Copy/Rename Tests
5790 // ============================================================================
5791
5792 void pd_test_categorical_index_copy() {
5793 std::cout << "========= copy ========================================";
5794
5795 pandas::CategoricalArray arr({"a", "b", "c"});
5796 pandas::CategoricalIndex idx(arr, "original");
5797
5798 pandas::CategoricalIndex copied = idx.copy();
5799
5800 bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801 copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802 if (!passed) {
5803 std::cout << " [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804 throw std::runtime_error("pd_test_categorical_index_copy failed");
5805 }
5806
5807 std::cout << " -> tests passed" << std::endl;
5808 }
infer_objects (pd_test_1_all.cpp:27595)
27585 // Create DataFrame with string column containing integers
27586 std::map<std::string, std::vector<std::string>> data;
27587 data["A"] = {"1", "2", "3", "4", "5"};
27588
27589 pandas::DataFrame df(data);
27590
27591 // Before inference, dtype should be string/object
27592 std::string before_dtype = df["A"].dtype_name();
27593
27594 // Apply infer_objects
27595 pandas::DataFrame result = df.infer_objects();
27596
27597 // After inference, dtype should be int64
27598 std::string after_dtype = result["A"].dtype_name();
27599
27600 bool passed = (after_dtype == "int64");
27601 if (!passed) {
27602 std::cout << " [FAIL] : in pd_test_infer_objects_integer_column() : expected int64, got " << after_dtype << std::endl;
27603 throw std::runtime_error("pd_test_infer_objects_integer_column failed");
27604 }
view (pd_test_3_all.cpp:2147)
2137 throw std::runtime_error("memory_usage shallow too small");
2138 }
2139 if (deep < shallow) {
2140 throw std::runtime_error("memory_usage deep should be >= shallow");
2141 }
2142
2143 std::cout << " -> tests passed" << std::endl;
2144 }
2145
2146 void pd_test_3_all_categorical_ravel_view() {
2147 std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150 pandas::CategoricalArray arr(values);
2151
2152 auto raveled = arr.ravel();
2153 if (raveled.size() != 3 || !raveled.equals(arr)) {
2154 throw std::runtime_error("ravel failed");
2155 }
2156
2157 auto viewed = arr.view();
duplicated (pd_test_1_all.cpp:10583)
10573 std::cout << " -> tests passed" << std::endl;
10574 }
10575
10576 void pd_test_extension_index_duplicated() {
10577 std::cout << "========= duplicated =========================";
10578
10579 pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10580 pandas::CategoricalIndex idx(arr);
10581
10582 auto dup_mask = idx.duplicated("first");
10583
10584 bool passed = (dup_mask.getElementAt({0}) == false &&
10585 dup_mask.getElementAt({1}) == false &&
10586 dup_mask.getElementAt({2}) == true &&
10587 dup_mask.getElementAt({3}) == false &&
10588 dup_mask.getElementAt({4}) == true);
10589 if (!passed) {
10590 std::cout << " [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10591 throw std::runtime_error("pd_test_extension_index_duplicated failed");
10592 }
intersection (pd_test_1_all.cpp:10672)
10662 std::cout << "========= intersection =========================";
10663
10664 // Use same categories for both arrays
10665 std::vector<std::string> cats = {"a", "b", "c", "d", "e", "f"};
10666 pandas::CategoricalArray arr1({"a", "b", "c", "d"}, cats);
10667 pandas::CategoricalIndex idx1(arr1);
10668
10669 pandas::CategoricalArray arr2({"b", "c", "e", "f"}, cats);
10670 pandas::CategoricalIndex idx2(arr2);
10671
10672 auto inter = idx1.intersection(idx2);
10673
10674 bool passed = (inter.size() == 2 && inter.contains("b") && inter.contains("c"));
10675 if (!passed) {
10676 std::cout << " [FAIL] : in pd_test_extension_index_intersection() : intersection check failed" << std::endl;
10677 throw std::runtime_error("pd_test_extension_index_intersection failed");
10678 }
10679
10680 std::cout << " -> tests passed" << std::endl;
10681 }
isin (pd_test_1_all.cpp:5938)
5928 std::cout << " -> tests passed" << std::endl;
5929 }
5930
5931 void pd_test_categorical_index_isin() {
5932 std::cout << "========= inherited isin ==============================";
5933
5934 pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935 pandas::CategoricalIndex idx(arr);
5936
5937 std::vector<std::string> values = {"a", "c"};
5938 numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940 bool passed = (mask.getSize() == 4 &&
5941 mask.getElementAt({0}) == true && // a
5942 mask.getElementAt({1}) == false && // b
5943 mask.getElementAt({2}) == true && // c
5944 mask.getElementAt({3}) == false); // d
5945 if (!passed) {
5946 std::cout << " [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947 throw std::runtime_error("pd_test_categorical_index_isin failed");
5948 }
symmetric_difference (pd_test_1_all.cpp:10742)
10732 std::cout << "========= symmetric_difference =========================";
10733
10734 // Use same categories for both arrays
10735 std::vector<std::string> cats = {"a", "b", "c", "d"};
10736 pandas::CategoricalArray arr1({"a", "b", "c"}, cats);
10737 pandas::CategoricalIndex idx1(arr1);
10738
10739 pandas::CategoricalArray arr2({"b", "c", "d"}, cats);
10740 pandas::CategoricalIndex idx2(arr2);
10741
10742 auto sym_diff = idx1.symmetric_difference(idx2);
10743
10744 bool passed = (sym_diff.size() == 2 &&
10745 sym_diff.contains("a") && sym_diff.contains("d") &&
10746 !sym_diff.contains("b") && !sym_diff.contains("c"));
10747 if (!passed) {
10748 std::cout << " [FAIL] : in pd_test_extension_index_symmetric_difference() : symmetric_difference check failed" << std::endl;
10749 throw std::runtime_error("pd_test_extension_index_symmetric_difference failed");
10750 }
10751
10752 std::cout << " -> tests passed" << std::endl;
union_ (pd_test_1_all.cpp:10694)
10684 std::cout << "========= union =========================";
10685
10686 // Use same categories for both arrays
10687 std::vector<std::string> cats = {"a", "b", "c", "d", "e"};
10688 pandas::CategoricalArray arr1({"a", "b", "c"}, cats);
10689 pandas::CategoricalIndex idx1(arr1);
10690
10691 pandas::CategoricalArray arr2({"b", "c", "d", "e"}, cats);
10692 pandas::CategoricalIndex idx2(arr2);
10693
10694 auto uni = idx1.union_(idx2);
10695
10696 bool passed = (uni.size() == 5 &&
10697 uni.contains("a") && uni.contains("b") && uni.contains("c") &&
10698 uni.contains("d") && uni.contains("e"));
10699 if (!passed) {
10700 std::cout << " [FAIL] : in pd_test_extension_index_union() : union check failed" << std::endl;
10701 throw std::runtime_error("pd_test_extension_index_union failed");
10702 }
10703
10704 std::cout << " -> tests passed" << std::endl;
unique (pd_test_1_all.cpp:1345)
1335 pandas::DatetimeArray arr(std::vector<std::string>{
1336 "2023-01-01",
1337 "2023-06-15",
1338 "2023-01-01",
1339 "NaT",
1340 "2023-06-15",
1341 "NaT"
1342 });
1343
1344 // unique
1345 auto uniq = arr.unique();
1346 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347 if (uniq.size() != 3) {
1348 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350 }
1351
1352 // factorize
1353 auto [codes, uniques] = arr.factorize();
1354 // Codes for NaT should be -1
1355 if (codes.getElementAt({3}) != -1) {
is_ (pd_test_3_all.cpp:3972)
3962 // For typed Index, this is a no-op
3963 if (result.size() != 5) {
3964 throw std::runtime_error("infer_objects size should be 5");
3965 }
3966
3967 std::cout << " -> tests passed" << std::endl;
3968 }
3969
3970 void pd_test_3_all_index_is_() {
3971 std::cout << "========= Index.is_() ==============================";
3972
3973 pandas::Index<numpy::int64> idx1({1, 2, 3, 4, 5});
3974 pandas::Index<numpy::int64> idx2({1, 2, 3, 4, 5}); // Different object
3975
3976 // Different objects should not be the same
3977 if (idx1.is_(idx2)) {
3978 throw std::runtime_error("different objects should not be is_() equal");
3979 }
3980
3981 // Same object should be the same
is_boolean (pd_test_3_all.cpp:3290)
3280 std::cout << " -> tests passed" << std::endl;
3281 }
3282
3283 void pd_test_3_all_datetime_index_type_checks() {
3284 std::cout << "========= DatetimeIndex type checks ======================";
3285
3286 pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
3287
3288 // Type check methods
3289 if (idx.is_boolean()) {
3290 throw std::runtime_error("is_boolean() should be false");
3291 }
3292 if (idx.is_categorical()) {
3293 throw std::runtime_error("is_categorical() should be false");
3294 }
3295 if (idx.is_floating()) {
3296 throw std::runtime_error("is_floating() should be false");
3297 }
3298 if (idx.is_integer()) {
3299 throw std::runtime_error("is_integer() should be false");
is_categorical (pd_test_3_all.cpp:3293)
3283 void pd_test_3_all_datetime_index_type_checks() {
3284 std::cout << "========= DatetimeIndex type checks ======================";
3285
3286 pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
3287
3288 // Type check methods
3289 if (idx.is_boolean()) {
3290 throw std::runtime_error("is_boolean() should be false");
3291 }
3292 if (idx.is_categorical()) {
3293 throw std::runtime_error("is_categorical() should be false");
3294 }
3295 if (idx.is_floating()) {
3296 throw std::runtime_error("is_floating() should be false");
3297 }
3298 if (idx.is_integer()) {
3299 throw std::runtime_error("is_integer() should be false");
3300 }
3301 if (idx.is_interval()) {
3302 throw std::runtime_error("is_interval() should be false");
is_floating (pd_test_3_all.cpp:622)
612 // Test with integer index
613 pandas::IndexDtype<numpy::int64> int_dtype;
614 if (!int_dtype.is_numeric()) {
615 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
616 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617 }
618 if (!int_dtype.is_integer()) {
619 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621 }
622 if (int_dtype.is_floating()) {
623 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625 }
626 if (int_dtype.is_object()) {
627 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
629 }
630
631 // Test with float index
632 pandas::IndexDtype<double> float_dtype;
is_integer (pd_test_3_all.cpp:618)
608 void pd_test_3_all_index_dtype_checks() {
609 std::cout << "========= IndexDtype.is_numeric/integer/floating/object() ";
610
611 // Test with integer index
612 pandas::IndexDtype<numpy::int64> int_dtype;
613 if (!int_dtype.is_numeric()) {
614 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
615 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
616 }
617 if (!int_dtype.is_integer()) {
618 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
619 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
620 }
621 if (int_dtype.is_floating()) {
622 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
623 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
624 }
625 if (int_dtype.is_object()) {
626 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
627 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
is_interval (pd_test_3_all.cpp:3302)
3292 }
3293 if (idx.is_categorical()) {
3294 throw std::runtime_error("is_categorical() should be false");
3295 }
3296 if (idx.is_floating()) {
3297 throw std::runtime_error("is_floating() should be false");
3298 }
3299 if (idx.is_integer()) {
3300 throw std::runtime_error("is_integer() should be false");
3301 }
3302 if (idx.is_interval()) {
3303 throw std::runtime_error("is_interval() should be false");
3304 }
3305 if (idx.is_numeric()) {
3306 throw std::runtime_error("is_numeric() should be false");
3307 }
3308 if (idx.is_object()) {
3309 throw std::runtime_error("is_object() should be false");
3310 }
3311 if (idx.holds_integer()) {
3312 throw std::runtime_error("holds_integer() should be false");
is_numeric (pd_test_3_all.cpp:614)
604 // ============================================================================
605 // Category 4: Index Type Checking (IndexDtype)
606 // ============================================================================
607
608 void pd_test_3_all_index_dtype_checks() {
609 std::cout << "========= IndexDtype.is_numeric/integer/floating/object() ";
610
611 // Test with integer index
612 pandas::IndexDtype<numpy::int64> int_dtype;
613 if (!int_dtype.is_numeric()) {
614 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be numeric" << std::endl;
615 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
616 }
617 if (!int_dtype.is_integer()) {
618 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
619 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
620 }
621 if (int_dtype.is_floating()) {
622 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
623 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
is_object (pd_test_3_all.cpp:626)
616 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_numeric");
617 }
618 if (!int_dtype.is_integer()) {
619 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should be integer" << std::endl;
620 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_integer");
621 }
622 if (int_dtype.is_floating()) {
623 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be floating" << std::endl;
624 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_floating");
625 }
626 if (int_dtype.is_object()) {
627 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : int should not be object" << std::endl;
628 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: int is_object");
629 }
630
631 // Test with float index
632 pandas::IndexDtype<double> float_dtype;
633 if (!float_dtype.is_numeric()) {
634 std::cout << " [FAIL] : in pd_test_3_all_index_dtype_checks() : float should be numeric" << std::endl;
635 throw std::runtime_error("pd_test_3_all_index_dtype_checks failed: float is_numeric");
636 }
all (pd_test_1_all.cpp:247)
237 pandas::BooleanArray has_true({
238 std::optional<bool>(false),
239 std::optional<bool>(true)
240 });
241 any_result = has_true.any();
242 if (!any_result.has_value() || !any_result.value()) {
243 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244 throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245 }
246
247 // Test all()
248 pandas::BooleanArray all_true({
249 std::optional<bool>(true),
250 std::optional<bool>(true)
251 });
252 auto all_result = all_true.all();
253 if (!all_result.has_value() || !all_result.value()) {
254 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255 throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256 }
any (pd_test_1_all.cpp:226)
216 std::cout << " [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217 throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218 }
219
220 std::cout << " -> tests passed" << std::endl;
221 }
222
223 void pd_test_boolean_array_reductions() {
224 std::cout << "========= BooleanArray: reductions ======================= ";
225
226 // Test any()
227 pandas::BooleanArray all_false({
228 std::optional<bool>(false),
229 std::optional<bool>(false)
230 });
231 auto any_result = all_false.any();
232 if (!any_result.has_value() || any_result.value()) {
233 std::cout << " [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234 throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235 }
argmax (pd_test_1_all.cpp:1323)
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327 }
1328
1329 std::cout << " -> tests passed" << std::endl;
1330 }
1331
1332 void pd_test_datetime_array_unique() {
1333 std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306 if (indices.getElementAt({0}) != 2) {
1307 std::cout << " [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308 throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309 }
1310 if (indices.getElementAt({3}) != 1) {
1311 std::cout << " [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312 throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313 }
1314
1315 // argmin
1316 auto min_idx = arr.argmin();
1317 if (!min_idx.has_value() || min_idx.value() != 2) {
1318 std::cout << " [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320 }
1321
1322 // argmax
1323 auto max_idx = arr.argmax();
1324 if (!max_idx.has_value() || max_idx.value() != 3) {
1325 std::cout << " [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326 throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
arr (pd_test_1_all.cpp:45)
35 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36 throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37 }
38
39 std::cout << " -> tests passed" << std::endl;
40 }
41
42 void pd_test_boolean_array_na_handling() {
43 std::cout << "========= BooleanArray: NA handling ======================= ";
44
45 pandas::BooleanArray arr({
46 std::optional<bool>(true),
47 std::nullopt, // NA at index 1
48 std::optional<bool>(false)
49 });
50
51 if (!arr.is_na(1)) {
52 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54 }
arr (pd_test_1_all.cpp:45)
35 std::cout << " [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36 throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37 }
38
39 std::cout << " -> tests passed" << std::endl;
40 }
41
42 void pd_test_boolean_array_na_handling() {
43 std::cout << "========= BooleanArray: NA handling ======================= ";
44
45 pandas::BooleanArray arr({
46 std::optional<bool>(true),
47 std::nullopt, // NA at index 1
48 std::optional<bool>(false)
49 });
50
51 if (!arr.is_na(1)) {
52 std::cout << " [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53 throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54 }
as_unit (pd_test_1_all.cpp:9361)
9351 data.setElementAt({1}, numpy::datetime64(2000000000LL, numpy::DateTimeUnit::Nanosecond)); // 2 seconds in ns
9352
9353 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
9354 mask.setElementAt({0}, numpy::bool_(false));
9355 mask.setElementAt({1}, numpy::bool_(false));
9356
9357 pandas::DatetimeArray arr(data, mask);
9358 pandas::DatetimeTDMixin idx(arr, "test");
9359
9360 // Convert to microseconds
9361 pandas::DatetimeTDMixin us_idx = idx.as_unit("us");
9362
9363 // Convert to same unit (should return identical)
9364 pandas::DatetimeTDMixin same_idx = idx.as_unit("ns");
9365
9366 bool passed = (us_idx.size() == 2 && same_idx.size() == 2 &&
9367 us_idx.name().has_value() && *us_idx.name() == "test");
9368 if (!passed) {
9369 std::cout << " [FAIL] : in pd_test_datetime_as_unit() : as_unit check failed" << std::endl;
9370 throw std::runtime_error("pd_test_datetime_as_unit failed");
9371 }
ceil (pd_test_1_all.cpp:4949)
4939 throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4940 }
4941
4942 auto f = a.floor();
4943 passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4944 if (!passed) {
4945 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4946 throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4947 }
4948
4949 auto c = a.ceil();
4950 passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4951 if (!passed) {
4952 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
4953 throw std::runtime_error("pd_test_arithmetic_series_round failed: ceil failed");
4954 }
4955
4956 // Round with decimals
4957 pandas::Series<double> b({1.234, 2.567, 3.891});
4958 auto r2 = b.round(2);
4959 passed = std::abs(r2[0] - 1.23) < 0.001 && std::abs(r2[1] - 2.57) < 0.001;
clone (pd_test_1_all.cpp:5776)
5766 std::cout << " -> tests passed" << std::endl;
5767 }
5768
5769 void pd_test_categorical_index_clone() {
5770 std::cout << "========= clone =======================================";
5771
5772 pandas::CategoricalArray arr({"p", "q", "r"});
5773 pandas::CategoricalIndex idx(arr, "original");
5774
5775 std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5776
5777 bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5778 cloned->name() == idx.name());
5779 if (!passed) {
5780 std::cout << " [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5781 throw std::runtime_error("pd_test_categorical_index_clone failed");
5782 }
5783
5784 std::cout << " -> tests passed" << std::endl;
5785 }
components (pd_test_1_all.cpp:19750)
19740 void pd_test_timedelta_index_components() {
19741 std::cout << "========= components property =========================";
19742
19743 std::vector<std::optional<numpy::timedelta64>> values = {
19744 make_td(NS_PER_DAY + 2 * NS_PER_HOUR + 30 * NS_PER_MIN + 15 * NS_PER_SEC)
19745 };
19746 pandas::TimedeltaArray arr(values);
19747 pandas::TimedeltaIndex idx(arr);
19748
19749 auto comps = idx.components();
19750
19751 bool passed = (comps.days.size() == 1 &&
19752 comps.hours.size() == 1 &&
19753 comps.minutes.size() == 1 &&
19754 comps.seconds.size() == 1);
19755
19756 auto d = comps.days[0];
19757 auto h = comps.hours[0];
19758 auto m = comps.minutes[0];
19759 auto s = comps.seconds[0];
days (pd_test_1_all.cpp:4160)
4150 void pd_test_timedelta_array_component_days() {
4151 std::cout << "========= TimedeltaArray: days component ======================= ";
4152
4153 pandas::TimedeltaArray arr({
4154 std::optional<numpy::timedelta64>(numpy::timedelta64(3, numpy::DateTimeUnit::Day)),
4155 std::nullopt,
4156 std::optional<numpy::timedelta64>(numpy::timedelta64(36, numpy::DateTimeUnit::Hour)) // 1.5 days
4157 });
4158
4159 auto days_arr = arr.days();
4160
4161 auto d0 = days_arr[0];
4162 if (!d0.has_value() || d0.value() != 3) {
4163 std::cout << " [FAIL] : days[0] should be 3" << std::endl;
4164 throw std::runtime_error("pd_test_timedelta_array_component_days failed: days[0]");
4165 }
4166
4167 auto d1 = days_arr[1];
4168 if (d1.has_value()) {
4169 std::cout << " [FAIL] : days[1] should be NA (NaT)" << std::endl;
delete_ (pd_test_1_all.cpp:10501)
10491 std::cout << " -> tests passed" << std::endl;
10492 }
10493
10494 void pd_test_extension_index_delete() {
10495 std::cout << "========= delete_ =========================";
10496
10497 pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498 pandas::CategoricalIndex idx(arr);
10499
10500 auto deleted = idx.delete_(1);
10501 auto v0 = deleted[0];
10502 auto v1 = deleted[1];
10503 auto v2 = deleted[2];
10504
10505 bool passed = (deleted.size() == 3 &&
10506 v0.has_value() && *v0 == "a" &&
10507 v1.has_value() && *v1 == "c" &&
10508 v2.has_value() && *v2 == "d");
10509 if (!passed) {
10510 std::cout << " [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
delete_ (pd_test_1_all.cpp:10501)
10491 std::cout << " -> tests passed" << std::endl;
10492 }
10493
10494 void pd_test_extension_index_delete() {
10495 std::cout << "========= delete_ =========================";
10496
10497 pandas::CategoricalArray arr({"a", "b", "c", "d"});
10498 pandas::CategoricalIndex idx(arr);
10499
10500 auto deleted = idx.delete_(1);
10501 auto v0 = deleted[0];
10502 auto v1 = deleted[1];
10503 auto v2 = deleted[2];
10504
10505 bool passed = (deleted.size() == 3 &&
10506 v0.has_value() && *v0 == "a" &&
10507 v1.has_value() && *v1 == "c" &&
10508 v2.has_value() && *v2 == "d");
10509 if (!passed) {
10510 std::cout << " [FAIL] : in pd_test_extension_index_delete() : delete_ check failed" << std::endl;
dtype_str (pd_test_1_all.cpp:17251)
17241 std::cout << " -> tests passed" << std::endl;
17242 }
17243
17244 void pd_test_period_index_dtype_str() {
17245 std::cout << "========= dtype_str property ==========================";
17246
17247 std::vector<int64_t> ordinals = {0, 1};
17248 pandas::PeriodIndex idx = pandas::PeriodIndex::from_ordinals(ordinals, "M");
17249
17250 std::string dt_str = idx.dtype_str();
17251
17252 bool passed = (dt_str.find("period[") != std::string::npos);
17253 if (!passed) {
17254 std::cout << " [FAIL] : in pd_test_period_index_dtype_str() got: " << dt_str << std::endl;
17255 throw std::runtime_error("pd_test_period_index_dtype_str failed");
17256 }
17257
17258 std::cout << " -> tests passed" << std::endl;
17259 }
factorize (pd_test_1_all.cpp:1353)
1343 // unique
1344 auto uniq = arr.unique();
1345 // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1346 if (uniq.size() != 3) {
1347 std::cout << " [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1348 throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1349 }
1350
1351 // factorize
1352 auto [codes, uniques] = arr.factorize();
1353 // Codes for NaT should be -1
1354 if (codes.getElementAt({3}) != -1) {
1355 std::cout << " [FAIL] : factorize: NaT code should be -1" << std::endl;
1356 throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1357 }
1358 // Same values should have same codes
1359 if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1360 std::cout << " [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1361 throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1362 }
floor (pd_test_1_all.cpp:4942)
4932 pandas::Series<double> a({1.4, 2.5, 3.6, -1.4, -2.5});
4933
4934 auto r = a.round();
4935 bool passed = std::abs(r[0] - 1.0) < 0.001 && std::abs(r[2] - 4.0) < 0.001;
4936 if (!passed) {
4937 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : round failed" << std::endl;
4938 throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4939 }
4940
4941 auto f = a.floor();
4942 passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4943 if (!passed) {
4944 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4945 throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4946 }
4947
4948 auto c = a.ceil();
4949 passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4950 if (!passed) {
4951 std::cout << " [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
format (main.cpp:20)
10 int main() {
11 // Automatically log all output to temp/pd_test_output.log
12 numpy::TestLogger logger("temp/pd_test_output.log");
13
14 int res = 0;
15 int res1 = 0;
16 std::string resS = "";
17
18 // call all the tests
19 res1 = dataframe_tests::pd_test_main();
20 resS += std::format(" pd_test_main: {} errors\n", res1);
21 res += res1;
22
23 std::cout << "\n------------------------- main --------------------------------------------\n";
24 std::cout << std::endl << "All tests completed. Nb errors = " << res << std::endl;
25 std::cout << "Details: \n" << resS;
26 std::cout << "\n---------------------------------------------------------------------------\n";
27 return res;
28 }
holds_integer (pd_test_3_all.cpp:3311)
3301 }
3302 if (idx.is_interval()) {
3303 throw std::runtime_error("is_interval() should be false");
3304 }
3305 if (idx.is_numeric()) {
3306 throw std::runtime_error("is_numeric() should be false");
3307 }
3308 if (idx.is_object()) {
3309 throw std::runtime_error("is_object() should be false");
3310 }
3311 if (idx.holds_integer()) {
3312 throw std::runtime_error("holds_integer() should be false");
3313 }
3314
3315 std::cout << " -> tests passed" << std::endl;
3316 }
3317
3318 void pd_test_3_all_datetime_index_sort() {
3319 std::cout << "========= DatetimeIndex.sort_values() ====================";
3320
3321 pandas::DatetimeIndex idx = pandas::date_range("2024-01-01", "2024-01-05", std::nullopt, "D");
identical (pd_test_1_all.cpp:5883)
5873 }
5874
5875 void pd_test_categorical_index_identical() {
5876 std::cout << "========= identical ===================================";
5877
5878 pandas::CategoricalArray arr({"a", "b"});
5879 pandas::CategoricalIndex idx1(arr, "same_name");
5880 pandas::CategoricalIndex idx2(arr, "same_name");
5881 pandas::CategoricalIndex idx3(arr, "diff_name");
5882
5883 bool passed = (idx1.identical(idx2) && !idx1.identical(idx3));
5884 if (!passed) {
5885 std::cout << " [FAIL] : in pd_test_categorical_index_identical()" << std::endl;
5886 throw std::runtime_error("pd_test_categorical_index_identical failed");
5887 }
5888
5889 std::cout << " -> tests passed" << std::endl;
5890 }
5891
5892 // ============================================================================
5893 // Inherited Operations Tests
inferred_type (pd_test_1_all.cpp:5270)
5260}
5261
5262void pd_test_categorical_index_array_constructor() {
5263 std::cout << "========= array constructor ===========================";
5264
5265 pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
5266 pandas::CategoricalIndex idx(arr, "fruits");
5267
5268 bool passed = (idx.size() == 4 && !idx.empty() &&
5269 idx.name().has_value() && *idx.name() == "fruits" &&
5270 idx.inferred_type() == "categorical");
5271 if (!passed) {
5272 std::cout << " [FAIL] : in pd_test_categorical_index_array_constructor()" << std::endl;
5273 throw std::runtime_error("pd_test_categorical_index_array_constructor failed");
5274 }
5275
5276 std::cout << " -> tests passed" << std::endl;
5277}
5278
5279void pd_test_categorical_index_values_constructor() {
5280 std::cout << "========= values constructor ==========================";
item (pd_test_3_all.cpp:3712)
3702 // Test is_interval (always false for base Index)
3703 if (int_idx.is_interval()) {
3704 throw std::runtime_error("base Index should not be interval");
3705 }
3706
3707 std::cout << " -> tests passed" << std::endl;
3708}
3709
3710void pd_test_3_all_index_item() {
3711 std::cout << "========= Index.item() =============================";
3712
3713 pandas::Index<numpy::int64> idx1({42});
3714 numpy::int64 val = idx1.item();
3715
3716 if (val != 42) {
3717 throw std::runtime_error("item() should return 42");
3718 }
3719
3720 // Test error for size != 1
3721 pandas::Index<numpy::int64> idx2({1, 2, 3});
memory_usage (pd_test_1_all.cpp:27063)
27053 }
27054
27055 std::cout << "====================================== [OK] pd_test_value_counts test suite ========================== " << std::endl;
27056 return 0;
27057 }
27058
27059} // namespace dataframe_tests
27060// ------------------- pd_test_value_counts.cpp (end) -----------------------------
27061
27062// ------------------- pd_test_memory_usage.cpp (start) -----------------------------
27063// Tests for DataFrame.memory_usage() - pandas-compatible memory usage reporting
27064
27065namespace dataframe_tests {
27066 namespace dataframe_tests_memory_usage {
27067
27068 void pd_test_memory_usage_basic() {
27069 std::cout << "========= basic memory_usage =======================";
27070
27071 // Create a simple DataFrame with multiple columns
27072 std::map<std::string, std::vector<double>> data;
27073 data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
microseconds (pd_test_1_all.cpp:19701)
19691 constexpr int64_t NS_PER_US = 1000LL;
19692 std::vector<std::optional<numpy::timedelta64>> values = {
19693 make_td(0), // 0 us
19694 make_td(500 * NS_PER_US), // 500 us
19695 make_td(NS_PER_SEC + 100 * NS_PER_US) // 1 sec + 100 us
19696 };
19697 pandas::TimedeltaArray arr(values);
19698 pandas::TimedeltaIndex idx(arr);
19699
19700 auto microseconds = idx.microseconds();
19701
19702 bool passed = (microseconds.size() == 3);
19703 if (!passed) {
19704 std::cout << " [FAIL] : in pd_test_timedelta_index_microseconds()" << std::endl;
19705 throw std::runtime_error("pd_test_timedelta_index_microseconds failed");
19706 }
19707
19708 std::cout << " -> tests passed" << std::endl;
19709}
nanoseconds (pd_test_1_all.cpp:9379)
9369 std::cout << " [FAIL] : in pd_test_datetime_as_unit() : as_unit check failed" << std::endl;
9370 throw std::runtime_error("pd_test_datetime_as_unit failed");
9371 }
9372
9373 std::cout << " -> tests passed" << std::endl;
9374}
9375
9376void pd_test_timedelta_as_unit() {
9377 std::cout << "========= TimedeltaTDMixin as_unit =========================";
9378
9379 // Create index in nanoseconds (1 hour, 2 hours)
9380 numpy::NDArray<numpy::timedelta64> data(std::vector<size_t>{2});
9381 data.setElementAt({0}, numpy::timedelta64(3600000000000LL, numpy::DateTimeUnit::Nanosecond)); // 1 hour
9382 data.setElementAt({1}, numpy::timedelta64(7200000000000LL, numpy::DateTimeUnit::Nanosecond)); // 2 hours
9383
9384 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
9385 mask.setElementAt({0}, numpy::bool_(false));
9386 mask.setElementAt({1}, numpy::bool_(false));
9387
9388 pandas::TimedeltaArray arr(data, mask);
9389 pandas::TimedeltaTDMixin idx(arr, "durations");
putmask (pd_test_3_all.cpp:3752)
3742 // Should be at least sizeof index + 5 * sizeof(int64)
3743 if (usage < 5 * sizeof(numpy::int64)) {
3744 throw std::runtime_error("memory_usage too small");
3745 }
3746
3747 std::cout << " -> tests passed" << std::endl;
3748}
3749
3750void pd_test_3_all_index_putmask() {
3751 std::cout << "========= Index.putmask() ==========================";
3752
3753 pandas::Index<numpy::int64> idx({1, 2, 3, 4, 5});
3754 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{5});
3755 mask.setElementAt({0}, numpy::bool_(true));
3756 mask.setElementAt({1}, numpy::bool_(false));
3757 mask.setElementAt({2}, numpy::bool_(true));
3758 mask.setElementAt({3}, numpy::bool_(false));
3759 mask.setElementAt({4}, numpy::bool_(true));
3760
3761 auto result = idx.putmask(mask, numpy::int64(99));
ravel (pd_test_3_all.cpp:2147)
2137 throw std::runtime_error("memory_usage shallow too small");
2138 }
2139 if (deep < shallow) {
2140 throw std::runtime_error("memory_usage deep should be >= shallow");
2141 }
2142
2143 std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147 std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149 std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150 pandas::CategoricalArray arr(values);
2151
2152 auto raveled = arr.ravel();
2153 if (raveled.size() != 3 || !raveled.equals(arr)) {
2154 throw std::runtime_error("ravel failed");
2155 }
2156
2157 auto viewed = arr.view();
repeat (pd_test_3_all.cpp:2166)
2156 auto viewed = arr.view();
2157 if (viewed.size() != 3 || !viewed.equals(arr)) {
2158 throw std::runtime_error("view failed");
2159 }
2160
2161 std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165 std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167 std::vector<std::optional<std::string>> values = {"a", "b"};
2168 pandas::CategoricalArray arr(values);
2169
2170 auto result = arr.repeat(3);
2171 if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172 *result[3] != "b" || *result[5] != "b") {
2173 throw std::runtime_error("repeat scalar failed");
2174 }
repeat (pd_test_3_all.cpp:2166)
2156 auto viewed = arr.view();
2157 if (viewed.size() != 3 || !viewed.equals(arr)) {
2158 throw std::runtime_error("view failed");
2159 }
2160
2161 std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165 std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167 std::vector<std::optional<std::string>> values = {"a", "b"};
2168 pandas::CategoricalArray arr(values);
2169
2170 auto result = arr.repeat(3);
2171 if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172 *result[3] != "b" || *result[5] != "b") {
2173 throw std::runtime_error("repeat scalar failed");
2174 }
repr (pd_test_1_all.cpp:10906)
10896 std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900 std::cout << "========= repr =========================";
10901
10902 pandas::CategoricalArray arr({"a", "b", "c"});
10903 // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904 pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906 std::string repr_str = idx.repr();
10907
10908 bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909 if (!passed) {
10910 std::cout << " [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911 throw std::runtime_error("pd_test_extension_index_repr failed");
10912 }
10913
10914 std::cout << " -> tests passed" << std::endl;
10915}
result (pd_test_1_all.cpp:15406)
15396 data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397 data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400 mask.setElementAt({0}, numpy::bool_(false));
15401 mask.setElementAt({1}, numpy::bool_(false));
15402
15403 pandas::DatetimeArray arr(data, mask);
15404 pandas::DatetimeIndexBase idx(arr, "original");
15405
15406 // Create join result (int64 values)
15407 numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408 join_result.setElementAt({0}, numpy::int64(500LL));
15409 join_result.setElementAt({1}, numpy::int64(600LL));
15410 join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412 auto new_idx = idx._from_join_target(join_result);
15413
15414 bool passed = (new_idx.size() == 3 &&
15415 new_idx.name().has_value() && *new_idx.name() == "original");
15416 if (!passed) {
round (pd_test_1_all.cpp:1688)
1678 void pd_test_floating_array_rounding() {
1679 std::cout << "========= FloatingArray: rounding ======================= ";
1680
1681 pandas::FloatingArray<double> arr({
1682 std::optional<double>(1.234),
1683 std::optional<double>(2.567),
1684 std::nullopt
1685 });
1686
1687 auto rounded = arr.round(2);
1688 if (std::abs(rounded[0].value() - 1.23) > 0.001 ||
1689 std::abs(rounded[1].value() - 2.57) > 0.001) {
1690 std::cout << " [FAIL] : in pd_test_floating_array_rounding() : round(2)" << std::endl;
1691 throw std::runtime_error("pd_test_floating_array_rounding failed: round(2)");
1692 }
1693
1694 if (!rounded.is_na(2)) {
1695 std::cout << " [FAIL] : in pd_test_floating_array_rounding() : round should preserve NA" << std::endl;
1696 throw std::runtime_error("pd_test_floating_array_rounding failed: NA preservation");
1697 }
seconds (pd_test_1_all.cpp:4192)
4182 void pd_test_timedelta_array_component_seconds() {
4183 std::cout << "========= TimedeltaArray: seconds component ======================= ";
4184
4185 pandas::TimedeltaArray arr({
4186 std::optional<numpy::timedelta64>(numpy::timedelta64(90, numpy::DateTimeUnit::Second)), // 90 secs
4187 std::optional<numpy::timedelta64>(numpy::timedelta64(3700, numpy::DateTimeUnit::Second)), // 1h + 100s
4188 std::nullopt
4189 });
4190
4191 auto secs = arr.seconds();
4192
4193 auto s0 = secs[0];
4194 if (!s0.has_value() || s0.value() != 90) {
4195 std::cout << " [FAIL] : seconds[0] should be 90" << std::endl;
4196 throw std::runtime_error("pd_test_timedelta_array_component_seconds failed: seconds[0]");
4197 }
4198
4199 auto s1 = secs[1];
4200 if (!s1.has_value() || s1.value() != 3700) {
4201 std::cout << " [FAIL] : seconds[1] should be 3700" << std::endl;
slice (pd_test_1_all.cpp:17546)
17536// ============================================================================
17537// Slicing / Indexing Tests
17538// ============================================================================
17539
17540void pd_test_period_index_slice() {
17541 std::cout << "========= slice method ================================";
17542
17543 std::vector<int64_t> ordinals = {0, 1, 2, 3, 4};
17544 pandas::PeriodIndex idx(ordinals, "D");
17545
17546 pandas::PeriodIndex sliced = idx.slice(1, 4);
17547
17548 bool passed = (sliced.size() == 3 &&
17549 sliced[0].has_value() && *sliced[0] == 1);
17550 if (!passed) {
17551 std::cout << " [FAIL] : in pd_test_period_index_slice()" << std::endl;
17552 throw std::runtime_error("pd_test_period_index_slice failed");
17553 }
17554
17555 std::cout << " -> tests passed" << std::endl;
17556}
slice_indexer (pd_test_3_all.cpp:711)
701 }
702
703 std::cout << " -> tests passed" << std::endl;
704}
705
706// ============================================================================
707// Category 6: Index Indexer Methods
708// ============================================================================
709
710void pd_test_3_all_index_indexers() {
711 std::cout << "========= Index.get_indexer_for/non_unique/slice_indexer() ";
712
713 std::vector<std::string> vals = {"a", "b", "c", "d", "e"};
714 pandas::Index<std::string> idx(vals);
715
716 // Test get_indexer_for()
717 std::vector<std::string> target = {"b", "d", "f"}; // "f" doesn't exist
718 numpy::NDArray<numpy::int64> indexer = idx.get_indexer_for(target);
719 if (indexer.getSize() != 3) {
720 std::cout << " [FAIL] : in pd_test_3_all_index_indexers() : get_indexer_for size mismatch" << std::endl;
721 throw std::runtime_error("pd_test_3_all_index_indexers failed: get_indexer_for size");
slice_locs (pd_test_1_all.cpp:18275)
18265 }
18266
18267 std::cout << "-> tests passed" << std::endl;
18268 }
18269
18270 void pd_test_range_index_slice_locs() {
18271 std::cout << "========= slice_locs ================================== ";
18272
18273 pandas::RangeIndex ri(0, 10); // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
18274
18275 auto [start_idx, stop_idx] = ri.slice_locs(3, 7);
18276
18277 bool passed = (start_idx == 3 && stop_idx == 8);
18278
18279 if (!passed) {
18280 std::cout << " [FAIL] : slice_locs" << std::endl;
18281 throw std::runtime_error("pd_test_range_index_slice_locs failed");
18282 }
18283
18284 std::cout << "-> tests passed" << std::endl;
18285 }
sort (pd_test_3_all.cpp:3869)
3859 throw std::runtime_error("last 2 positions should be NaN");
3860 }
3861 if (std::abs(result[0] - 3.0) > 0.001) {
3862 throw std::runtime_error("shift(-2) [0] should be 3.0");
3863 }
3864
3865 std::cout << " -> tests passed" << std::endl;
3866}
3867
3868void pd_test_3_all_index_sort() {
3869 std::cout << "========= Index.sort() =============================";
3870
3871 pandas::Index<numpy::int64> idx({3, 1, 4, 1, 5, 9, 2, 6});
3872 auto result = idx.sort();
3873
3874 if (result[0] != 1 || result[1] != 1 || result[7] != 9) {
3875 throw std::runtime_error("sort() not working correctly");
3876 }
3877
3878 // Test descending
3879 result = idx.sort(false);
str (pd_test_1_all.cpp:7137)
7127 // Test basic info() with stringstream
7128 std::map<std::string, std::vector<int>> data = {
7129 {"A", {1, 2, 3, 4, 5}},
7130 {"B", {10, 20, 30, 40, 50}},
7131 {"C", {100, 200, 300, 400, 500}}
7132 };
7133 pandas::DataFrame df(data);
7134
7135 std::ostringstream oss;
7136 df.info(oss);
7137 std::string output = oss.str();
7138
7139 // Verify key components
7140 if (output.find("<class 'pandas.core.frame.DataFrame'>") == std::string::npos) {
7141 std::cout << " [FAIL] : info missing class name" << std::endl;
7142 throw std::runtime_error("pd_test_dataframe_info failed: missing class name");
7143 }
7144 if (output.find("RangeIndex:") == std::string::npos) {
7145 std::cout << " [FAIL] : info missing RangeIndex" << std::endl;
7146 throw std::runtime_error("pd_test_dataframe_info failed: missing RangeIndex");
7147 }
type_id (pd_test_3_all.cpp:25592)
25582// ------------------- pd_test_value_classify (end) ------------------
25583
25584// ------------------- pd_test_index_type_id (start) ------------------
25585namespace dataframe_tests_index_type_id {
25586
25587void pd_test_index_type_id_dispatch() {
25588 std::cout << "========= IndexTypeId dispatch =======================";
25589
25590 // RangeIndex
25591 ::pandas::RangeIndex ri(0, 5);
25592 if (ri.type_id() != ::pandas::IndexTypeId::RangeIndex)
25593 throw std::runtime_error("RangeIndex type_id failed");
25594
25595 // Index<string>
25596 ::pandas::Index<std::string> si(std::vector<std::string>{"a", "b", "c"});
25597 if (si.type_id() != ::pandas::IndexTypeId::IndexString)
25598 throw std::runtime_error("Index<string> type_id failed");
25599
25600 // Index<int64>
25601 ::pandas::Index<numpy::int64> ii(std::vector<numpy::int64>{1, 2, 3});
25602 if (ii.type_id() != ::pandas::IndexTypeId::IndexInt64)
upsample (pd_test_5_all.cpp:87061)
87051 pandas::DataFrame df;
87052 std::vector<int64_t> v(idx.size(), 0);
87053 df.add_column<int64_t>("v", v);
87054 df.set_index(std::make_unique<pandas::DatetimeIndex>(idx));
87055 return df;
87056}
87057
87058void f_core_05_upsample_05f4ab_case_1_hourly_of_daily(int& local_fail) {
87059 std::cout << "-- case_1_hourly_of_daily\n";
87060 auto idx = mk_idx({"2020-01-01", "2020-01-02", "2020-01-03"});
87061 auto up = idx.upsample(pandas::Hour(1));
87062 pandas_tests::check(up.size() == 49,
87063 "case_1.hourly_of_daily.size==49", local_fail);
87064}
87065
87066void f_core_05_upsample_05f4ab_case_2_minute_of_hourly(int& local_fail) {
87067 std::cout << "-- case_2_minute_of_hourly\n";
87068 auto idx = mk_idx({"2020-01-01T00:00:00", "2020-01-01T02:00:00"});
87069 auto up = idx.upsample(pandas::Minute(1));
87070 pandas_tests::check(up.size() == 121,
87071 "case_2.minute_of_hourly.size==121", local_fail);
upsample (pd_test_5_all.cpp:87061)
87051 pandas::DataFrame df;
87052 std::vector<int64_t> v(idx.size(), 0);
87053 df.add_column<int64_t>("v", v);
87054 df.set_index(std::make_unique<pandas::DatetimeIndex>(idx));
87055 return df;
87056}
87057
87058void f_core_05_upsample_05f4ab_case_1_hourly_of_daily(int& local_fail) {
87059 std::cout << "-- case_1_hourly_of_daily\n";
87060 auto idx = mk_idx({"2020-01-01", "2020-01-02", "2020-01-03"});
87061 auto up = idx.upsample(pandas::Hour(1));
87062 pandas_tests::check(up.size() == 49,
87063 "case_1.hourly_of_daily.size==49", local_fail);
87064}
87065
87066void f_core_05_upsample_05f4ab_case_2_minute_of_hourly(int& local_fail) {
87067 std::cout << "-- case_2_minute_of_hourly\n";
87068 auto idx = mk_idx({"2020-01-01T00:00:00", "2020-01-01T02:00:00"});
87069 auto up = idx.upsample(pandas::Minute(1));
87070 pandas_tests::check(up.size() == 121,
87071 "case_2.minute_of_hourly.size==121", local_fail);